Update to new stable branch 1750

This also includes an updated ninja and chromium dependencies needed on Windows. Change-Id: Icd597d80ed3fa4425933c9f1334c3c2e31291c42 Reviewed-by: Zoltan Arvai <zarvai@inf.u-szeged.hu> Reviewed-by: Zeno Albisser <zeno.albisser@digia.com>
author: Andras Becsi <andras.becsi@digia.com> 2014-03-18 13:16:26 +0100
committer: Frederik Gladhorn <frederik.gladhorn@digia.com> 2014-03-20 15:55:39 +0100
commit: 3f0f86b0caed75241fa71c95a5d73bc0164348c5 (patch)
tree: 92b9fb00f2e9e90b0be2262093876d4f43b6cd13 /chromium/third_party/openmax_dl
parent: e90d7c4b152c56919d963987e2503f9909a666d2 (diff)
download: qtwebengine-chromium-3f0f86b0caed75241fa71c95a5d73bc0164348c5.tar.gz
99 files changed, 7826 insertions, 276 deletions
diff --git a/chromium/third_party/openmax_dl/dl/api/armCOMM_s.h b/chromium/third_party/openmax_dl/dl/api/arm/armCOMM_s.h
index 6b0d2be66a2..6ce1e2fc6a3 100644
--- a/chromium/third_party/openmax_dl/dl/api/armCOMM_s.h
+++ b/chromium/third_party/openmax_dl/dl/api/arm/armCOMM_s.h
@@ -371,6 +371,17 @@
 	
 	.endm
 
+        @// Reserve |size| bytes named |name| at the next 8-byte aligned
+        @// offset: _SBytes is padded to a multiple of 8, saved as |name|_F.
+	.macro	M_ALLOC8 name, size
+	.if	(_SBytes & 7) != 0
+	.set	_SBytes, _SBytes + (8 - (_SBytes & 7))
+	.endif
+	.set	\name\()_F, _SBytes
+	.set	_SBytes, _SBytes + \size
+	
+	.endm
+
         @ Load word from stack
 	.macro M_LDR r, a0, a1, a2, a3
 	_M_DATA "ldr", 4, \r, \a0, \a1, \a2, \a3
@@ -381,6 +392,16 @@
 	_M_DATA "str", 4, \r, \a0, \a1, \a2, \a3
 	.endm
 
+        @ Load a double word (register pair r0, r1) from the stack slot named a0
+	.macro M_LDRD r0, r1, a0, a1, a2, a3
+	_M_DATA2 "ldrd", 8, \r0, \r1, \a0, \a1, \a2, \a3
+	.endm
+
+        @ Store a double word (register pair r0, r1) to the stack slot named a0
+	.macro M_STRD r0, r1, a0, a1, a2, a3
+	_M_DATA2 "strd", 8, \r0, \r1, \a0, \a1, \a2, \a3
+	.endm
+
         @ Macro to perform a data access operation
         @ Such as LDR or STR
         @ The addressing mode is modified such that
@@ -407,3 +428,31 @@
 	.set	_Offset, _Workspace + \a0\()_F
 	\i\a1	\r, [sp, #_Offset]	
 	.endm
+
+        @ Macro to perform a two-register data access operation
+        @ Such as LDRD or STRD
+        @ The addressing mode is modified such that
+        @ 1. If no address is given then the name is taken
+        @    as a stack offset
+        @ 2. If the addressing mode is not available for the
+        @    state being assembled for (eg Thumb) then a suitable
+        @    addressing mode is substituted.
+        @
+        @ On Entry:
+        @ $i = Instruction to perform (eg "LDRD")
+        @ $a = Required byte alignment (not referenced by the macro body)
+        @ $r0,$r1 = Register pair to transfer (eg "r0", "r1")
+        @ $a0,$a1,$a2. Addressing mode and condition. One of:
+        @     label {,cc}
+        @     [base]                    {,,,cc}
+        @     [base, offset]{!}         {,,cc}
+        @     [base, offset, shift]{!}  {,cc}
+        @     [base], offset            {,,cc}
+        @     [base], offset, shift     {,cc}
+	@
+	@ WARNING: Only the first case is implemented: a0 names a stack slot, a1 is appended to the mnemonic.
+	.macro _M_DATA2 i, a, r0, r1, a0, a1, a2, a3
+	.set	_Offset, _Workspace + \a0\()_F
+	\i\a1	\r0, \r1, [sp, #_Offset]	
+	.endm
+	
+\ No newline at end of file
diff --git a/chromium/third_party/openmax_dl/dl/api/armOMX.h b/chromium/third_party/openmax_dl/dl/api/arm/armOMX.h
index 0ad21c42ce2..0ad21c42ce2 100644
--- a/chromium/third_party/openmax_dl/dl/api/armOMX.h
+++ b/chromium/third_party/openmax_dl/dl/api/arm/armOMX.h
diff --git a/chromium/third_party/openmax_dl/dl/api/omxtypes_s.h b/chromium/third_party/openmax_dl/dl/api/arm/omxtypes_s.h
index d880d351fd5..d880d351fd5 100644
--- a/chromium/third_party/openmax_dl/dl/api/omxtypes_s.h
+++ b/chromium/third_party/openmax_dl/dl/api/arm/omxtypes_s.h
diff --git a/chromium/third_party/openmax_dl/dl/dl.gyp b/chromium/third_party/openmax_dl/dl/dl.gyp
index 0573ce25631..61a05b007d9 100644
--- a/chromium/third_party/openmax_dl/dl/dl.gyp
+++ b/chromium/third_party/openmax_dl/dl/dl.gyp
@@ -18,79 +18,10 @@
       'include_dirs': [
         '../',
       ],
-      'cflags!': [
-        '-mfpu=vfpv3-d16',
-      ],
-      'cflags': [
-        # We enable Neon instructions even with arm_neon==0, to support
-        # runtime detection.
-        '-mfpu=neon',
-      ],
       'sources': [
-        'api/armCOMM_s.h',
-        'api/armOMX.h',
         'api/omxtypes.h',
-        'api/omxtypes_s.h',
-        'sp/api/armSP.h',
         'sp/api/omxSP.h',
-        # Complex 32-bit fixed-point FFT.
-        'sp/src/armSP_FFT_S32TwiddleTable.c',
-        'sp/src/omxSP_FFTGetBufSize_C_SC32.c',
-        'sp/src/omxSP_FFTInit_C_SC32.c',
-        'sp/src/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S',
-        'sp/src/omxSP_FFTInv_CToC_SC32_Sfs_s.S',
-        'sp/src/omxSP_FFTFwd_CToC_SC32_Sfs_s.S',
-        # Real 32-bit fixed-point FFT
-        'sp/src/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S',
-        'sp/src/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S',
-        'sp/src/omxSP_FFTGetBufSize_R_S32.c',
-        'sp/src/omxSP_FFTInit_R_S32.c',
-        'sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S',
-        # Complex 16-bit fixed-point FFT
-        'sp/src/omxSP_FFTInit_C_SC16.c',
-        'sp/src/omxSP_FFTGetBufSize_C_SC16.c',
-        'sp/src/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S',
-        'sp/src/omxSP_FFTFwd_CToC_SC16_Sfs_s.S',
-        'sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S',
-        # Real 16-bit fixed-point FFT
-        'sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S',
-        'sp/src/omxSP_FFTGetBufSize_R_S16S32.c',
-        'sp/src/omxSP_FFTInit_R_S16S32.c',
-        'sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S',
-        # Complex floating-point FFT
-        'sp/src/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S',
-        'sp/src/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S',
         'sp/src/armSP_FFT_F32TwiddleTable.c',
-        'sp/src/omxSP_FFTGetBufSize_C_FC32.c',
-        'sp/src/omxSP_FFTInit_C_FC32.c',
-        'sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S',
-        'sp/src/omxSP_FFTFwd_CToC_FC32_Sfs_s.S',
-        # Real floating-point FFT
-        'sp/src/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S',
-        'sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S',
-        'sp/src/omxSP_FFTGetBufSize_R_F32.c',
-        'sp/src/omxSP_FFTInit_R_F32.c',
-        'sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S',
       ],
       'conditions' : [
         ['big_float_fft == 1', {
@@ -98,6 +29,166 @@
             'BIG_FFT_TABLE',
           ],
         }],
+        ['target_arch=="arm"', {
+          'cflags!': [
+            '-mfpu=vfpv3-d16',
+          ],
+          'cflags': [
+            # We enable Neon instructions even with arm_neon==0, to support
+            # runtime detection.
+            '-mfpu=neon',
+          ],
+          'dependencies': [
+            '<(android_ndk_root)/android_tools_ndk.gyp:cpu_features',
+            'openmax_dl_armv7',
+          ],
+          'link_settings' : {
+            'libraries': [
+              # To get the __android_log_print routine
+              '-llog',
+            ],
+          },
+          'sources': [
+            # Common files that are used by both the NEON and non-NEON code.
+            'api/armCOMM_s.h',
+            'api/armOMX.h',
+            'api/omxtypes_s.h',
+            'sp/api/armSP.h',
+            'sp/src/arm/armSP_FFT_S32TwiddleTable.c',
+            'sp/src/arm/detect.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_C_FC32.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_C_SC16.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_C_SC32.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_R_F32.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_R_S16.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_R_S16S32.c',
+            'sp/src/arm/omxSP_FFTGetBufSize_R_S32.c',
+            'sp/src/arm/omxSP_FFTInit_C_FC32.c',
+            'sp/src/arm/omxSP_FFTInit_C_SC16.c',
+            'sp/src/arm/omxSP_FFTInit_C_SC32.c',
+            'sp/src/arm/omxSP_FFTInit_R_F32.c',
+            'sp/src/arm/omxSP_FFTInit_R_S16.c',
+            'sp/src/arm/omxSP_FFTInit_R_S16S32.c',
+            'sp/src/arm/omxSP_FFTInit_R_S32.c',
+
+            # Complex 32-bit fixed-point FFT.
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CToC_SC32_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTFwd_CToC_SC32_Sfs_s.S',
+            # Real 32-bit fixed-point FFT
+            'sp/src/arm/neon/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S',
+            'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32_Sfs_s.S',
+            # Complex 16-bit fixed-point FFT
+            'sp/src/arm/neon/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S',
+            'sp/src/arm/neon/omxSP_FFTFwd_CToC_SC16_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CToC_SC16_Sfs_s.S',
+            # Real 16-bit fixed-point FFT
+            'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S16_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S',
+            # Complex floating-point FFT
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S',
+            'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CToC_FC32_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTFwd_CToC_FC32_Sfs_s.S',
+            # Real floating-point FFT
+            'sp/src/arm/neon/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S',
+            'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S',
+            'sp/src/arm/neon/omxSP_FFTInv_CCSToR_F32_Sfs_s.S',
+          ],
+        }],
+        ['target_arch=="ia32" or target_arch=="x64"', {
+          'cflags': [
+            '-msse2',
+          ],
+          'sources': [
+            # Real 32-bit floating-point FFT.
+            'sp/api/x86SP.h',
+            'sp/src/x86/omxSP_FFTFwd_RToCCS_F32_Sfs.c',
+            'sp/src/x86/omxSP_FFTGetBufSize_R_F32.c',
+            'sp/src/x86/omxSP_FFTInit_R_F32.c',
+            'sp/src/x86/omxSP_FFTInv_CCSToR_F32_Sfs.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_fs.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ms.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_fs.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ms.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms.c',
+            'sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse.c',
+            'sp/src/x86/x86SP_FFT_F32_radix2_kernel.c',
+            'sp/src/x86/x86SP_FFT_F32_radix4_kernel.c',
+            'sp/src/x86/x86SP_SSE_Math.h',
+          ],
+        }],
+      ],
+    },
+  ],
+  'conditions': [
+    ['target_arch=="arm"', {
+      'targets': [
+        {
+          # Non-NEON implementation of FFT. This library is NOT
+          # standalone. Applications must link with openmax_dl.
+          'target_name': 'openmax_dl_armv7',
+          'type': 'static_library',
+          'include_dirs': [
+            '../',
+          ],
+          'cflags!': [
+            '-mfpu=neon',
+          ],
+          'sources': [
+            # Complex floating-point FFT
+            'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
+            'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S',
+            'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S',
+            'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S',
+            'sp/src/arm/armv7/omxSP_FFTInv_CToC_FC32_Sfs_s.S',
+            'sp/src/arm/armv7/omxSP_FFTFwd_CToC_FC32_Sfs_s.S',
+            # Real floating-point FFT
+            'sp/src/arm/armv7/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S',
+            'sp/src/arm/armv7/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S',
+            'sp/src/arm/armv7/omxSP_FFTInv_CCSToR_F32_Sfs_s.S',
+          ],
+        },
       ],
-  }]
+    }],
+  ],
 }
diff --git a/chromium/third_party/openmax_dl/dl/sp/api/armSP.h b/chromium/third_party/openmax_dl/dl/sp/api/armSP.h
index f615a87c7ab..4972f09c554 100644
--- a/chromium/third_party/openmax_dl/dl/sp/api/armSP.h
+++ b/chromium/third_party/openmax_dl/dl/sp/api/armSP.h
@@ -64,6 +64,14 @@ typedef struct  ARMsFFTSpec_R_SC32_Tag
     OMX_S32     *pBuf;
 }ARMsFFTSpec_R_SC32;
 
+typedef struct  ARMsFFTSpec_R_SC16_Tag  /* 16-bit counterpart of ARMsFFTSpec_R_SC32 */
+{
+    OMX_U32     N;           /* presumably the transform length -- TODO confirm */
+    OMX_U16     *pBitRev;    /* presumably a bit-reversal index table -- TODO confirm */
+    OMX_SC16    *pTwiddle;   /* presumably the twiddle-factor table -- TODO confirm */
+    OMX_S16     *pBuf;       /* presumably a scratch buffer -- TODO confirm */
+} ARMsFFTSpec_R_SC16;
+
 typedef struct ARMsFFTSpec_R_FC32_Tag
 {
     OMX_U32 N;
diff --git a/chromium/third_party/openmax_dl/dl/sp/api/omxSP.h b/chromium/third_party/openmax_dl/dl/sp/api/omxSP.h
index 3016c772f73..5a7980ad452 100644
--- a/chromium/third_party/openmax_dl/dl/sp/api/omxSP.h
+++ b/chromium/third_party/openmax_dl/dl/sp/api/omxSP.h
@@ -44,6 +44,7 @@ extern "C" {
  typedef void OMXFFTSpec_C_SC16;
  typedef void OMXFFTSpec_C_SC32;
  typedef void OMXFFTSpec_R_S16S32;
+ typedef void OMXFFTSpec_R_S16;
  typedef void OMXFFTSpec_R_S32;
  typedef void OMXFFTSpec_R_F32;
  typedef void OMXFFTSpec_C_FC32;
@@ -1423,7 +1424,7 @@ OMXResult omxSP_FFTInit_C_SC32 (
  * Input Arguments:
  *   
  *   order - base-2 logarithm of the desired block length; valid in the range 
- *            [0,12] 
+ *            [1,15] 
  *
  * Output Arguments:
  *   
@@ -1436,7 +1437,7 @@ OMXResult omxSP_FFTInit_C_SC32 (
  *              following is true: 
  *    -   pFFTSpec is either NULL or violates the 8-byte alignment 
  *              restrictions 
- *    -   order < 0 or order > 12 
+ *    -   order < 1 or order > 15
  *
  */
 OMXResult omxSP_FFTInit_C_FC32(
@@ -1487,6 +1488,45 @@ OMXResult omxSP_FFTInit_R_S16S32(
 
 
 /**
+ * Function:  omxSP_FFTInit_R_S16
+ *
+ * Description:
+ * This function initializes the specification structure required for the
+ * 16-bit real FFT and IFFT functions. The function <FFTInit_R_S16> is used
+ * to initialize the specification structure for functions
+ * <FFTFwd_RToCCS_S16_Sfs> and <FFTInv_CCSToR_S16_Sfs>.
+ *
+ * Memory for *pFFTFwdSpec must be allocated before calling this function
+ * and should be 8-byte aligned.
+ *
+ * The number of bytes required for *pFFTFwdSpec can be
+ * determined using <FFTGetBufSize_R_S16>.
+ *
+ * Input Arguments:
+ *
+ *   order - base-2 logarithm of the desired block length; valid in the range
+ *            [1,12]
+ *
+ * Output Arguments:
+ *
+ *   pFFTFwdSpec - pointer to the initialized specification structure
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned if one or more of the
+ *              following is true:
+ *    -   pFFTFwdSpec is either NULL or violates the 8-byte alignment
+ *              restrictions
+ *    -   order < 1 or order > 12
+ *
+ */
+OMXResult omxSP_FFTInit_R_S16 (
+    OMXFFTSpec_R_S16* pFFTFwdSpec,
+    OMX_INT order
+);
+
+/**
  * Function:  omxSP_FFTInit_R_S32   (2.2.4.1.4)
  *
  * Description:
@@ -1543,7 +1583,7 @@ OMXResult omxSP_FFTInit_R_S32 (
  * Input Arguments:
  *   
  *   order - base-2 logarithm of the desired block length; valid in the range 
- *            [0,12] 
+ *            [1,15] 
  *
  * Output Arguments:
  *   
@@ -1556,7 +1596,7 @@ OMXResult omxSP_FFTInit_R_S32 (
  *              following is true: 
  *    -   pFFTFwdSpec is either NULL or violates the 8-byte alignment 
  *              restrictions 
- *    -   order < 0 or order > 12 
+ *    -   order < 1 or order > 15
  *
  */
 OMXResult omxSP_FFTInit_R_F32(
@@ -1644,7 +1684,7 @@ OMXResult omxSP_FFTGetBufSize_C_SC32 (
  * Input Arguments:
  *   
  *   order - base-2 logarithm of the desired block length; valid in the range 
- *            [0,12] 
+ *            [1,15] 
  *
  * Output Arguments:
  *   
@@ -1657,7 +1697,7 @@ OMXResult omxSP_FFTGetBufSize_C_SC32 (
  *    OMX_Sts_BadArgErr - bad arguments; returned if one or more of the 
  *              following is true: 
  *    -    pSize is NULL 
- *    -    order < 0 or order > 12 
+ *    -    order < 1 or order > 15 
  *
  */
 OMXResult omxSP_FFTGetBufSize_C_FC32(
@@ -1699,6 +1739,38 @@ OMXResult omxSP_FFTGetBufSize_R_S16S32(
 );
 
 
+/**
+ * Function:  omxSP_FFTGetBufSize_R_S16
+ *
+ * Description:
+ * This function computes the size of the specification structure
+ * required for the length 2^order real FFT and IFFT functions.  The function
+ * <FFTGetBufSize_R_S16> is used in conjunction with the 16-bit
+ * functions <FFTFwd_RToCCS_S16_Sfs> and <FFTInv_CCSToR_S16_Sfs>.
+ *
+ * Input Arguments:
+ *
+ *   order - base-2 logarithm of the length; valid in the range
+ *   [1,12]
+ *
+ * Output Arguments:
+ *
+ *   pSize - pointer to the number of bytes required for the specification
+ *            structure
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned if one or more of the
+ *              following is true:
+ *    -    pSize is NULL
+ *    -    order < 1 or order > 12
+ *
+ */
+OMXResult omxSP_FFTGetBufSize_R_S16 (
+    OMX_INT order,
+    OMX_INT *pSize
+);
 
 /**
  * Function:  omxSP_FFTGetBufSize_R_S32   (2.2.4.1.8)
@@ -1743,7 +1815,7 @@ OMXResult omxSP_FFTGetBufSize_R_S32 (
  *
  * Input Arguments:
  *   
- *   order - base-2 logarithm of the length; valid in the range [0,12] 
+ *   order - base-2 logarithm of the length; valid in the range [1,15] 
  *
  * Output Arguments:
  *   
@@ -1756,7 +1828,7 @@ OMXResult omxSP_FFTGetBufSize_R_S32 (
  *    OMX_Sts_BadArgErr - bad arguments The function returns 
  *              OMX_Sts_BadArgErr if one or more of the following is true: 
  *    pSize is NULL 
- *    order < 0 or order > 12 
+ *    order < 1 or order > 15
  *
  */
 OMXResult omxSP_FFTGetBufSize_R_F32(
@@ -1886,8 +1958,7 @@ OMXResult omxSP_FFTFwd_CToC_SC32_Sfs (
  *          must be aligned on a 32-byte boundary. 
  *   pFFTSpec - pointer to the preallocated and initialized specification 
  *            structure 
- *   scaleFactor - scale factor of the output. Valid value is 0
- *          only.
+ *   scaleFactor - scale factor of the output. Valid range is [0,16].
  *
  * Output Arguments:
  *   order 
@@ -2024,6 +2095,59 @@ OMXResult omxSP_FFTFwd_RToCCS_S16S32_Sfs (
 );
 
 
+/**
+ * Function:  omxSP_FFTFwd_RToCCS_S16_Sfs
+ *
+ * Description:
+ * These functions compute an FFT for a real-valued signal of length 2^order,
+ * where 0 < order <= 12. Transform length is determined by the
+ * specification structure, which must be initialized prior to calling the FFT
+ * function using the appropriate helper, i.e., <FFTInit_R_S16>.
+ * The relationship between the input and output sequences can
+ * be expressed in terms of the DFT, i.e.:
+ *
+ *     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+ *     n=0,1,2,...N-1
+ *     N=2^order.
+ *
+ * The conjugate-symmetric output sequence is represented using a CCS vector,
+ * which is of length N+2, and is organized as follows:
+ *
+ *   Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
+ *   Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
+ *
+ * where R[n] and I[n], respectively, denote the real and imaginary components
+ * for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
+ * Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+ * the foldover frequency.
+ *
+ * Input Arguments:
+ *   pSrc - pointer to the real-valued input sequence, of length 2^order;
+ *          must be aligned on a 32-byte boundary.
+ *   pFFTSpec - pointer to the preallocated and initialized specification
+ *            structure
+ *   scaleFactor - output scale factor; valid range is [0, 16]
+ *
+ * Output Arguments:
+ *   pDst - pointer to output sequence, represented using CCS format, of
+ *            length (2^order)+2; must be aligned on a 32-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments, if one or more of the following is true:
+ *    -    one of the pointers pSrc, pDst, or pFFTSpec is NULL
+ *    -    pSrc or pDst is not aligned on a 32-byte boundary
+ *    -    scaleFactor < 0 or scaleFactor > 16
+ *
+ */
+OMXResult omxSP_FFTFwd_RToCCS_S16_Sfs (
+    const OMX_S16* pSrc,
+    OMX_S16* pDst,
+    const OMXFFTSpec_R_S16* pFFTSpec,
+    OMX_INT scaleFactor
+);
+
 
 /**
  * Function:  omxSP_FFTFwd_RToCCS_S32_Sfs   (2.2.4.4.2)
@@ -2129,7 +2253,29 @@ OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs(
     const OMXFFTSpec_R_F32* pFFTSpec
 );
 
+#ifdef __arm__
+/*
+ * Non-NEON version of omxSP_FFTFwd_RToCCS_F32_Sfs; takes the same arguments.
+ */
+OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs_vfp(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec
+);
 
+/*
+ * Function pointer with the signature of omxSP_FFTFwd_RToCCS_F32_Sfs.
+ * Calling through it automatically detects whether NEON is available
+ * and chooses the appropriate routine.
+ */
+extern OMXResult (*omxSP_FFTFwd_RToCCS_F32)(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec
+);
+#else  /* !__arm__: no dispatch pointer, the name aliases the _Sfs routine */
+#define omxSP_FFTFwd_RToCCS_F32 omxSP_FFTFwd_RToCCS_F32_Sfs
+#endif  /* __arm__ */
 
 /**
  * Function:  omxSP_FFTInv_CCSToR_S32S16_Sfs   (2.2.4.4.4)
@@ -2179,6 +2325,53 @@ OMXResult omxSP_FFTInv_CCSToR_S32S16_Sfs (
 );
 
 
+/**
+ * Function:  omxSP_FFTInv_CCSToR_S16_Sfs
+ *
+ * Description:
+ * These functions compute the inverse FFT for a conjugate-symmetric input
+ * sequence.  Transform length is determined by the specification structure,
+ * which must be initialized prior to calling the FFT function using
+ * <FFTInit_R_S16>. For a transform of length M, the input
+ * sequence is represented using a packed CCS vector of length
+ * M+2, and is organized as follows:
+ *
+ *   Index:     0    1  2    3    4    5    . . .  M-2       M-1      M      M+1
+ *   Component  R[0] 0  R[1] I[1] R[2] I[2] . . .  R[M/2-1]  I[M/2-1] R[M/2] 0
+ *
+ * where R[n] and I[n], respectively, denote the real and imaginary components
+ * for FFT bin n.
+ * Bins are numbered from 0 to M/2, where M is the FFT length.  Bin index 0
+ * corresponds to the DC component, and bin index M/2 corresponds to the
+ * foldover frequency.
+ *
+ * Input Arguments:
+ *   pSrc - pointer to the complex-valued input sequence represented using
+ *            CCS format, of length (2^order) + 2; must be aligned on a 32-byte
+ *            boundary.
+ *   pFFTSpec - pointer to the preallocated and initialized specification
+ *            structure
+ *   scaleFactor - output scale factor; valid range is [0, 16]
+ *
+ * Output Arguments:
+ *   pDst - pointer to the real-valued output sequence, of length 2^order; must
+ *            be aligned on a 32-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments if one or more of the following is true:
+ *    -    pSrc, pDst, or pFFTSpec is NULL
+ *    -    pSrc or pDst is not aligned on a 32-byte boundary
+ *    -    scaleFactor < 0 or scaleFactor > 16
+ *
+ */
+OMXResult omxSP_FFTInv_CCSToR_S16_Sfs (
+    const OMX_S16* pSrc,
+    OMX_S16* pDst,
+    const OMXFFTSpec_R_S16* pFFTSpec,
+    OMX_INT scaleFactor
+);
 
 /**
  * Function:  omxSP_FFTInv_CCSToR_S32_Sfs   (2.2.4.4.4)
@@ -2274,7 +2467,28 @@ OMXResult omxSP_FFTInv_CCSToR_F32_Sfs(
     const OMXFFTSpec_R_F32* pFFTSpec
 );
 
+#ifdef __arm__
+/*
+ * Non-NEON version of omxSP_FFTInv_CCSToR_F32_Sfs; takes the same arguments.
+ */
+OMXResult omxSP_FFTInv_CCSToR_F32_Sfs_vfp(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec
+);
 
+/*
+ * Function pointer with the signature of omxSP_FFTInv_CCSToR_F32_Sfs.
+ * Calling through it automatically detects whether NEON is available
+ * and chooses the appropriate routine.
+ */
+extern OMXResult (*omxSP_FFTInv_CCSToR_F32)(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec);
+#else  /* !__arm__: no dispatch pointer, the name aliases the _Sfs routine */
+#define omxSP_FFTInv_CCSToR_F32 omxSP_FFTInv_CCSToR_F32_Sfs    
+#endif  /* __arm__ */
 
 #ifdef __cplusplus
 }
diff --git a/chromium/third_party/openmax_dl/dl/sp/api/x86SP.h b/chromium/third_party/openmax_dl/dl/sp/api/x86SP.h
new file mode 100644
index 00000000000..53127343b75
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/api/x86SP.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2007-2008 ARM Limited. All Rights Reserved.
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ *  It has been relicensed with permission from the copyright holders.
+ */
+
+#ifndef _x86SP_H_
+#define _x86SP_H_
+
+#include "dl/api/omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern OMX_F32 armSP_FFT_F32TwiddleTable[];
+
+typedef struct X86FFTSpec_R_FC32_Tag
+{
+    OMX_U32 N;          /* presumably the real FFT length -- TODO confirm */
+    OMX_F32* pTwiddle;  /* presumably the twiddle-factor table -- TODO confirm */
+    /* Ping-pong buffers for doing the N/2 point complex FFT. */
+    OMX_F32* pBuf1;
+    OMX_F32* pBuf2;
+
+} X86FFTSpec_R_FC32;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_S32TwiddleTable.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/armSP_FFT_S32TwiddleTable.c
index a0db0575b50..a0db0575b50 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_S32TwiddleTable.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armSP_FFT_S32TwiddleTable.c
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S
new file mode 100644
index 00000000000..75d6711cd64
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S
@@ -0,0 +1,260 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of 
+@//  armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S to support float
+@//  instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
+@// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
+@// It implements the "scaled"(by 1/2) version of the above formula.
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+
+
+@// Output registers
+#define result          r0
+
+@//Local Scratch Registers (several names below share one core register)
+
+
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define pTwiddle        r4
+#define pOut            r5
+#define subFFTSize      r7
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+#define count           r8
+#define diffMinusOne    r2
+#define round           r3
+
+#define pOut1           r2              /*@// Shares r2 with pFFTSpec*/
+#define size            r7
+#define step            r3
+#define step1           r6              /*@// Shares r6 with N*/
+#define twStep          r12
+#define pTwiddleTmp     r14
+#define t0              r12             /*@// Shares r12 with twStep*/
+
+#define x0r     s0
+#define x0i     s1
+#define x1r     s2
+#define x1i     s3
+#define w0r     s4
+#define w0i     s5
+#define y0r     s6
+#define y0i     s7
+#define w1r     s6
+#define w1i     s7
+#define y1r     s6              /*@// w1r,w1i*/
+#define y1i     s7
+#define st0     s8
+#define st1     s9
+#define st2     s10
+#define st3     s11
+#define st4     s12
+#define st5     s13
+@// half = 0.5
+#define half    s15
+
+
+
+
+
+        .MACRO FFTSTAGE scaled, inverse,name
+        @// Only |name| (a label suffix) is referenced below; |scaled| and |inverse| are unused.
+        @// Initialize half now: 0x3f000000 is the single-precision encoding of 0.5.
+        movw    N, #0x0000
+        movt    N, #0x3f00
+        vmov.f32 half, N                @// half = 0.5
+
+        @// Read the FFT size N from the spec structure
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @// Read other structure parameters
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+
+        MOV     size,N,ASR #1           @// size = N/2; N itself is preserved
+
+        MOV     step,size,LSL #3        @// step = N/2 * 8 bytes
+        ADD     pTwiddleTmp,pTwiddle,#8 @// W^2
+
+        ADD     pOut1,pOut,step         @// pOut1 = pOut+ N/2*8 bytes
+        @// twStep = 3N/8 * 8 bytes pointing to W^1
+        SUB     twStep,step,size,LSL #1
+        MOV     step1,size,LSL #2       @// step1 = N/4 * 8 = N/2*4 bytes
+        SUB     step1,step1,#8          @// (N/4-1)*8 bytes
+        ADD     argTwiddle,pTwiddle,twStep      @// W^1
+
+        @// Z(k) = 1/2 {[F(k) +  F'(N/2-k)] +j*W^(-k) [F(k) -  F'(N/2-k)]}
+        @// Note: W^(k) is stored as negated value and also need to
+        @// conjugate the values from the table
+
+        @// Z(0) : no need of twiddle multiply
+        @// Z(0) = 1/2 { [F(0) +  F'(N/2)] +j [F(0) -  F'(N/2)] }
+
+
+        add      pSrc, step             @// step = N/2*8 bytes
+        vldm.f32 pSrc, {x1r, x1i}       @// {x1r, x1i} = [pSrc, step]
+        sub      pSrc, step
+        vldm.f32 pSrc!, {x0r, x0i}
+
+        SUBS    size,size,#2            @// sets the flags tested by BLT/BEQ below
+
+        vadd.f32 st0, x0r, x1r          @// a+c
+        vsub.f32 st1, x0r, x1r          @// a-c
+        vmov.f32 x0r, st0
+        vmov.f32 x1r, st1
+        vsub.f32 st0, x0i, x1i          @// b-d
+        vadd.f32 x1i, x0i, x1i          @// b+d
+        vmov.f32 x0i, st0
+
+
+        vsub.f32     x0r,x0r,x1i        @// Z(0).r
+        vadd.f32     x0i,x0i,x1r        @// Z(0).i
+
+        vmul.f32 x0r, half
+        vmul.f32 x0i, half
+        vstm.f32 pOut1!, {x0r, x0i}     @// pOut1 = pOut+ N/2*8 bytes
+        @// N/2 < 2: nothing left to do.  N/2 == 2: only the last element remains.
+        BLT     end\name
+        BEQ     lastElement\name
+
+        ASR     size,size,#1            @// (N/2-2)/2 passes; each pass handles two points
+evenOddButterflyLoop\name:
+
+        SUB     step,step,#16           @// (N/2-2)*8 bytes
+
+        add      pSrc, step             @// (N/2-1)*8 bytes
+        vldm.f32 pSrc, {x1r, x1i}       @// {x1r, x1i} = [pSrc, step]
+        sub      pSrc, step
+        vldm.f32 pSrc!, {x0r, x0i}
+        add      argTwiddle, step1
+        vldm.f32 argTwiddle, {w1r, w1i} @// {w1r, w1i} = [argTwiddle, step1]
+        sub      argTwiddle, step1
+        vldm.f32 argTwiddle!, {w0r, w0i}
+
+        SUB     step1,step1,#8
+        SUBS    size,size,#1            @// loop counter; flags tested by the BGT at the loop end
+
+
+        vsub.f32     st2,x0r,x1r        @// a-c
+        vadd.f32     st3,x0i,x1i        @// b+d
+        vadd.f32     st0,x0r,x1r        @// a+c
+        vsub.f32     st1,x0i,x1i        @// b-d
+
+        vmul.f32  x1r,w1r,st2
+        vmul.f32  x1i,w1r,st3
+        vmls.f32  x1r,w1i,st3
+        vmla.f32  x1i,w1i,st2
+
+        vadd.f32     y1r,st0,x1i        @// F(N/2 -1)
+        vsub.f32     y1i,x1r,st1        @// y1r,y1i same as w1r, w1i
+
+
+        vmul.f32  x0r,w0r,st2
+        vmul.f32  x0i,w0r,st3
+        vmla.f32  x0r,w0i,st3
+        vmls.f32  x0i,w0i,st2
+
+
+        vadd.f32     st4,st0,x0i        @// F(1)
+        vsub.f32     st5,st1,x0r
+
+        @// Apply the 1/2 scaling to both outputs before storing them
+        vmul.f32 y1r, half
+        vmul.f32 y1i, half
+        vmul.f32 st4, half
+        vmul.f32 st5, half
+        add      pOut1, step            @// (N/2-1)*8 bytes
+        vstm.f32 pOut1, {y1r, y1i}      @// {y1r,y1i} = [pOut1, step]
+        sub      pOut1, step
+        vstm.f32 pOut1!, {st4, st5}
+
+        MOV     t0,argTwiddle           @// swap ptr for even and odd twiddles
+        MOV     argTwiddle,pTwiddleTmp
+        MOV     pTwiddleTmp,t0
+
+        BGT     evenOddButterflyLoop\name
+
+
+        @// Last element can be expanded as follows
+        @// 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)]
+        @// (since W^k is stored as -ve)
+        @// 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
+        @// 1/2[2a+j0] + j (c-jd) [0+j2b]
+        @// (a+bc, -bd)
+        @// Since (c,d) = (0,1) for the last element, result is just (a,-b)
+
+lastElement\name:
+        vldm.f32 pSrc, {x0r, x0i}
+
+        vneg.f32 x0i, x0i
+        vstm.f32 pOut1, {x0r, x0i}
+end\name:
+
+
+        .endm
+
+
+@// Byte offsets of the FFTSpec fields; FFTSTAGE above reads the N, pTwiddle and pBuf fields.
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+@// Single entry point: FFTSTAGE is expanded once, with the label suffix Inv.
+        M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp,r4
+             FFTSTAGE "FALSE","TRUE",Inv
+        M_END
+
+@//    ENDIF                                           @//ARM1136JS
+
+
+      @// Guarding implementation by the processor name
+
+
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S
new file mode 100644
index 00000000000..c2feb0bc758
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S
@@ -0,0 +1,145 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
+@//  to support float instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute a Radix 2 FFT stage for a N point complex signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+
+
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r2
+#define pTwiddle        r1
+#define pPingPongBuf    r5
+#define subFFTNum       r6
+#define subFFTSize      r7
+
+
+@//Output Registers
+
+
+@//Local Scratch Registers
+
+#define pDstBuf         r3                   /*@// Temporarily hold pingpong buffer ptr*/
+#define grpSize         r14
+#define outPointStep    r12
+#define setCount        r14                  /*@// Reuse grpSize as setCount*/
+#define pointStep       r12                  /*@// Same register as outPointStep*/
+
+@// Real and Imaginary parts (x = inputs, y = butterfly outputs)
+#define x0r s0
+#define x0i s1
+#define x1r s2
+#define x1i s3
+#define y1r s4
+#define y1i s5
+#define y0r s6
+#define y0i s7
+
+
+
+        .MACRO FFTSTAGE scaled, inverse, name
+        @// Only |name| (a label suffix) is referenced below; |scaled| and |inverse| are unused.
+        @// Update grpCount and grpSize right away in order to reuse pGrpCount
+        @// and pGrpSize regs
+
+        mov     subFFTSize, #2
+        lsr     grpSize, subFFTNum, #1
+        mov     subFFTNum, grpSize
+
+        @// pT0+1 increments pT0 by 8 bytes
+        @// pointStep (bytes) = 8 * updated grpSize = 4 * incoming subFFTNum
+        @// Note: outPointStep = pointStep for firststage
+        @// Note: setCount shares r14 with grpSize, so it starts at subFFTNum/2
+        MOV     pointStep,grpSize,LSL #3
+
+
+
+        @// Loop on the sets for grp zero
+
+grpZeroSetLoop\name:
+
+        add      pSrc, pSrc, pointStep
+        @// {x1r,x1i} = [pSrc, pointStep]
+        vldm.f32 pSrc, {x1r, x1i}
+        sub      pSrc, pSrc, pointStep
+        vldm.f32 pSrc!, {x0r, x0i}
+
+        SUBS    setCount,setCount,#1            @// decrement the loop counter
+
+
+        @// Radix-2 butterfly without twiddles: y1 = x0 - x1, y0 = x0 + x1
+        vsub.f32     y1r,x0r,x1r
+        vsub.f32     y1i,x0i,x1i
+
+        vadd.f32     y0r,x0r,x1r
+        vadd.f32     y0i,x0i,x1i
+
+        add     pDst, pDst, outPointStep
+        @// {y1r,y1i} -> [pDst, outPointStep]
+        vstm    pDst, {y1r, y1i}
+        sub     pDst, pDst, outPointStep
+        vstm    pDst!, {y0r, y0i}
+
+        BGT     grpZeroSetLoop\name
+
+
+        @// reset pSrc to pDst for the next stage
+        SUB     pSrc,pDst,pointStep             @// pSrc = start of this stage's output
+        mov     pDst, pPingPongBuf
+
+        .endm
+
+
+        M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp,r4
+        FFTSTAGE "FALSE","FALSE",FWD
+        M_END
+        @// FFTSTAGE in this file never tests |inverse|; only the label suffix differs below.
+        M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp,r4
+        FFTSTAGE "FALSE","TRUE",INV
+        M_END
+
+
+@//    ENDIF                                                           @//ARM1136JS
+
+
+@// Guarding implementation by the processor name
+
+
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
new file mode 100644
index 00000000000..3bd47252f1e
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
@@ -0,0 +1,213 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
+@//  to support float instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute a first stage Radix 4 FFT stage for a N point complex signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+
+
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r2
+#define pTwiddle        r1
+#define pPingPongBuf    r5
+#define subFFTNum       r6
+#define subFFTSize      r7
+
+
+@//Output Registers
+
+
+@//Local Scratch Registers
+
+#define grpSize         r14
+#define outPointStep    r12
+#define setStep         r3
+#define setCount        r14                  /*@// Reuse grpSize as setCount*/
+#define pointStep       r12                  /*@// Same register as outPointStep*/
+
+@// Real and Imaginary parts
+#define x0r s0
+#define x0i s1
+#define x1r s2
+#define x1i s3
+#define x2r s4
+#define x2i s5
+#define x3r s6
+#define x3i s7
+#define t3r s0                 /*@// Aliases x0r/x0i; holds a second-stage output*/
+#define t3i s1
+#define sr  s8                 /*@// sr/si hold doubled values (2*x) in the butterflies*/
+#define si  s9
+
+
+
+        .MACRO FFTSTAGE scaled, inverse, name
+        @// |inverse| selects the output store order and |name| suffixes labels; |scaled| is unused.
+        @// (No stack arguments are defined for this stage.)
+
+
+        @// Update grpCount and grpSize right away in order to reuse
+        @// pSubFFTSize and pSubFFTNum regs
+        mov     subFFTSize, #4
+        lsr     grpSize, subFFTNum, #2
+        mov     subFFTNum, grpSize
+
+
+        @// pT0+1 increments pT0 by 8 bytes
+        @// pointStep (bytes) = 8 * updated grpSize = 2 * incoming subFFTNum
+        @// Note: outPointStep = pointStep for firststage
+        @// Note: setCount shares r14 with grpSize, so it starts at subFFTNum/4
+        MOV     pointStep,grpSize,LSL #3
+
+
+        @// Calculate the step of input data for the next set
+        @//MOV     setStep,pointStep,LSL #1
+        MOV     setStep,grpSize,LSL #4          @// setStep = 2*pointStep
+        @// setStep = 3*pointStep
+        ADD     setStep,setStep,pointStep
+        @// setStep = - 3*pointStep+8
+        RSB     setStep,setStep,#8
+
+        @// grp = 0 a special case since all the twiddle factors are 1
+        @// Loop on the sets
+
+grpZeroSetLoop\name:
+
+        vldm.f32 pSrc, {x0r, x0i}
+        add     pSrc, pSrc, pointStep
+        vldm.f32 pSrc, {x1r, x1i}
+        add     pSrc, pSrc, pointStep
+        vldm.f32 pSrc, {x2r, x2i}
+        add     pSrc, pSrc, pointStep
+        vldm.f32 pSrc, {x3r, x3i}
+        add     pSrc, pSrc, setStep             @// net advance per set: 8 bytes
+
+
+        @// Decrement setcount
+        SUBS    setCount,setCount,#1
+
+
+
+        @// finish first stage of 4 point FFT
+
+        vadd.f32     x0r,x0r,x2r                @// x0 = x0 + x2
+        vadd.f32     x0i,x0i,x2i
+
+        vadd.f32     sr, x2r, x2r
+        vadd.f32     si, x2i, x2i
+        vsub.f32     x2r,x0r,sr                 @// x2 = x0 - x2
+        vsub.f32     x2i,x0i,si
+
+        vadd.f32     x1r,x1r,x3r                @// x1 = x1 + x3
+        vadd.f32     x1i,x1i,x3i
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vsub.f32     x3r,x1r,sr                 @// x3 = x1 - x3
+        vsub.f32     x3i,x1i,si
+
+
+        @// finish second stage of 4 point FFT
+
+
+        vadd.f32     x0r,x0r,x1r                @// x0 = x0 + x1
+        vadd.f32     x0i,x0i,x1i
+
+        vadd.f32     sr, x1r, x1r
+        vadd.f32     si, x1i, x1i
+        vsub.f32     x1r,x0r,sr                 @// x1 = x0 - x1
+        vsub.f32     x1i,x0i,si
+
+        vstm.f32 pDst, {x0r, x0i}               @// x0 is stored before t3 reuses s0/s1
+        add      pDst, pDst, outPointStep
+
+        vadd.f32     x2r,x2r,x3i                @// x2 = x2 - j*x3
+        vsub.f32     x2i,x2i,x3r
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vsub.f32     t3r, x2r, si               @// t3 = (x2 - j*x3) + 2j*x3
+        vadd.f32     t3i, x2i, sr
+
+        .ifeqs  "\inverse", "TRUE"
+            vstm.f32 pDst, {t3r, t3i}
+            add      pDst, pDst, outPointStep
+            vstm.f32 pDst, {x1r, x1i}
+            add      pDst, pDst, outPointStep
+            vstm.f32 pDst, {x2r, x2i}
+            add      pDst, pDst, setStep
+        .else
+            vstm.f32 pDst, {x2r, x2i}
+            add      pDst, pDst, outPointStep
+            vstm.f32 pDst, {x1r, x1i}
+            add      pDst, pDst, outPointStep
+            vstm.f32 pDst, {t3r, t3i}
+            add      pDst, pDst, setStep
+        .endif
+
+
+        BGT     grpZeroSetLoop\name
+
+
+        @// reset pSrc to pDst for the next stage
+        SUB     pSrc,pDst,pointStep             @// pSrc = start of this stage's output
+        mov     pDst, pPingPongBuf
+
+        .endm
+
+
+        M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp,r4
+        FFTSTAGE "FALSE","FALSE",FWD
+        M_END
+
+        @// Inverse variant: |inverse| = "TRUE" swaps the order in which x2 and t3 are stored.
+        M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp,r4
+        FFTSTAGE "FALSE","TRUE",INV
+        M_END
+
+
+@//    ENDIF                                                           @//ARM1136JS
+
+
+@// Guarding implementation by the processor name
+
+
+
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
new file mode 100644
index 00000000000..00e48d1e6ea
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
@@ -0,0 +1,310 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
+@//  to support float instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute a Radix 4 FFT stage for a N point complex signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+
+
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r2
+#define pTwiddle        r1
+#define subFFTNum       r6
+#define subFFTSize      r7
+
+
+
+@//Output Registers
+
+
+@//Local Scratch Registers
+
+#define grpCount        r12
+#define step            r12                  /*@// Reuse grpCount*/
+#define outPointStep    r3
+#define setCount        r8
+#define diff            r9
+#define pointStep       r14
+
+#define t1              r3                 /*@// Reuse outPointStep*/
+
+@// Real and Imaginary parts used in the inner grp loop
+#define x0r s0
+#define x0i s1
+#define x1r s2
+#define x1i s3
+#define x2r s4
+#define x2i s5
+#define x3r s6
+#define x3i s7
+
+@// Temporary regs: t0/t2 hold products and outputs, sr/si hold doubled values (2*x)
+
+#define t0r s8
+#define t0i s9
+#define t2r s10
+#define t2i s11
+#define sr  s12
+#define si  s13
+
+
+
+
+        .MACRO FFTSTAGE scaled, inverse , name
+        @// |inverse| selects the multiply signs and store order; |name| suffixes labels; |scaled| is unused.
+        @// (No stack arguments are defined for this stage.)
+
+
+        @// Update grpCount and grpSize right away in order to reuse
+        @// pGrpCount and pGrpSize regs
+
+        LSL     grpCount,subFFTSize,#2
+        lsr     subFFTNum, subFFTNum, #2
+        mov     subFFTSize, grpCount
+
+
+        @// pT0+1 increments pT0 by 8 bytes
+        @// pointStep = 2*subFFTNum here; it is scaled to 8*subFFTNum bytes below
+        mov     pointStep, subFFTNum, lsl #1
+
+
+        @// pOut0+1 increments pOut0 by 8 bytes
+        @// pOut0+outPointStep == increment of 8*outPointStep bytes = 2*size
+        @// bytes
+
+        @// Use setCount as dummy (it receives the high word).  It's set correctly below.
+        smull   outPointStep, setCount, grpCount, pointStep
+
+        LSL     pointStep,pointStep,#2                      @// pointStep = 8*subFFTNum bytes
+
+
+        MOV     setCount,pointStep,LSR #3                   @// setCount = subFFTNum
+
+        @// Interchange grpLoop and setLoop
+
+setLoop\name:
+
+        MOV     step,#0
+        @// Set pSrc and pDst for the grpLoop
+
+        SUB      diff,outPointStep,pointStep
+
+        @// (setCount stays in its register; nothing is spilled to the stack here.)
+
+        ADD      pSrc,pSrc,diff,LSL #2  @// pSrc += (grpCount-1)*grpStep
+        ADD      pDst,pDst,diff         @// pDst += (grpCount-1)*setCount
+        ADD      step,step,diff         @// step += (grpCount-1)*setCount
+
+
+
+        @// Loop on the grps
+
+grpLoop\name:
+
+
+
+        @// butterfly loop
+        add         pSrc, pointStep
+        vldm.f32    pSrc, {x3r, x3i}                    @// data[1]
+        add         pTwiddle, step
+        vldm.f32    pTwiddle, {x1r, x1i}                @// coef[1]
+        add         pTwiddle, step
+        vldm.f32    pTwiddle, {x2r, x2i}                @// coef[2]
+        add         pSrc, pointStep
+        vldm.f32    pSrc, {x0r, x0i}                    @// data[2]
+
+        @// do first complex multiply
+        vmul.f32 t0r, x3r, x1r
+        vmul.f32 t0i, x3i, x1r
+
+        .ifeqs  "\inverse", "TRUE"
+            vmla.f32 t0r, x3i, x1i
+            vmls.f32 t0i, x3r, x1i
+            vmov.f32 x1r, t0r
+            vmov.f32 x1i, t0i
+        .else
+            vmls.f32 t0r, x3i, x1i
+            vmla.f32 t0i, x3r, x1i
+            vmov.f32 x1r, t0r
+            vmov.f32 x1i, t0i
+        .endif
+
+        add     pTwiddle, pTwiddle, step
+        vldm    pTwiddle, {x3r, x3i}                    @// coef[3]
+        sub     pTwiddle, pTwiddle, step
+
+        @// do second complex multiply
+        vmul.f32 t0r, x0r, x2r
+        vmul.f32 t0i, x0i, x2r
+
+        .ifeqs  "\inverse", "TRUE"
+            vmla.f32 t0r, x0i, x2i
+            vmls.f32 t0i, x0r, x2i
+            vmov.f32 x2r, t0r
+            vmov.f32 x2i, t0i
+        .else
+            vmls.f32 t0r, x0i, x2i
+            vmla.f32 t0i, x0r, x2i
+            vmov.f32 x2r, t0r
+            vmov.f32 x2i, t0i
+        .endif
+
+        add     pSrc, pointStep
+        vldm    pSrc, {x0r, x0i}                @// data[3]
+        sub     pSrc, pointStep
+
+        SUB     pTwiddle,pTwiddle,step,LSL #1   @// reset pTwiddle
+        SUBS    step,step,pointStep             @// loop counter; flags used by SUBGE/BGE below
+
+        @// do third complex multiply
+        SUB     pSrc,pSrc,pointStep,LSL #1      @// reset pSrc to data[0]
+        vmul.f32 t0r, x0r, x3r
+        vmul.f32 t0i, x0i, x3r
+
+        .ifeqs  "\inverse", "TRUE"
+            vmla.f32 t0r, x0i, x3i
+            vmls.f32 t0i, x0r, x3i
+            vmov.f32 x3r, t0r
+            vmov.f32 x3i, t0i
+        .else
+            vmls.f32 t0r, x0i, x3i
+            vmla.f32 t0i, x0r, x3i
+            vmov.f32 x3r, t0r
+            vmov.f32 x3i, t0i
+        .endif
+
+        vldm    pSrc, {x0r, x0i}                @// data[0]
+
+        @// finish first stage of 4 point FFT
+        vadd.f32     x0r,x0r,x2r                @// x0 = x0 + x2 (u0)
+        vadd.f32     x0i,x0i,x2i
+
+        vadd.f32     sr, x2r, x2r
+        vadd.f32     si, x2i, x2i
+        vsub.f32     x2r,x0r,sr                 @// x2 = x0 - x2 (u1)
+        vsub.f32     x2i,x0i,si
+
+        vadd.f32     x1r,x1r,x3r                @// x1 = x1 + x3 (u2)
+        vadd.f32     x1i,x1i,x3i
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vsub.f32     x3r,x1r,sr                 @// x3 = x1 - x3 (u3)
+        vsub.f32     x3i,x1i,si
+
+
+        @// finish second stage of 4 point FFT
+
+        @// y0 = u1-u2 since twiddle's are stored as -ve values
+        vsub.f32     x2r,x2r,x1r
+        vsub.f32     x2i,x2i,x1i
+
+        vadd.f32     sr, x1r, x1r
+        vadd.f32     si, x1i, x1i
+        vadd.f32     x1r,x2r,sr                 @// y2 = u1+u2
+        vadd.f32     x1i,x2i,si
+        vstm    pDst, {x2r, x2i}                @// store y0
+
+        vsub.f32     x0r,x0r,x3i                @// y3 = u0+ju3
+        vadd.f32     x0i,x0i,x3r
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vadd.f32     t2r,x0r,si                 @// y1 = u0-ju3
+        vsub.f32     t2i,x0i,sr                 @// t2r/t2i are s10/s11
+
+        .ifeqs  "\inverse", "TRUE"
+            add     pDst, outPointStep
+            vstm    pDst, {t2r, t2i}            @// store y1
+            add     pDst, outPointStep
+            vstm    pDst, {x1r, x1i}            @// store y2
+            add     pDst, outPointStep
+            vstm    pDst, {x0r, x0i}            @// store y3
+            sub     pDst, outPointStep
+        .else
+            add     pDst, outPointStep
+            vstm    pDst, {x0r, x0i}            @// store y1
+            add     pDst, outPointStep
+            vstm    pDst, {x1r, x1i}            @// store y2
+            add     pDst, outPointStep
+            vstm    pDst, {t2r, t2i}            @// store y3
+            sub     pDst, outPointStep
+        .endif
+
+        SUB     pDst,pDst,outPointStep, LSL #1  @// reset pDst
+        @// update the pDst for the next grp
+        SUBGE   pDst,pDst,pointStep
+        @// update the pSrc for the next grp
+        SUBGE   pSrc,pSrc,pointStep,LSL #2
+
+
+        BGE     grpLoop\name
+
+        ADD     pSrc,pSrc,#8                    @// pSrc += 1; for the next set
+        ADD     pDst,pDst,#8                    @// pDst += 1; for the next set
+
+        SUBS    setCount,setCount,#1            @// decrement loop counter
+
+
+        BGT     setLoop\name
+
+        @// Reset and Swap pSrc and pDst for the next stage
+        MOV     t1,pDst
+        SUB     pDst,pSrc,subFFTNum,LSL #3
+        SUB     pSrc,t1,subFFTNum,LSL #3
+
+        .endm
+
+
+        M_START armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp,r4
+        FFTSTAGE "FALSE","FALSE",FWD
+        M_END
+        @// Inverse variant: |inverse| = "TRUE" flips the multiply signs and swaps the y1/y3 stores.
+        M_START armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp,r4
+        FFTSTAGE "FALSE","TRUE",INV
+        M_END
+
+
+@//    ENDIF                                                           @//ARM1136JS
+
+
+
+@// Guarding implementation by the processor name
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
new file mode 100644
index 00000000000..4ac2da47ac3
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
@@ -0,0 +1,386 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
+@//  to support float instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute a first stage Radix 8 FFT stage for a N point complex signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r2
+#define pTwiddle        r1
+#define subFFTNum       r6
+#define subFFTSize      r7
+#define pPingPongBuf    r5
+
+
+@//Output Registers
+
+
+@//Local Scratch Registers
+
+#define grpSize         r14
+#define step1           r3
+#define step2           r8
+#define setCount        r14             /*@// Reuse grpSize as setCount*/
+#define pointStep       r12
+
+#define t0              r4
+@// Real and Imaginary parts
+
+#define x0r             s0
+#define x0i             s1
+#define x1r             s2
+#define x1i             s3
+#define x2r             s4
+#define x2i             s5
+#define x3r             s6
+#define x3i             s7
+#define t3r             s8              /*@// Temporarily hold x3r and x3i*/
+#define t3i             s9
+#define t1r             s4
+#define t1i             s5
+#define sr              s10
+#define si              s11
+#define roothalf        s12
+
+@// Define macros to load/store two float regs from/to the stack.
+        @// M_VSTM r0, r1, p
+        @//   Store the float register pair {r0, r1} at sp + _Workspace + p_F,
+        @//   where p is the name of a stack area.
+        @//   Side effects: t0 is overwritten with the slot address and the
+        @//   assembler symbol _Offset is redefined. Flags are not touched.
+        .macro M_VSTM r0, r1, p
+        .set    _Offset, \p\()_F + _Workspace
+        ADD     t0, sp, #_Offset
+        vstm.f32 t0, {\r0, \r1}
+        .endm
+
+        @// M_VLDM r0, r1, p
+        @//   Load the float register pair {r0, r1} from sp + _Workspace + p_F,
+        @//   where p is the name of a stack area.
+        @//   Side effects: t0 is overwritten with the slot address and the
+        @//   assembler symbol _Offset is redefined. Flags are not touched.
+        .macro M_VLDM r0, r1, p
+        .set    _Offset, \p\()_F + _Workspace
+        ADD     t0, sp, #_Offset
+        vldm.f32 t0, {\r0, \r1}
+        .endm
+
+@// Define constants
+
+        .MACRO FFTSTAGE scaled, inverse , name
+        @// One first-stage radix-8 pass. scaled is not referenced in this body; inverse picks the store order; name suffixes the loop label.
+        @// Define stack arguments
+        @// In: pSrc, pDst, subFFTNum, pPingPongBuf. Out: pSrc, pDst, subFFTNum and subFFTSize updated for the next stage.
+        @// Scratch: t0, step1, step2, pointStep, r14 (grpSize/setCount), s0-s12 and the pU* stack slots.
+        @// Update grpCount and grpSize right away in order to reuse
+        @// pSubFFTSize and pSubFFTNum regs
+
+        mov     subFFTSize, #8
+        lsr     grpSize, subFFTNum, #3          @// grpSize = subFFTNum / 8
+        mov     subFFTNum, grpSize
+
+
+        @// One complex float is 8 bytes, so pointStep = grpSize * 8 bytes
+        @// is the distance between consecutive radix-8 inputs.
+        @// Note: setCount shares r14 with grpSize, so the updated grpSize
+        @// doubles as the loop count.
+        MOV     pointStep,grpSize,LSL #3
+
+
+        @// Calculate the step of input data for the next set
+        MOV     step1,grpSize,LSL #4            @// step1 = 2*pointStep
+        MOV     step2,pointStep,LSL #3          @// 8*pointStep, reduced to 7*pointStep below
+        SUB     step2,step2,pointStep           @// step2 = 7*pointStep
+
+
+        @// grp = 0 a special case since all the twiddle factors are 1
+        @// Loop on the sets
+
+        movw    t0,#0x04f3                      @// t0 = 0x3f3504f3, the single-precision bit pattern of sqrt(1/2)
+        movt    t0,#0x3f35
+        vmov.f32 roothalf, t0                   @// roothalf = sqrt(1/2)
+        @// t0 is free again from here; M_VSTM and M_VLDM reuse it as address scratch
+grpZeroSetLoop\name:
+        @// Even inputs first: pSrc walks x0, x2, x4, x6 in steps of step1
+        vldm.f32 pSrc, {x0r, x0i}               @// x0
+        add      pSrc, step1
+        vldm.f32 pSrc, {x1r, x1i}               @// x2
+        add      pSrc, step1
+        vldm.f32 pSrc, {x2r, x2i}               @// x4
+        add      pSrc, step1
+        vldm.f32 pSrc, {x3r, x3i}               @// x6
+        add      pSrc, step1
+
+        SUB     pSrc, pSrc, step2               @// 4*step1 - step2 = pointStep: pSrc now points at x1
+
+        @// finish first stage of 8 point FFT and save on stack
+
+        vadd.f32     x0r,x0r,x2r                @// u0
+        vadd.f32     x0i,x0i,x2i
+
+        vadd.f32     sr, x2r, x2r               @// difference is formed as (a+b) - 2b; same pattern throughout
+        vadd.f32     si, x2i, x2i
+        vsub.f32     x2r,x0r,sr                 @// u1
+        vsub.f32     x2i,x0i,si
+
+        M_VSTM   x0r,x0i, pU0
+        M_VSTM   x2r,x2i, pU1
+
+        vadd.f32     x1r,x1r,x3r                @// u4
+        vadd.f32     x1i,x1i,x3i
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vsub.f32     x3r,x1r,sr                 @// u5
+        vsub.f32     x3i,x1i,si
+
+        M_VSTM   x1r,x1i, pU4
+        M_VSTM   x3r,x3i, pU5
+
+        @// Odd inputs: x1, x3, x5, x7
+        vldm    pSrc, {x0r, x0i}                @// x1
+        add     pSrc, step1
+        vldm    pSrc, {x1r, x1i}                @// x3
+        add     pSrc, step1
+        vldm    pSrc, {x2r, x2i}                @// x5
+        add     pSrc, step1
+        vldm    pSrc, {x3r, x3i}                @// x7
+        add     pSrc, #8
+
+        SUB     pSrc, pSrc, step2               @// net effect of this iteration: pSrc += 8 (next set)
+
+        vadd.f32     x0r,x0r,x2r                @// u2
+        vadd.f32     x0i,x0i,x2i
+
+        vadd.f32         sr, x2r, x2r
+        vadd.f32         si, x2i, x2i
+        vsub.f32     x2r,x0r,sr                 @// u3
+        vsub.f32     x2i,x0i,si
+
+        M_VSTM   x2r,x2i, pU3
+
+        vadd.f32     x1r,x1r,x3r                @// u6
+        vadd.f32     x1i,x1i,x3i
+
+        vadd.f32         sr, x3r, x3r
+        vadd.f32         si, x3i, x3i
+        vsub.f32     x3r,x1r,sr                 @// u7
+        vsub.f32     x3i,x1i,si
+
+        @// finish second and third stage of 8 point FFT
+
+        M_VSTM  x3r,x3i, pU7
+        M_VLDM  x2r,x2i, pU0
+
+        @// Decrement setCount. These flags are consumed by the BGT at the end of the loop; nothing in between sets flags.
+        SUBS    setCount,setCount,#1
+        M_VLDM  x3r,x3i, pU4
+
+        vadd.f32     x0r,x0r,x1r                @// v4
+        vadd.f32     x0i,x0i,x1i
+
+        vadd.f32     sr, x1r, x1r
+        vadd.f32     si, x1i, x1i
+        vsub.f32     x1r,x0r,sr                 @// v6
+        vsub.f32     x1i,x0i,si
+
+        vadd.f32     x2r,x2r,x3r                @// v0
+        vadd.f32     x2i,x2i,x3i
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vsub.f32     x3r,x2r,sr                 @// v2
+        vsub.f32     x3i,x2i,si
+
+
+
+        vadd.f32     x2r,x2r,x0r                @// y0
+        vadd.f32     x2i,x2i,x0i
+
+        vadd.f32     sr, x0r, x0r
+        vadd.f32     si, x0i, x0i
+        vsub.f32     x0r,x2r,sr                 @// y4
+        vsub.f32     x0i,x2i,si
+
+        vstm    pDst, {x2r, x2i}                @// store y0
+        add     pDst, step1
+
+        vadd.f32     x3r,x3r,x1i                @// y6
+        vsub.f32     x3i,x3i,x1r
+
+        vadd.f32     sr, x1r, x1r
+        vadd.f32     si, x1i, x1i
+        vsub.f32     t1r,x3r,si                 @// t1r=x2r reg;t1i=x2i reg
+        vadd.f32     t1i,x3i,sr                 @// y2
+
+        .ifeqs  "\inverse", "TRUE"
+            vstm        pDst, {t1r, t1i}        @// store y2
+            add pDst, step1
+            vstm        pDst, {x0r, x0i}        @// store y4
+            add pDst, step1
+            vstm        pDst, {x3r, x3i}        @// store y6
+            add pDst, step1
+        .else
+            vstm        pDst, {x3r, x3i}        @// store y2
+            add pDst, step1
+            vstm        pDst, {x0r, x0i}        @// store y4
+            add pDst, step1
+            vstm        pDst, {t1r, t1i}        @// store y6
+            add pDst, step1
+        .endif
+
+        SUB     pDst, pDst, step2               @// set pDst to y1
+
+
+        M_VLDM  x0r,x0i,pU1                     @// Load u1,u3,u5,u7
+        M_VLDM  x1r,x1i,pU5
+        M_VLDM  x3r,x3i,pU7
+
+        vsub.f32     x0r,x0r,x1i                @// v1
+        vadd.f32     x0i,x0i,x1r
+        vadd.f32     sr, x1r, x1r
+        vadd.f32     si, x1i, x1i
+        vadd.f32     t1r,x0r,si                 @// t1r=x2r reg;t1i=x2i reg
+        vsub.f32     t1i,x0i,sr                 @// v3
+
+        M_VLDM  x1r,x1i,pU3
+
+        vsub.f32     x1r,x1r,x3i                @// v5
+        vadd.f32     x1i,x1i,x3r
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vadd.f32     t3r,x1r,si                 @// t3i = x3i
+        vsub.f32     t3i,x1i,sr                 @// v7
+
+        @// store v5  as (v5.r - v5.i,v5.r + v5.i)
+        @// store v7  as (v7.i + v7.r,v7.i - v7.r)
+
+        vadd.f32     x3r,t3i,t3r                @// v7
+        vsub.f32     x3i,t3i,t3r
+
+        vsub.f32     x1r,x1r,x1i                @// v5
+        vadd.f32     x1i, x1i                   @// two-operand form: x1i = 2*v5.i, so the next add gives v5.r + v5.i
+        vadd.f32     x1i,x1r,x1i
+
+        vmul.f32  x3r, x3r, roothalf            @// (v7.i + v7.r)*(1/sqrt(2))
+        vmul.f32  x3i, x3i, roothalf            @// (v7.i - v7.r)*(1/sqrt(2))
+        vmul.f32  x1r, x1r, roothalf            @// (v5.r - v5.i)*(1/sqrt(2))
+        vmul.f32  x1i, x1i, roothalf            @// (v5.r + v5.i)*(1/sqrt(2))
+
+        vadd.f32     x2r,x2r,x3r                @// y7
+        vadd.f32     x2i,x2i,x3i
+
+        vadd.f32     sr, x3r, x3r
+        vadd.f32     si, x3i, x3i
+        vsub.f32     x3r,x2r,sr                 @// y3
+        vsub.f32     x3i,x2i,si
+
+
+        vsub.f32     x0r,x0r,x1r                @// y5
+        vsub.f32     x0i,x0i,x1i
+
+        vadd.f32     sr, x1r, x1r
+        vadd.f32     si, x1i, x1i
+        vadd.f32     x1r,x0r,sr                 @// y1
+        vadd.f32     x1i,x0i,si
+
+        .ifeqs  "\inverse", "TRUE"
+            vstm    pDst, {x1r, x1i}            @// store y1
+            add pDst, step1
+            vstm    pDst, {x3r, x3i}            @// store y3
+            add pDst, step1
+            vstm    pDst, {x0r, x0i}            @// store y5
+            add pDst, step1
+            vstm    pDst, {x2r, x2i}            @// store y7
+            add pDst, #8
+        .else
+            vstm    pDst, {x2r, x2i}            @// store y1
+            add pDst, step1
+            vstm    pDst, {x0r, x0i}            @// store y3
+            add pDst, step1
+            vstm    pDst, {x3r, x3i}            @// store y5
+            add pDst, step1
+            vstm    pDst, {x1r, x1i}            @// store y7
+            add pDst, #8
+        .endif
+
+        SUB     pDst, pDst, step2               @// update pDst for the next set
+
+        @// Loop while the SUBS on setCount above left a positive count
+        BGT     grpZeroSetLoop\name
+
+
+        @// reset pSrc to pDst for the next stage
+        SUB     pSrc,pDst,pointStep             @// pSrc = pDst - pointStep
+        mov     pDst, pPingPongBuf
+
+
+        .ENDM
+
+
+
+
+
+        @// Allocate stack memory required by the function
+
+        @// Ensure 8 byte alignment to use M_VLDM
+        M_ALLOC8    pU0, 8                      @// 8-byte slot for u0 (one complex float)
+        M_ALLOC8    pU1, 8                      @// slot for u1
+        M_ALLOC8    pU3, 8                      @// slot for u3
+        M_ALLOC8    pU4, 8                      @// slot for u4
+        M_ALLOC8    pU5, 8                      @// slot for u5
+        M_ALLOC8    pU7, 8                      @// slot for u7; u2 and u6 stay in registers
+
+        M_START armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp,r4
+            FFTSTAGE "FALSE","FALSE",FWD
+        M_END
+
+        @// Allocate stack memory required by the function
+
+        @// Ensure 8 byte alignment to use M_VLDM
+        M_ALLOC8    pU0, 8                      @// 8-byte slot for u0 (one complex float)
+        M_ALLOC8    pU1, 8                      @// slot for u1
+        M_ALLOC8    pU3, 8                      @// slot for u3
+        M_ALLOC8    pU4, 8                      @// slot for u4
+        M_ALLOC8    pU5, 8                      @// slot for u5
+        M_ALLOC8    pU7, 8                      @// slot for u7; u2 and u6 stay in registers
+
+        M_START armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp,r4
+            FFTSTAGE "FALSE","TRUE",INV
+        M_END
+
+@//    ENDIF        @//ARM1136JS
+
+
+
+@// Guarding implementation by the processor name
+
+
+    .END
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTFwd_CToC_FC32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
new file mode 100644
index 00000000000..25b4976ca80
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
@@ -0,0 +1,161 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of omxSP_FFTFwd_CToC_SC32_Sfs_s.S
+@//  to support float instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute a forward FFT for a complex signal
+@// 
+@// 
+
+        
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+        
+@//        M_VARIANTS ARM1136JS
+        
+@// Import symbols required from other files
+@// (For example tables)
+        
+        .extern  armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+        
+@// Set debugging level        
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+    
+@//    IF  ARM1136JS
+    
+@//Input Registers
+
+#define pSrc		r0
+#define pDst		r1
+#define pFFTSpec	r2
+
+
+@// Output registers
+#define result		r0
+
+@//Local Scratch Registers
+
+#define argTwiddle	r1
+#define argDst		r2
+#define argScale	r4
+#define pTwiddle	r4
+#define pOut		r5
+#define subFFTSize	r7     
+#define subFFTNum	r6
+#define N		r6
+#define order		r14
+#define diff		r9
+#define count		r8
+#define diffMinusOne	r2
+#define round		r3
+
+#define x0r s0    
+#define x0i s1
+
+
+
+
+    @// Allocate stack memory required by the function
+
+    @// Write function header
+        M_START     omxSP_FFTFwd_CToC_FC32_Sfs_vfp,r11
+
+        @// Forward complex-to-complex float FFT driver.
+        @// In:  pSrc (r0), pDst (r1), pFFTSpec (r2)
+        @// Out: result (r0) = OMX_Sts_NoErr
+
+        @// Byte offsets of the fields in the FFT spec structure
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @// Fetch the transform size, the twiddle table and the scratch buffer
+        ldr     N, [pFFTSpec, #ARMsFFTSpec_N]
+        ldr     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        ldr     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        @// order = 31 - clz(N), i.e. N = 2^order
+        clz     order, N
+        rsb     order, order, #31
+        mov     subFFTSize, #1
+
+        cmp     order, #1
+        bgt     fwdCToCOrderAbove1
+
+        @// order 0: copy the single point from pSrc to pDst and finish.
+        @// The LT-conditional instructions are skipped when order is 1.
+        vldmlt.f32 pSrc, {x0r, x0i}
+        vstmlt.f32 pDst, {x0r, x0i}
+        movlt   pSrc, pDst
+        blt     fwdCToCDone
+
+        @// order 1: a single radix-2 first stage writing to pDst.
+        @// argDst must be set before argTwiddle, which shares r1 with pDst.
+        mov     argDst, pDst
+        mov     argTwiddle, pTwiddle
+        bl      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        b       fwdCToCDone
+
+fwdCToCOrderAbove1:
+
+        @// Bit 1 of order chooses which buffer the first stage writes to;
+        @// the other buffer is handed over in pOut (r5).
+        tst     order, #2
+        movne   argDst, pDst
+        moveq   argDst, pOut
+        moveq   pOut, pDst
+        mov     argTwiddle, pTwiddle
+
+        @// Even order starts with radix 4, odd order with radix 8.
+        @// NOTE: the BLEQ/BLNE pair works although the first BL corrupts
+        @// the flags, because the end of the "grpZeroSetLoop" loop inside
+        @// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        @// leaves the Z flag at EQ.
+        tst     order, #1
+        bleq    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        blne    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+
+        @// Run radix-4 stages until subFFTNum reaches 1
+fwdCToCRadix4Loop:
+        cmp     subFFTNum, #1
+        beq     fwdCToCDone
+        bl      armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+        b       fwdCToCRadix4Loop
+
+fwdCToCDone:
+        @// Set return value
+        mov     result, #OMX_Sts_NoErr
+
+        @// Write function tail
+        M_END
+        
+@//    ENDIF                                           @//ARM1136JS    
+    
+    
+    @// Guarding implementation by the processor name
+    
+    
+    
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
new file mode 100644
index 00000000000..dd1690ad10b
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
@@ -0,0 +1,328 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
+@//  to support float instead of S32.
+@//
+
+@//
+@// Description:
+@// Compute FFT for a real signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+        .extern  armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+
+
+@// Output registers
+#define result          r0
+
+@//Local Scratch Registers
+
+@// N=1 case
+#define scaleMinusOne   r2
+#define rnd             r2
+#define zero            r8
+#define Zero            r9
+
+
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define pTwiddle        r4
+#define pOut            r5
+#define subFFTSize      r7
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+#define count           r8
+#define diffMinusOne    r10
+#define round           r3
+
+#define step            r3
+#define step1           r6
+#define twStep          r12
+#define pTwiddleTmp     r14
+#define t0              r12
+#define t1              r14              /*@// pTwiddleTmp*/
+#define t2              r0
+#define t3              r1               /*@// pSrc,argTwiddle*/
+#define t4              r6
+#define t5              r7               /*@// step1,subFFTSize*/
+
+#define x0r     s0
+#define x0i     s1
+#define y0r     s2
+#define y0i     s3
+#define x1r     s4
+#define x1i     s5
+#define w1r     s2
+#define w1i     s3
+#define w0r     s6
+#define w0i     s7
+#define y1r     s2              /*@// w1r,w1i*/
+#define y1i     s3
+#define st0     s8
+#define st1     s9
+#define st2     s10
+#define st3     s11
+#define st4     s12
+#define st5     s13
+#define half    s15
+
+
+
+
+    @// Allocate stack memory required by the function
+
+
+
+    @// Write function header
+        M_START     omxSP_FFTFwd_RToCCS_F32_Sfs_vfp,r11
+
+        @// Forward FFT of a real float signal.
+        @// In:  pSrc (r0), pDst (r1), pFFTSpec (r2)
+        @// Out: result (r0) = OMX_Sts_NoErr
+        @// An N/2 point complex FFT is run first, followed by a fixup
+        @// pass that writes F(0)..F(N/2) to the destination.
+
+        @// Structure offsets for FFTSpec
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @// Define stack arguments
+
+        @// Setup half value
+        movw    N, #0                   @// Use N as a temp.
+        movt    N, #0x3f00              @// 0x3f000000 is 0.5 in single precision
+        vmov.f32 half, N
+
+        @// Read the size from structure and take log
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @// Read other structure parameters
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        @//  N=1 Treat separately
+        CMP     N,#1
+        BGT     sizeGreaterThanOne
+        @// N<=1 is not supported: nothing is written to the destination.
+        @// NOTE(review): success is still reported here - confirm that
+        @// callers do not expect an error code for this case.
+        @// Set return value
+        MOV     result, #OMX_Sts_NoErr
+        B       FunctionEnd
+
+sizeGreaterThanOne:
+        @// Do a N/2 point complex FFT including the scaling
+
+        MOV     N,N,ASR #1              @// N/2 point complex FFT
+        CLZ     order,N                 @// N = 2^order
+        RSB     order,order,#31
+        MOV     subFFTSize,#1
+        @//MOV     subFFTNum,N
+
+
+        @// order 0: the LT-conditional instructions copy the single
+        @// complex point to pOut and go straight to the fixup.
+        @// order 1 skips them and runs one radix-2 stage.
+        CMP     order,#1
+        BGT     orderGreaterthan1       @// order > 1
+        vldmlt.f32 pSrc, {x0r, x0i}
+        vstmlt.f32 pOut, {x0r, x0i}
+        MOVLT   pSrc,pOut
+        MOVLT   argDst,pDst
+        BLT     FFTEnd
+
+        MOV     argDst,pOut             @// Set input args to fft stages
+        MOV     pOut,pDst               @// Set input args to fft stages
+        MOV     argTwiddle,pTwiddle
+
+        BL    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        B     finalComplexToRealFixup
+
+orderGreaterthan1:
+
+        @// Bit 1 of order chooses the first-stage destination buffer
+        TST     order, #2               @// Set input args to fft stages
+        MOVEQ   argDst,pDst
+        MOVNE   argDst,pOut
+        MOVNE   pOut,pDst               @// Pass the first stage dest in RN5
+        MOV     argTwiddle,pTwiddle
+
+        @//check for even or odd order
+
+        @// NOTE: The following combination of BL's would work fine
+        @// eventhough the first BL would corrupt the flags. This is
+        @// because the end of the "grpZeroSetLoop" loop inside
+        @// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp sets
+        @// the Z flag to EQ
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        BLNE    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+
+unscaledRadix4Loop:
+        CMP        subFFTNum,#1
+         BEQ        FFTEnd
+         BL        armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+         B        unscaledRadix4Loop
+
+FFTEnd:
+finalComplexToRealFixup:
+
+        @// Here subFFTSize holds the complex FFT length (N/2 of the real
+        @// signal length N used in the formulas below).
+
+        @// step = N/2 * 8 bytes
+        MOV     step,subFFTSize,LSL #3
+        @// twStep = 3N/8 * 8 bytes pointing to W^1
+        SUB     twStep,step,subFFTSize,LSL #1
+        @// step1 = N/4 * 8 = N/2*4 bytes
+        MOV     step1,subFFTSize,LSL #2
+        @// (N/4-1)*8 bytes
+        SUB     step1,step1,#8
+
+        @// F(0) = 1/2 [Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
+        @// 1/2 [(a+jb) + (a-jb)] - j  [(a+jb) - (a-jb)]
+        @// 1/2 [2a+j0] - j [0+j2b]
+        @// (a+b, 0)
+
+        @// F(N/2) =1/2 [Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
+        @// 1/2 [(a+jb) + (a-jb)] + j  [(a+jb) - (a-jb)]
+        @// 1/2 [2a+j0] + j [0+j2b]
+        @// (a-b, 0)
+
+        @// F(0) and F(N/2)
+        vldm.f32 pSrc!, {x0r, x0i}
+        vadd.f32 y0r,x0r,x0i            @// F(0) = (Z0.r+Z0.i , 0)
+        vsub.f32 x0r,x0r,x0i            @// F(N/2) = (Z0.r-Z0.i , 0)
+
+        @// Zero both imaginary parts from an integer 0, whose bit pattern
+        @// is +0.0f. Subtracting a register from itself is not safe here:
+        @// y0i (s3) has not been written on the order 0 path, and x - x
+        @// is NaN whenever x is Inf or NaN. zero (r8) is not otherwise
+        @// read in this function.
+        mov      zero, #0
+        vmov.f32 y0i, zero
+        vmov.f32 x0i, zero
+
+        add      argDst, step
+        vstm.f32 argDst, {x0r, x0i}     @// {x0r,x0i}->[argDst, step]
+        sub      argDst, step
+        vstm.f32 argDst!, {y0r, y0i}
+
+        @// The flags set here are tested by BLT/BEQ below; the two ADDs
+        @// in between do not set flags.
+        SUBS    subFFTSize,subFFTSize,#2
+
+        ADD     pTwiddleTmp,argTwiddle,#8       @// W^2
+        ADD     argTwiddle,argTwiddle,twStep    @// W^1
+        BLT     End
+        BEQ     lastElement
+
+
+        @// F(k) = 1/2 [Z(k) +  Z'(N/2-k)] -j*W^(k) [Z(k) -  Z'(N/2-k)]
+        @// Process 2 elements at a time. E.g: F(1) and F(N/2-1) since
+        @// both of them require Z(1) and Z(N/2-1)
+
+        ASR     subFFTSize,subFFTSize,#1
+evenOddButterflyLoop:
+
+        SUB     step,step,#16           @// (N/2-2)*8 bytes
+
+        add      pSrc, step
+        vldm.f32 pSrc, {x1r, x1i}       @// {x1r, x1i} = [pSrc, step]
+        sub      pSrc, step
+        vldm.f32 pSrc!, {x0r, x0i}
+        add      argTwiddle, step1
+        vldm.f32 argTwiddle, {w1r, w1i}  @// {w1r, w1i} = [argTwiddle, step1]
+        sub      argTwiddle, step1
+        vldm.f32 argTwiddle!, {w0r, w0i} @// {w0r, w0i} = [argTwiddle], #8
+
+        SUB     step1,step1,#8
+        @// Loop counter; the flags are consumed by the BGT at the bottom
+        @// of the loop and no instruction in between sets flags.
+        SUBS    subFFTSize,subFFTSize,#1
+
+        vsub.f32 st2,x0r,x1r            @// a-c
+        vadd.f32 st3,x0i,x1i            @// b+d
+        vadd.f32 st0,x0r,x1r            @// a+c
+        vsub.f32 st1,x0i,x1i            @// b-d
+
+        vmul.f32 x1r,w1r,st2
+        vmul.f32 x1i,w1r,st3
+        vmla.f32 x1r,w1i,st3            @// x1r = w1r*st2 + w1i*st3
+        @//RSB     x1r,x1r,#0
+        vmls.f32 x1i,w1i,st2            @// x1i = w1r*st3 - w1i*st2
+
+        @// y1r/y1i share s2/s3 with w1r/w1i, which are dead from here
+        vsub.f32 y1r, st0, x1i
+        vadd.f32 y1i, x1r, st1
+        vneg.f32 y1i, y1i
+
+        vmul.f32  x0r,w0r,st2
+        vmul.f32  x0i,w0r,st3
+        vmls.f32  x0r,w0i,st3           @// x0r = w0r*st2 - w0i*st3
+        vmla.f32  x0i,w0i,st2           @// x0i = w0r*st3 + w0i*st2
+
+        vsub.f32   st4,st0,x0i          @// F(1)
+        vadd.f32   st5,x0r,st1
+
+
+        vmul.f32 y1r, half
+        vmul.f32 y1i, half
+        vmul.f32 st4, half
+        vmul.f32 st5, half
+
+        add      argDst, step
+        vstm.f32 argDst, {y1r, y1i}     @// {y1r,y1i} -> [argDst,step]
+        sub      argDst, step
+        vstm.f32 argDst!, {st4, st5}
+
+
+        @// t0 shares r12 with twStep, which is no longer needed here
+        MOV     t0,argTwiddle           @// swap ptr for even and odd twiddles
+        MOV     argTwiddle,pTwiddleTmp
+        MOV     pTwiddleTmp,t0
+
+        BGT     evenOddButterflyLoop
+
+        @// Last element can be expanded as follows
+        @// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
+        @// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
+        @// 1/2[2a+j0] + j (c+jd) [0+j2b]
+        @// (a-bc, -bd)
+
+        @// The code below stores (x0r, -x0i) for this element.
+lastElement:
+        vldm.f32 pSrc, {x0r, x0i}
+        vneg.f32 x0i, x0i
+        vstm.f32 argDst, {x0r, x0i}
+
+End:
+        @// Set return value
+        MOV     result, #OMX_Sts_NoErr
+
+FunctionEnd:
+        @// Write function tail
+        M_END
+
+@//    ENDIF                                           @//ARM1136JS
+
+
+    @// Guarding implementation by the processor name
+
+
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTInv_CCSToR_F32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
new file mode 100644
index 00000000000..d6a47652738
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
@@ -0,0 +1,227 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
+@//  to support float instead of S32.
+@//
+
+@//
+@// Description:
+@// Compute an inverse FFT for a complex signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+
+
+@// Output registers
+#define result          r0
+
+@//Local Scratch Registers
+
+
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define pTwiddle        r4
+#define pOut            r5
+#define subFFTSize      r7
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+@// Total num of radix stages required to complete the FFT
+#define count           r8
+
+#define round           r3
+
+#define x0r     s0
+#define x0i     s1
+#define y0r     s2
+#define y0i     s3
+#define x1r     s4
+#define x1i     s5
+#define w1r     s2
+#define w1i     s3
+#define w0r     s6
+#define w0i     s7
+#define y1r     s2              /*@// w1r,w1i*/
+#define y1i     s3
+#define st0     s8
+#define st1     s9
+#define st2     s10
+#define st3     s11
+#define st4     s12
+#define st5     s13
+#define fscale  s2
+#define fone    s3
+
+
+
+    @// Allocate stack memory required by the function
+        @// Stack slots that keep pDst and pFFTSpec across the pre-twiddle call
+        M_ALLOC4        pDstOnStack, 4
+        M_ALLOC4        pFFTSpecOnStack, 4
+
+        @// Inverse FFT driver: pre-twiddle stage, N/2 point complex
+        @// inverse FFT, then scaling of the result by 1/subFFTSize.
+        @// In:  pSrc (r0), pDst (r1), pFFTSpec (r2)
+        @// Out: result (r0) = OMX_Sts_NoErr
+        M_START     omxSP_FFTInv_CCSToR_F32_Sfs_vfp,r11
+
+        @// Byte offsets of the fields in the FFT spec structure
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @// Fetch the transform size
+        ldr     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @// N <= 1: copy one float from pSrc to pDst and finish
+        cmp     N, #1
+        bgt     invCCSToRSizeAbove1
+        vldr.f32 x0r, [pSrc]
+        vstr.f32 x0r, [pDst]
+
+        b       invCCSToRDone
+
+invCCSToRSizeAbove1:
+        @// Keep both pointers on the stack across the call below
+        M_STR   pDst,pDstOnStack
+        M_STR   pFFTSpec,pFFTSpecOnStack
+
+        @// Pre-twiddle radix-2 stage that precedes the complex IFFT
+        bl      armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp
+
+invCCSToRComplexIFFT:
+
+        @// Reload the spec pointer and its fields
+        M_LDR   pFFTSpec,pFFTSpecOnStack
+        ldr     N, [pFFTSpec, #ARMsFFTSpec_N]
+        ldr     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        ldr     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        mov     N, N, asr #1            @// N/2 point complex IFFT
+        add     pSrc, pOut, N, lsl #3   @// pSrc = pOut + (N/2)*8 bytes
+        M_LDR   pDst,pDstOnStack
+
+        @// order = 31 - clz(N), i.e. N = 2^order
+        clz     order, N
+        rsb     order, order, #31
+        mov     subFFTSize, #1
+
+        cmp     order, #1
+        bgt     invCCSToROrderAbove1
+
+        @// order 0: copy the single complex point (LT-conditional) and
+        @// go straight to the scaling step
+        vldmlt.f32 pSrc, {x0r, x0i}
+        vstmlt.f32 pDst, {x0r, x0i}
+
+        movlt   pSrc, pDst
+        blt     invCCSToRScale
+
+        @// order 1: a single radix-2 first stage writing to pDst.
+        @// argDst must be set before argTwiddle, which shares r1 with pDst.
+        mov     argDst, pDst
+        mov     argTwiddle, pTwiddle
+
+        bl      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        b       invCCSToRScale
+
+
+invCCSToROrderAbove1:
+
+        @// Bit 1 of order chooses which buffer the first stage writes to;
+        @// the other buffer is handed over in pOut (r5).
+        tst     order, #2
+        movne   argDst, pDst
+        moveq   argDst, pOut
+        moveq   pOut, pDst
+        mov     argTwiddle, pTwiddle
+
+        @// Even order starts with radix 4, odd order with radix 8.
+        @// NOTE: the BLEQ/BLNE pair works although the first BL corrupts
+        @// the flags, because the end of the "grpZeroSetLoop" loop inside
+        @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp leaves
+        @// the Z flag at EQ.
+        tst     order, #1
+        bleq    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        blne    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+
+        @// Run radix-4 stages until subFFTNum reaches 1
+invCCSToRRadix4Loop:
+        cmp     subFFTNum, #1
+        beq     invCCSToRScale
+        bl      armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+        b       invCCSToRRadix4Loop
+
+invCCSToRScale:
+
+        @// Preload the first complex value before s2/s3 are reused
+        vldm.f32 pSrc, {x0r, x0i}
+
+        @// fone = 1.0f
+        mov          round, #1
+        vmov.f32     fone, round
+        vcvt.f32.s32 fone, fone
+        @// fscale = 1 / subFFTSize
+        vmov.f32     fscale, subFFTSize
+        vcvt.f32.s32 fscale, fscale
+        vdiv.f32     fscale, fone, fscale
+
+        @// Scale subFFTSize complex values in place at pSrc. The SUBS
+        @// flags drive both the conditional preload and the loop branch.
+invCCSToRScaleLoop:
+        subs    subFFTSize, subFFTSize, #1
+        vmul.f32 x0r, x0r, fscale
+        vmul.f32 x0i, x0i, fscale
+        vstm.f32 pSrc!, {x0r, x0i}
+        vldmgt.f32 pSrc, {x0r, x0i}
+
+        bgt     invCCSToRScaleLoop
+
+
+invCCSToRDone:
+        @// Set return value
+        mov     result, #OMX_Sts_NoErr
+
+        @// Write function tail
+        M_END
+
+@//    ENDIF                                           @//ARM1136JS
+
+
+      @// Guarding implementation by the processor name
+
+
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTInv_CToC_FC32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTInv_CToC_FC32_Sfs_s.S
new file mode 100644
index 00000000000..64aa5da8c5a
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/armv7/omxSP_FFTInv_CToC_FC32_Sfs_s.S
@@ -0,0 +1,180 @@
+@//
+@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@//
+@//  Use of this source code is governed by a BSD-style license
+@//  that can be found in the LICENSE file in the root of the source
+@//  tree. An additional intellectual property rights grant can be found
+@//  in the file PATENTS.  All contributing project authors may
+@//  be found in the AUTHORS file in the root of the source tree.
+@//
+@//  This is a modification of omxSP_FFTInv_CToC_SC32_Sfs_s.s
+@//  to support float instead of SC32.
+@//
+
+@//
+@// Description:
+@// Compute an inverse FFT for a complex signal
+@//
+@//
+
+
+@// Include standard headers
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+@//        M_VARIANTS ARM1136JS
+
+@// Import symbols required from other files
+@// (For example tables)
+
+        .extern  armSP_FFTInv_CToC_FC32_Sfs_Radix2_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+
+@// Set debugging level
+@//DEBUG_ON    SETL {TRUE}
+
+
+
+@// Guarding implementation by the processor name
+
+@//    IF  ARM1136JS
+
+@//Input Registers
+
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+
+
+@// Output registers
+#define result          r0
+
+@//Local Scratch Registers
+@// NOTE: r1, r2, r4 and r6 each have several names here and in the input list above.
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define pTwiddle        r4
+#define pOut            r5
+#define subFFTSize      r7
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+#define count           r8
+#define diffMinusOne    r2
+#define round           r3
+
+#define x0r     s0
+#define x0i     s1
+#define fone    s2
+#define fscale  s3
+
+
+    @// omxSP_FFTInv_CToC_FC32_Sfs_vfp: inverse complex float FFT (VFP path)
+    @// whose result is scaled by 1/N in the scaleFFTData loop at the end.
+    @// In: r0 = pSrc, r1 = pDst, r2 = pFFTSpec.  Out: r0 = OMX_Sts_NoErr.
+        M_START     omxSP_FFTInv_CToC_FC32_Sfs_vfp,r11
+
+@ Structure offsets for FFTSpec
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @// Define stack arguments (none are defined for this function)
+
+        @// Read the size from structure and take log
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @// Read other structure parameters
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        CLZ     order,N                 @// N = 2^order
+        RSB     order,order,#31         @// order = 31 - CLZ(N)
+        MOV     subFFTSize,#1           @// sub-FFT size starts at 1
+        @//MOV     subFFTNum,N
+        @// (subFFTNum and N both alias r6, so no move is needed)
+        CMP     order,#1
+        BGT     orderGreaterthan1       @// order > 1
+        @// Order = 0 or 1 (LT => order < 1: copy one complex value to pDst)
+        vldmlt.f32 pSrc, {x0r, x0i}
+        vstmlt.f32 pDst, {x0r, x0i}
+        @// FFTEnd scales in place through pSrc, so point pSrc at pDst
+        MOVLT   pSrc,pDst
+        BLT     FFTEnd
+
+        @// Handle order = 1
+        MOV     argDst,pDst             @// r2 <- r1, before r1 is reused below
+        MOV     argTwiddle,pTwiddle     @// r1 (was pDst) now holds the twiddle ptr
+
+        BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
+        B       FFTEnd
+
+orderGreaterthan1:
+        @// Bit 1 of order picks the destination of the first stage
+        TST     order, #2               @// Set input args to fft stages
+        MOVNE   argDst,pDst             @// bit set: first stage writes pDst
+        MOVEQ   argDst,pOut             @// bit clear: first stage writes pBuf
+        MOVEQ   pOut,pDst               @// Pass the first stage dest in RN5
+        MOV     argTwiddle,pTwiddle
+
+
+        @//check for even or odd order
+        @// NOTE: The following combination of BL's would work fine
+        @// even though the first BL would corrupt the flags. This is
+        @// because the end of the "grpZeroSetLoop" loop inside
+        @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp sets
+        @// the Z flag to EQ
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
+        BLNE    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
+
+unscaledRadix4Loop:
+        CMP        subFFTNum,#1
+         BEQ        FFTEnd
+         BL        armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
+         B        unscaledRadix4Loop
+        @// NOTE(review): loop exit relies on the stage routine updating subFFTNum (r6)
+
+FFTEnd:
+        @// Scale in place: every complex value at pSrc is multiplied by 1/subFFTSize
+        vldm.f32 pSrc, {x0r, x0i}
+
+        vmov.f32     fscale, subFFTSize
+        vcvt.f32.s32 fscale, fscale             @// fscale = N as a float
+        movw         round, #0
+        movt         round, #0x3f80             @// round = 0x3f800000, bits of 1.0f
+        vmov.f32     fone, round
+        vdiv.f32     fscale, fone, fscale       @// fscale = 1/N
+scaleFFTData:                                   @// N = subFFTSize
+        SUBS    subFFTSize,subFFTSize,#1        @// flags drive the GT load/branch below
+        vmul.f32 x0r, x0r, fscale
+        vmul.f32 x0i, x0i, fscale
+        vstm.f32 pSrc, {x0r, x0i}
+        add      pSrc, #8                       @// next complex value (two 4-byte floats)
+        vldmgt.f32 pSrc, {x0r, x0i}
+
+        bgt     scaleFFTData
+
+
+        @// Set return value
+        MOV     result, #OMX_Sts_NoErr
+
+        @// Write function tail
+        M_END
+
+@//    ENDIF                                           @//ARM1136JS
+
+
+      @// Guarding implementation by the processor name
+
+
+
+    .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/detect.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/detect.c
new file mode 100644
index 00000000000..b74220a92fc
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/detect.c
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include <cpu-features.h>
+
+#include "android/log.h"
+#include "dl/sp/api/omxSP.h"
+
+int HasArmNeon() {  /* Returns 1 if the CPU feature mask has the NEON bit, else 0. */
+  return (ANDROID_CPU_ARM_FEATURE_NEON & android_getCpuFeatures()) ? 1 : 0;
+}
+
+static void SetFFTRoutines() {
+  /*
+   * Point both public FFT function pointers at the NEON or the
+   * non-NEON (_vfp) routines and log which set was selected.
+   */
+  if (!HasArmNeon()) {
+    __android_log_print(ANDROID_LOG_INFO, "OpenMAX DL FFT",
+                        "Using non-NEON FFT");
+    omxSP_FFTFwd_RToCCS_F32 = omxSP_FFTFwd_RToCCS_F32_Sfs_vfp;
+    omxSP_FFTInv_CCSToR_F32 = omxSP_FFTInv_CCSToR_F32_Sfs_vfp;
+    return;
+  }
+  __android_log_print(ANDROID_LOG_INFO, "OpenMAX DL FFT",
+                      "Using NEON FFT");
+  omxSP_FFTFwd_RToCCS_F32 = omxSP_FFTFwd_RToCCS_F32_Sfs;
+  omxSP_FFTInv_CCSToR_F32 = omxSP_FFTInv_CCSToR_F32_Sfs;
+}
+
+/*
+ * FIXME: It would be beneficial to use the GCC ifunc attribute to
+ * select the appropriate function at load time. This is apparently
+ * not supported on Android at this time. (Compiler warning that the
+ * ifunc attribute is ignored.)
+ */
+
+/*
+ * Detection trampoline for the forward real FFT.  Installs the NEON
+ * or non-NEON routines into both FFT function pointers, then forwards
+ * this call through the updated forward pointer.
+ */
+static OMXResult DetectForwardRealFFT(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec) {
+  SetFFTRoutines();
+  return (*omxSP_FFTFwd_RToCCS_F32)(pSrc, pDst, pFFTSpec);
+}
+
+/*
+ * Detection trampoline for the inverse real FFT.  Installs the NEON
+ * or non-NEON routines into both FFT function pointers, then forwards
+ * this call through the updated inverse pointer.
+ */
+static OMXResult DetectInverseRealFFT(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec) {
+  SetFFTRoutines();
+  return (*omxSP_FFTInv_CCSToR_F32)(pSrc, pDst, pFFTSpec);
+}
+
+/*
+ * Implementation of the forward and inverse real float FFT.
+ * Each pointer starts at its detection routine; a call through either
+ * one repoints both at the selected routines and then runs the call.
+ */
+OMXResult (*omxSP_FFTFwd_RToCCS_F32)(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec) = DetectForwardRealFFT;
+
+OMXResult (*omxSP_FFTInv_CCSToR_F32)(
+    const OMX_F32* pSrc,
+    OMX_F32* pDst,
+    const OMXFFTSpec_R_F32* pFFTSpec) = DetectInverseRealFFT;
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S
index f375991f7dd..f9dd26e491e 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S
@@ -22,8 +22,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S
new file mode 100644
index 00000000000..950defde8ca
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S
@@ -0,0 +1,409 @@
+@
+@  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@
+@  Use of this source code is governed by a BSD-style license
+@  that can be found in the LICENSE file in the root of the source
+@  tree. An additional intellectual property rights grant can be found
+@  in the file PATENTS.  All contributing project authors may
+@  be found in the AUTHORS file in the root of the source tree.
+@
+@ Some code in this file was originally from file
+@ armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S which was licensed as
+@ follows. It has been relicensed with permission from the copyright holders.
+@
+
+@
+@ OpenMAX DL: v1.0.2
+@ Last Modified Revision:   7485
+@ Last Modified Date:       Fri, 21 Sep 2007
+@ 
+@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+@
+
+@
+@ Description:
+@ Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT.
+@ It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation.
+@ It implements both "scaled"(by 1/2) and "unscaled" versions of the above
+@ formula.
+@ 
+        
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+        
+@//Input Registers
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+#define scale           r3
+
+@ Output registers
+#define result          r0
+
+@//Local Scratch Registers
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define tmpOrder        r4
+#define pTwiddle        r4
+#define pOut            r5
+#define subFFTSize      r7     
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+@ Total number of radix stages needed to complete the FFT.
+#define count           r8
+#define x0r             r4    
+#define x0i             r5
+#define diffMinusOne    r2
+#define round           r3
+#define pOut1           r2
+#define size            r7
+#define step            r8            
+#define step1           r9
+#define step2           r10
+#define twStep          r10
+#define pTwiddleTmp     r11
+#define argTwiddle1     r12
+#define zero            r14
+
+@ Neon registers
+#define dX0             D0.S16
+#define dX0S32          D0.S32
+#define dShift          D1.S16
+#define dX1             D1.S16
+#define dX1S32          D1.S32
+#define dY0             D2.S16
+#define dY1             D3.S16
+#define dX0r            D0.S16            
+#define dX0rS32         D0.S32
+#define dX0i            D1.S16
+#define dX1r            D2.S16
+#define dX1i            D3.S16
+#define qX1             Q1.S16
+#define dW0r            D4.S16
+#define dW0i            D5.S16
+#define dW1r            D6.S16
+#define dW1i            D7.S16
+#define dW0rS32         D4.S32
+#define dW0iS32         D5.S32
+#define dW1rS32         D6.S32
+#define dW1iS32         D7.S32
+#define dT0             D8.S16
+#define dT1             D9.S16
+#define dT2             D10.S16
+#define dT3             D11.S16
+#define qT0             Q6.S32
+#define qT1             Q7.S32
+#define qT2             Q8.S32
+#define qT3             Q9.S32
+#define dY0r            D4.S16
+#define dY0i            D5.S16
+#define dY1r            D6.S16
+#define dY1i            D7.S16
+#define qY1             Q3.S16
+#define dY2             D4.S16
+#define dY3             D5.S16
+#define dW0             D6.S16
+#define dW1             D7.S16
+#define dW0Tmp          D10.S16
+#define dW1Neg          D11.S16
+
+        @ Structure offsets for the FFTSpec             
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+        @ FFTSTAGE emits one pre-twiddle stage; its name argument makes the labels unique.
+        .MACRO FFTSTAGE scaled, inverse, name
+        @ scaled = TRUE selects the halving ops; the inverse argument is not referenced below.
+        @ Read the FFT size N from the spec structure (no log is taken here)
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+        
+        @ Read other structure parameters
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+        
+        MOV     size,N,ASR #1        @ preserve the contents of N
+        MOV     step,N,LSL #1        @ step = N/2 * 4 bytes
+        
+        @ Process different FFT sizes with different loops.
+        CMP    size,#4
+        BLE    smallFFTSize\name
+        
+        @ Z(k) = 1/2 {[F(k) +  F'(N/2-k)] +j*W^(-k) [F(k) -  F'(N/2-k)]}
+        @ Note: W^(k) is stored as negated value and also need to
+        @ conjugate the values from the table.
+        
+        @ Z(0) : no need of twiddle multiply
+        @ Z(0) = 1/2 { [F(0) +  F'(N/2)] +j [F(0) -  F'(N/2)] }
+        
+        VLD1    dX0S32[0],[pSrc],step
+        ADD     pOut1,pOut,step      @ pOut1 = pOut+ N/2*4 bytes 
+                
+        VLD1    dX1S32[0],[pSrc]!
+        SUB     twStep,step,size     @ twStep = 3N/8 * 4 bytes pointing to W^1
+        
+        MOV     step1,size,LSL #1    @ step1 = N/4 * 4 = N/2*2 bytes
+        SUB     step1,step1,#4       @ (N/4-1)*4 bytes
+        
+        VHADD    dY0,dX0,dX1         @ [b+d | a+c]
+        VHSUB    dY1,dX0,dX1         @ [b-d | a-c] 
+        VTRN    dY0,dY1              @ dY0= [a-c | a+c] ;dY1= [b-d | b+d] 
+        @ The SUBS below sets the flags tested by the BLT and BEQ after the stores
+        .ifeqs  "\scaled", "TRUE"
+            VHSUB   dX0,dY0,dY1
+            SUBS    size,size,#2
+            VHADD   dX1,dY0,dY1
+        .else
+            VSUB   dX0,dY0,dY1
+            SUBS    size,size,#2
+            VADD   dX1,dY0,dY1
+        .endif
+                    
+        SUB     pSrc,pSrc,step
+        VST1    dX0[0],[pOut1]!
+        ADD     pTwiddleTmp,pTwiddle,#4                @ W^2
+        VST1    dX1[1],[pOut1]!
+        ADD     argTwiddle1,pTwiddle,twStep            @ W^1 
+        @ LT: size was below 2, skip to the end; EQ: size was 2, only the last element is left
+        BLT     decrementScale\name
+        BEQ     lastElement\name
+                        
+        SUB     step,step,#20
+        SUB     step1,step1,#4                         @ step1 = N - 8 bytes
+        SUB     step2, step1, #4                       @ step2 = N - 12 bytes
+                        
+        @ Z(k) = 1/2[F(k) +  F'(N/2-k)] +j*W^(-k) [F(k) -  F'(N/2-k)]
+        @ Note: W^k is stored as negative values in the table and also need to
+        @ conjugate the values from the table.
+        @ Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
+        @ since both of them require F(1),F(2) and F(N/2-2),F(N/2-1).
+
+evenOddButterflyLoop\name:     
+        VLD2    {dX0r,dX0i},[pSrc],step
+        VLD2    {dX1r,dX1i},[pSrc]!
+        SUB     pSrc, pSrc, step                       @ net: pSrc advanced by 16 bytes
+
+        VLD1    dW0r,[argTwiddle1],step1
+        VREV64  qX1,qX1
+        VLD1    dW1r,[argTwiddle1]!
+        VHSUB   dT2,dX0r,dX1r                          @ a-c
+        SUB     argTwiddle1, argTwiddle1, step1        @ net: advanced by 8 bytes
+        SUB     step1,step1,#16                        @ front/back gap shrinks by 16 bytes
+
+        VLD1    dW0i,[pTwiddleTmp],step2
+        VHADD   dT3,dX0i,dX1i                          @ b+d
+        VLD1    dW1i,[pTwiddleTmp]!
+        VHADD   dT0,dX0r,dX1r                          @ a+c
+        VHSUB   dT1,dX0i,dX1i                          @ b-d
+        SUB     pTwiddleTmp, pTwiddleTmp, step2        @ net: advanced by 8 bytes
+        SUB     step2,step2,#16                        @ front/back gap shrinks by 16 bytes
+
+        SUBS    size,size,#8
+        @ Flags from this SUBS are consumed by the BGT at the end of the loop
+        VZIP    dW1r,dW1i
+        VTRN    dW0r,dW0i
+        VZIP    dW1iS32, dW1rS32
+                                
+        VMULL   qT0,dW1i,dT2
+        VMLSL   qT0,dW1r,dT3                           @ qT0 = dW1i*dT2 - dW1r*dT3
+        VMULL   qT1,dW1i,dT3
+        VMLAL   qT1,dW1r,dT2                           @ qT1 = dW1i*dT3 + dW1r*dT2
+        VMULL   qT2,dW0r,dT2
+        VMLAL   qT2,dW0i,dT3                           @ qT2 = dW0r*dT2 + dW0i*dT3
+        VMULL   qT3,dW0r,dT3
+        VMLSL   qT3,dW0i,dT2                           @ qT3 = dW0r*dT3 - dW0i*dT2
+        
+        VRSHRN  dX1r,qT0,#15                           @ rounding shift by 15, narrow to 16 bits
+        VRSHRN  dX1i,qT1,#15
+        VRSHRN  dX0r,qT2,#15
+        VRSHRN  dX0i,qT3,#15
+        
+        .ifeqs  "\scaled", "TRUE"
+            VHADD    dY1r,dT0,dX1i                     @ F(N/2 -1)
+            VHSUB    dY1i,dX1r,dT1
+        .else
+            VADD    dY1r,dT0,dX1i                      @ F(N/2 -1)
+            VSUB    dY1i,dX1r,dT1
+        .endif
+        
+        .ifeqs  "\scaled", "TRUE"
+            VHADD    dY0r,dT0,dX0i                     @ F(1)
+            VHSUB    dY0i,dT1,dX0r
+        .else
+            VADD    dY0r,dT0,dX0i                      @ F(1)
+            VSUB    dY0i,dT1,dX0r
+        .endif
+        
+        VREV64  qY1,qY1
+
+        VST2    {dY0r,dY0i},[pOut1],step
+        VST2    {dY1r,dY1i},[pOut1]
+        ADD     pOut1,pOut1,#16
+        SUB     pOut1, pOut1, step                     @ net: pOut1 advanced by 16 bytes
+        SUB     step,step,#32                          @ front/back gap shrinks by 32 bytes
+       
+        BGT     evenOddButterflyLoop\name
+
+        SUB     pSrc,pSrc,#4           @ set both the ptrs to the last element
+        SUB     pOut1,pOut1,#4
+        B       lastElement\name
+        
+smallFFTSize\name:
+        @ Z(k) = 1/2 {[F(k) +  F'(N/2-k)] +j*W^(-k) [F(k) -  F'(N/2-k)]}
+        @ Note: W^(k) is stored as negated value and also need to
+        @ conjugate the values from the table.
+        
+        @ Z(0) : no need of twiddle multiply
+        @ Z(0) = 1/2 { [F(0) +  F'(N/2)] +j [F(0) -  F'(N/2)] }
+        @ (same Z(0) instruction sequence as in the large-size path above)
+        VLD1    dX0S32[0],[pSrc],step
+        ADD     pOut1,pOut,step      @ pOut1 = pOut+ N/2*4 bytes 
+                
+        VLD1    dX1S32[0],[pSrc]!
+        SUB     twStep,step,size     @ twStep = 3N/8 * 4 bytes pointing to W^1
+        
+        MOV     step1,size,LSL #1    @ step1 = N/4 * 4 = N/2*2 bytes
+        SUB     step1,step1,#4       @ (N/4-1)*4 bytes
+        
+        VHADD    dY0,dX0,dX1         @ [b+d | a+c]
+        VHSUB    dY1,dX0,dX1         @ [b-d | a-c] 
+        VTRN    dY0,dY1              @ dY0= [a-c | a+c] ;dY1= [b-d | b+d] 
+        
+        .ifeqs  "\scaled", "TRUE"
+            VHSUB   dX0,dY0,dY1
+            SUBS    size,size,#2
+            VHADD   dX1,dY0,dY1
+        .else
+            VSUB   dX0,dY0,dY1
+            SUBS    size,size,#2
+            VADD   dX1,dY0,dY1
+        .endif
+                    
+        SUB     pSrc,pSrc,step
+        VST1    dX0[0],[pOut1]!
+        ADD     pTwiddleTmp,pTwiddle,#4                @ W^2
+        VST1    dX1[1],[pOut1]!
+        ADD     argTwiddle1,pTwiddle,twStep            @ W^1 
+        @ LT: size was below 2, skip to the end; EQ: size was 2, only the last element is left
+        BLT     decrementScale\name
+        BEQ     lastElement\name
+                        
+        @ Z(k) = 1/2[F(k) +  F'(N/2-k)] +j*W^(-k) [F(k) -  F'(N/2-k)]
+        @ Note: W^k is stored as negative values in the table and also need to
+        @ conjugate the values from the table.
+        @ Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
+        @ since both of them require F(1),F(2) and F(N/2-2),F(N/2-1).
+
+        SUB     step,step,#12
+        @ Single pass (no branch back); execution falls through to the last-element code.
+evenOddButterflyLoopSize4\name:     
+        VLD1    dW0rS32[0],[argTwiddle1],step1
+        VLD1    dW1rS32[0],[argTwiddle1]!
+        
+        VLD2    {dX0r[0],dX0i[0]},[pSrc]!
+        VLD2    {dX0r[1],dX0i[1]},[pSrc],step
+        SUB     pSrc,pSrc,#4
+        SUB     argTwiddle1,argTwiddle1,step1
+        VLD2    {dX1r[0],dX1i[0]},[pSrc]!
+        VLD2    {dX1r[1],dX1i[1]},[pSrc]!
+        
+        SUB     step1,step1,#4                         @ (N/4-2)*4 bytes
+        VLD1    dW0iS32[0],[pTwiddleTmp],step1
+        VLD1    dW1iS32[0],[pTwiddleTmp]!
+        SUB     pSrc,pSrc,step
+        
+        SUB     pTwiddleTmp,pTwiddleTmp,step1
+        VREV32  dX1r,dX1r
+        VREV32  dX1i,dX1i
+        SUBS    size,size,#4
+                        
+        VHSUB   dT2,dX0r,dX1r                          @ a-c
+        VHADD   dT3,dX0i,dX1i                          @ b+d
+        SUB     step1,step1,#4
+        VHADD   dT0,dX0r,dX1r                          @ a+c
+        VHSUB   dT1,dX0i,dX1i                          @ b-d
+        
+        VTRN    dW1r,dW1i
+        VTRN    dW0r,dW0i
+                                
+        VMULL   qT0,dW1r,dT2
+        VMLSL   qT0,dW1i,dT3                           @ qT0 = dW1r*dT2 - dW1i*dT3
+        VMULL   qT1,dW1r,dT3
+        VMLAL   qT1,dW1i,dT2                           @ qT1 = dW1r*dT3 + dW1i*dT2
+        VMULL   qT2,dW0r,dT2
+        VMLAL   qT2,dW0i,dT3                           @ qT2 = dW0r*dT2 + dW0i*dT3
+        VMULL   qT3,dW0r,dT3
+        VMLSL   qT3,dW0i,dT2                           @ qT3 = dW0r*dT3 - dW0i*dT2
+        
+        VRSHRN  dX1r,qT0,#15                           @ rounding shift by 15, narrow to 16 bits
+        VRSHRN  dX1i,qT1,#15
+        
+        .ifeqs  "\scaled", "TRUE"
+            VHADD    dY1r,dT0,dX1i                     @ F(N/2 -1)
+            VHSUB    dY1i,dX1r,dT1
+        .else
+            VADD    dY1r,dT0,dX1i                      @ F(N/2 -1)
+            VSUB    dY1i,dX1r,dT1
+        .endif
+        
+        VREV32  dY1r,dY1r
+        VREV32  dY1i,dY1i
+                            
+        VRSHRN  dX0r,qT2,#15
+        VRSHRN  dX0i,qT3,#15
+        
+        .ifeqs  "\scaled", "TRUE"
+            VHADD    dY0r,dT0,dX0i                     @ F(1)
+            VHSUB    dY0i,dT1,dX0r
+        .else
+            VADD    dY0r,dT0,dX0i                      @ F(1)
+            VSUB    dY0i,dT1,dX0r
+        .endif
+        
+        VST2    {dY0r[0],dY0i[0]},[pOut1]!
+        VST2    {dY0r[1],dY0i[1]},[pOut1],step
+        SUB     pOut1, #4
+        VST2    {dY1r[0],dY1i[0]},[pOut1]!
+        VST2    {dY1r[1],dY1i[1]},[pOut1]!
+        SUB     pOut1,pOut1,step
+        SUB     pSrc,pSrc,#4           @ set both the ptrs to the last element
+        SUB     pOut1,pOut1,#4
+        
+        @ Last element can be expanded as follows
+        @ 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (W^k is stored as -ve)
+        @ 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
+        @ 1/2[2a+j0] - j (c-jd) [0+j2b]
+        @ (a+bc, -bd)
+        @ Since (c,d) = (0,1) for the last element, result is just (a,-b)
+        
+lastElement\name:      
+        VLD1    dX0rS32[0],[pSrc]
+        
+        .ifeqs  "\scaled", "TRUE"
+            VSHR    dX0r,dX0r,#1                       @ halve both 16-bit lanes
+        .endif
+        
+        VST1    dX0r[0],[pOut1]!                       @ store a
+        VNEG    dX0r,dX0r                              @ negate so lane 1 holds -b
+        VST1    dX0r[1],[pOut1]                        @ store -b
+
+decrementScale\name:          
+        .ifeqs  "\scaled", "TRUE"
+            SUB scale,scale,#1
+        .endif
+        @ NOTE(review): scaled variant lowers scale (r3) by one, presumably for the halving above
+        .endm
+        @ Unscaled and scaled (Sfs) entry points, both expanded from the FFTSTAGE macro.
+        M_START armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe,r4
+        FFTSTAGE "FALSE","TRUE",Inv
+        M_END
+        @ NOTE(review): the stage writes r4-r12 and D0-D19; confirm M_START and callers cover that.
+        M_START armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe,r4
+        FFTSTAGE "TRUE","TRUE",InvSfs
+        M_END
+
+        
+        .end
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S
index 57fef7a9404..9959f8fdde8 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S
@@ -30,8 +30,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S
index 323eb8319da..88a08ff3fab 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S
@@ -21,8 +21,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S
index 02f3888c56f..85b85295076 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S
@@ -21,8 +21,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
index 73c1f4b82f3..20c35e15651 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
@@ -21,8 +21,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
index ff62dd132b8..dbe170c62e0 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S
@@ -21,8 +21,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S
index 9d2e4ab8b44..af86b919a8b 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S
@@ -20,8 +20,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
index ae450c5f629..8f63eb8510f 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
@@ -21,8 +21,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
index 4447e76b1f7..19a2f253dc0 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S
@@ -20,8 +20,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S
index a16c79f75eb..4bdbb52c914 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S
index 9f7b531d300..94b3d49e848 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S
index 666f4f349a7..2b34d997341 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S
index f9bbebcca91..17e0415e822 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
index cdb42a994a1..049621bfabc 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
@@ -142,7 +142,6 @@
         RSB     setStep,setStep,#16                   @// setStep = - 3*pointStep+16
 
 
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]
         MOV     subFFTSize,#4                         @// subFFTSize = 1 for the first stage
 
 
@@ -158,6 +157,7 @@
 
 grpZeroSetLoop\name:
 
+        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]
 
         .ifeqs "\scaled", "TRUE"
 
@@ -178,9 +178,6 @@ grpZeroSetLoop\name:
             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
             VHADD    qZ0,qY0,qY1             @// y0
 
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
-
-
             .ifeqs  "\inverse", "TRUE"
 
                 VHSUB    dZr3,dYr2,dYi3                  @// y3
@@ -235,9 +232,6 @@ grpZeroSetLoop\name:
             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
             VADD    qZ0,qY0,qY1             @// y0
 
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
-
-
             .ifeqs  "\inverse", "TRUE"
 
                 VSUB    dZr3,dYr2,dYi3                  @// y3
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
index 23e2c373d62..4e46a010641 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
@@ -163,7 +163,6 @@
         @// Define stack arguments
 
         MOV     pw2,pTwiddle
-        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
 
         MOV     pw3,pTwiddle
         MOV     pw1,pTwiddle
@@ -171,42 +170,47 @@
         @// pOut0+outPointStep == increment of 4*outPointStep bytes
         MOV     outPointStep,subFFTSize,LSL #2
 
-        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
         MOV     subFFTNum,#1                            @//after the last stage
         LSL     grpCount,subFFTSize,#2
 
 
         @// Update grpCount and grpSize rightaway
-        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
 
         @// update subFFTSize for the next stage
         MOV     subFFTSize,grpCount
         MOV     dstStep,outPointStep,LSL #1
 
-        VLD2 {dW1r,dW1i}, [pw1 :128]!
-
-
         ADD     dstStep,dstStep,outPointStep                @// dstStep = 3*outPointStep
         RSB     dstStep,dstStep,#16                         @// dstStep = - 3*outPointStep+16
 
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
-
         @// Process 4 groups at a time
 
 grpLoop\name:
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
+        @// Load the second twiddle for 4 groups : w^2
+        @// w^2 twiddle (2i+0,2i+2,2i+4,2i+6)   for group 0,1,2,3
+        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
 
-        @// Rearrange the third twiddle
-        VUZP    dW3r,dW3i
-        SUBS    grpCount,grpCount,#16                    @// grpCount is multiplied by 4
+        VUZP     dButterfly1Real13, dButterfly2Real13        @// B.r D.r
 
+        @// Load the third twiddle for 4 groups : w^3
+        @// w^3 twiddle (3i+0,3i+3,3i+6,3i+9)   for group 0,1,2,3
+        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
 
-        VUZP     dButterfly1Real13, dButterfly2Real13        @// B.r D.r
         VUZP     dButterfly1Imag13, dButterfly2Imag13        @// B.i D.i
         VUZP     dButterfly1Real02, dButterfly2Real02        @// A.r C.r
+
+        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
+
         VUZP     dButterfly1Imag02, dButterfly2Imag02        @// A.i C.i
 
+        VLD2 {dW1r,dW1i}, [pw1 :128]!
+
+        @// Rearrange the third twiddle
+        VUZP    dW3r,dW3i
+        SUBS    grpCount,grpCount,#16                    @// grpCount is multiplied by 4
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr1,dW1r
@@ -225,8 +229,6 @@ grpLoop\name:
         @// Load the first twiddle for 4 groups : w^1
         @// w^1 twiddle (i+0,i+1,i+2,i+3)       for group 0,1,2,3
 
-        VLD2 {dW1r,dW1i}, [pw1 :128]!
-
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dXr2,dW2r
             VMLAL   qT2,dXi2,dW2i                       @// real part
@@ -260,24 +262,12 @@ grpLoop\name:
 
         .ENDIF
 
-        @// Load the second twiddle for 4 groups : w^2
-        @// w^2 twiddle (2i+0,2i+2,2i+4,2i+6)   for group 0,1,2,3
-        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
-
-
         VRSHRN  dZr2,qT2,#15
         VRSHRN  dZi2,qT3,#15
 
-        @// Load the third twiddle for 4 groups : w^3
-        @// w^3 twiddle (3i+0,3i+3,3i+6,3i+9)   for group 0,1,2,3
-
-        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
-
         VRSHRN  dZr3,qT0,#15
         VRSHRN  dZi3,qT1,#15
 
-        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
-
         .ifeqs "\scaled", "TRUE"
 
             @// finish first stage of 4 point FFT
@@ -285,7 +275,6 @@ grpLoop\name:
             VHADD    qY0,qX0,qZ2
             VHSUB    qY2,qX0,qZ2
             VHADD    qY1,qZ1,qZ3
-            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
             VHSUB    qY3,qZ1,qZ3
 
@@ -293,7 +282,6 @@ grpLoop\name:
 
             VHSUB    qZ0,qY2,qY1
             VHADD    qZ2,qY2,qY1
-            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
 
             .ifeqs "\inverse", "TRUE"
@@ -329,7 +317,6 @@ grpLoop\name:
             VADD    qY0,qX0,qZ2
             VSUB    qY2,qX0,qZ2
             VADD    qY1,qZ1,qZ3
-            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
             VSUB    qY3,qZ1,qZ3
 
@@ -337,7 +324,6 @@ grpLoop\name:
 
             VSUB    qZ0,qY2,qY1
             VADD    qZ2,qY2,qY1
-            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
 
             .ifeqs "\inverse", "TRUE"
@@ -376,7 +362,6 @@ grpLoop\name:
 
         @// Reset and Swap pSrc and pDst for the next stage
         MOV     pTmp,pDst
-        SUB     pSrc,pSrc,#64                       @// Extra increment currently done in the loop
         SUB     pDst,pSrc,outPointStep,LSL #2       @// pDst -= size; pSrc -= 4*size bytes
         SUB     pSrc,pTmp,outPointStep
 
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
index 0eba3856f2a..7bdbe41e08d 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 
@@ -154,7 +154,6 @@
         MOV     srcStep,pointStep,LSL #1                    @// srcStep = 2*pointStep
         VLD1     dW2,[pTwiddle :64]                             @//[wi | wr]
         ADD     setStep,srcStep,pointStep                   @// setStep = 3*pointStep
-        SUB     srcStep,srcStep,#16                         @// srcStep = 2*pointStep-16
         VLD1     dW3,[pTwiddle :64]
         @//RSB     setStep,setStep,#16                      @// setStep = - 3*pointStep+16
         RSB     setStep,setStep,#0                          @// setStep = - 3*pointStep
@@ -167,26 +166,23 @@
 
 grpLoop\name:
 
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
         ADD      stepTwiddle,stepTwiddle,pointStep
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
         ADD      pTwiddle,pTwiddle,stepTwiddle               @// set pTwiddle to the first point
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
         MOV      twStep,stepTwiddle,LSL #2
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & reset pSrc
 
         SUB      twStep,stepTwiddle,twStep                   @// twStep = -3*stepTwiddle
 
 
         MOV      setCount,pointStep,LSR #2
-        ADD     pSrc,pSrc,#16                         @// set pSrc to data[0] of the next set
-        ADD     pSrc,pSrc,pointStep                   @// increment to data[1] of the next set
+        ADD      pSrc,pSrc,pointStep                   @// increment to data[1] of the next set
 
         @// Loop on the sets : 4 at a time
 
 setLoop\name:
+        VLD2    {dXr1,dXi1},[pSrc :128],pointStep         @//  data[1]
+        VLD2    {dXr2,dXi2},[pSrc :128],pointStep         @//  data[2]
 
-        SUBS    setCount,setCount,#4                    @// decrement the loop counter
+        SUBS    setCount,setCount,#4                      @// decrement the loop counter
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr1,dW1[0]
@@ -202,8 +198,6 @@ setLoop\name:
 
         .ENDIF
 
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
-
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dXr2,dW2[0]
             VMLAL   qT2,dXi2,dW2[1]                       @// real part
@@ -218,11 +212,13 @@ setLoop\name:
 
         .ENDIF
 
+        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
+
         VRSHRN  dZr1,qT0,#15
         VRSHRN  dZi1,qT1,#15
 
-
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        ADD     pSrc,pSrc,#16                              @// set pSrc to data[1] of the next set
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr3,dW3[0]
@@ -244,7 +240,6 @@ setLoop\name:
 
         VRSHRN  dZr3,qT0,#15
         VRSHRN  dZi3,qT1,#15
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
 
 
         .ifeqs "\scaled", "TRUE"
@@ -253,7 +248,6 @@ setLoop\name:
             VHADD    qY0,qX0,qZ2
             VHSUB    qY2,qX0,qZ2
 
-            VLD2    {dXr0,dXi0},[pSrc :128]!          @//  data[0]
             VHADD    qY1,qZ1,qZ3
             VHSUB    qY3,qZ1,qZ3
 
@@ -303,7 +297,6 @@ setLoop\name:
             VADD    qY0,qX0,qZ2
             VSUB    qY2,qX0,qZ2
 
-            VLD2    {dXr0,dXi0},[pSrc]!          @//  data[0]
             VADD    qY1,qZ1,qZ3
             VSUB    qY3,qZ1,qZ3
 
@@ -351,7 +344,6 @@ setLoop\name:
 
         .ENDIF
 
-        ADD     pSrc,pSrc,pointStep                         @// increment to data[1] of the next set
         BGT     setLoop\name
 
         VLD1     dW1,[pTwiddle :64],stepTwiddle                 @//[wi | wr]
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
index 588c3197db9..f9ff37a275d 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
@@ -233,12 +233,12 @@
         VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
         VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
         VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
-        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
-                                                      @//  setStep = -7*pointStep + 16
         @// grp = 0 a special case since all the twiddle factors are 1
         @// Loop on the sets : 4 sets at a time
 
 grpZeroSetLoop\name:
+        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
+                                                           @//  setStep = -7*pointStep + 16
 
         @// Decrement setcount
         SUBS    setCount,setCount,#4                    @// decrement the set loop counter
@@ -348,9 +348,6 @@ grpZeroSetLoop\name:
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
-
-
                 VHSUB    dYr3,dVr3,dVr7
                 VHSUB    dYi3,dVi3,dVi7
                 VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
@@ -388,7 +385,6 @@ grpZeroSetLoop\name:
 
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
 
                 VHSUB    qY5,qV1,qV5
 
@@ -514,9 +510,6 @@ grpZeroSetLoop\name:
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
-
-
                 VSUB    dYr3,dVr3,dVr7
                 VSUB    dYi3,dVi3,dVi7
                 VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
@@ -554,7 +547,6 @@ grpZeroSetLoop\name:
 
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
 
                 VSUB    qY5,qV1,qV5
 
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
index 3bc5f02a743..de589c95fa5 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
@@ -29,8 +29,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
index 30a8f56b487..eeb8c6eb289 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
@@ -30,8 +30,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S
index a9700ec3eab..967d7b59750 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S
@@ -29,8 +29,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
index 685f85b6f6e..412b64fb59a 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
@@ -29,8 +29,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S
index 1b5478b2503..91e5299e071 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S
@@ -28,8 +28,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
index 3c23983efee..22efea45b0b 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
@@ -30,8 +30,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
index a5fb0e27105..d4d4abb4c21 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S
@@ -30,8 +30,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_FC32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
index da0c10f1f66..aa761126a82 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
@@ -20,8 +20,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC16_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_CToC_SC16_Sfs_s.S
index ca15c6b06cb..a3c21ac015d 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC16_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_CToC_SC16_Sfs_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_CToC_SC32_Sfs_s.S
index 90f969a83d5..504ef955d24 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_CToC_SC32_Sfs_s.S
@@ -27,8 +27,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
index fda1ae4a16e..fda446cc896 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
@@ -20,8 +20,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S
index 84d230036fc..402885fa8fb 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S
@@ -28,8 +28,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S
new file mode 100644
index 00000000000..e9530774cdf
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S
@@ -0,0 +1,639 @@
+@
+@ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+@ Some code in this file was originally from file
+@ omxSP_FFTFwd_RToCCS_S32_Sfs_s.S which was licensed as follows.
+@ It has been relicensed with permission from the copyright holders.
+@
+
+@
+@ OpenMAX DL: v1.0.2
+@ Last Modified Revision:   7810
+@ Last Modified Date:       Thu, 04 Oct 2007
+@
+@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+@
+
+@
+@ Description:
+@ Compute a forward FFT for a real signal, using 16 bit complex FFT routines.
+@
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
+.extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
+
+@Input Registers
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+#define scale           r3
+
+@ Output registers
+#define result          r0
+
+@Local Scratch Registers
+#define argTwiddle      r1
+#define argDst          r2
+#define argScale        r4
+#define pTwiddle        r4
+#define tmpOrder        r4
+#define pOut            r5
+#define subFFTSize      r7
+#define subFFTNum       r6
+#define N               r6
+#define order           r14
+#define diff            r9
+@ Total number of radix stages to complete the FFT
+#define count           r8
+#define x0r             r4
+#define x0i             r5
+#define diffMinusOne    r2
+#define round           r3
+#define subFFTSizeTmp   r6
+#define step            r3
+#define stepr           r11
+#define step1           r10
+#define step1r          r6
+#define step2           r8
+#define step2r          r9
+#define twStep          r8
+#define zero            r9
+#define pTwiddleTmp     r5
+#define t0              r10
+
+@ Neon registers
+#define dX0             d0.s16
+#define dX0S32          d0.s32
+#define dzero           d1.s16
+#define dZero           d2.s16
+#define dShift          d3.s16
+#define qShift          q1.s16
+#define dX0r            d2.s16
+#define dX0i            d3.s16
+#define dX1r            d4.s16
+#define dX1i            d5.s16
+#define qX1             q2.s16
+#define dX0rS32         d2.s32
+#define dX0iS32         d3.s32
+#define dX1rS32         d4.s32
+#define dX1iS32         d5.s32
+#define dT0             d6.s16
+#define dT1             d7.s16
+#define dT2             d8.s16
+#define dT3             d9.s16
+#define qT0             q5.s32
+#define qT1             q6.s32
+#define qT0s            q5.s16
+#define qT1s            q6.s16
+#define dW0r            d14.s16
+#define dW0i            d15.s16
+#define dW1r            d16.s16
+#define dW1i            d17.s16
+#define dW0rS32         d14.s32
+#define dW0iS32         d15.s32
+#define dW1rS32         d16.s32
+#define dW1iS32         d17.s32
+#define dY0r            d14.s16
+#define dY0i            d15.s16
+#define dY0rS32         d14.s32
+#define dY0iS32         d15.s32
+#define dY1r            d16.s16
+#define dY1i            d17.s16
+#define qY1             q8.s16
+#define dY1rS32         d16.s32
+#define dY1iS32         d17.s32
+#define dY0rS64         d14.s32
+#define dY0iS64         d15.s32
+#define qT2             q9.s32
+#define qT3             q10.s32
+#define d18s16          d18.s16
+#define d19s16          d19.s16
+#define d20s16          d20.s16
+#define d21s16          d21.s16
+@ Last three elements
+#define dX1             d3.s16
+#define dW0             d4.s16
+#define dW1             d5.s16
+#define dY0             d10.s16
+#define dY1             d11.s16
+#define dY2             d12.s16
+#define dY3             d13.s16
+
+        @ Allocate stack memory required by the function
+        M_ALLOC4        diffOnStack, 4
+
+        @ Write function header
+        M_START     omxSP_FFTFwd_RToCCS_S16_Sfs,r11,d15
+
+        @ Structure offsets for the FFTSpec
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @ Define stack arguments
+
+        @ Read the size from structure and take log
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @ Read other structure parameters
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        @ Do a N/2 point complex FFT including the scaling
+
+        MOV     N,N,ASR #1                    @ N/2 point complex FFT
+
+        CLZ     order,N                       @ N = 2^order
+        RSB     order,order,#31
+        MOV     subFFTSize,#1
+
+        CMP     order,#3
+        BGT     orderGreaterthan3             @ order > 3
+
+        CMP     order,#1
+        BGE     orderGreaterthan0             @ order > 0
+        M_STR   scale, diffOnStack,LT         @ order = 0
+        LDR     x0r,[pSrc]
+        STR     x0r,[pOut]
+        MOV     pSrc,pOut
+        MOV     argDst,pDst
+        B       FFTEnd
+
+orderGreaterthan0:
+        @ set the buffers appropriately for various orders
+        CMP     order,#2
+        MOVEQ   argDst,pDst
+        MOVNE   argDst,pOut
+        MOVNE   pOut,pDst                  @ Pass 1st stage destination in RN5
+        MOV     argTwiddle,pTwiddle
+
+        SUBS    diff,scale,order
+        M_STR   diff,diffOnStack
+        MOVGT   scale,order
+        @ Now scale <= order
+
+        CMP     order,#1
+        BGT     orderGreaterthan1
+        @ order = 1:
+        SUBS    scale,scale,#1
+        BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+        BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
+        B       FFTEnd
+
+orderGreaterthan1:
+        CMP     order,#2
+        MOV     argScale,scale
+        BGT     orderGreaterthan2
+        @ order = 2:
+        SUBS    argScale,argScale,#1
+        BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+        BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1
+        BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
+        BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+orderGreaterthan2:   @ order = 3
+        SUBS    argScale,argScale,#1
+        BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+        BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1
+        BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
+        BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1
+        BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
+        BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+
+orderGreaterthan3:
+        @ check scale = 0 or scale = order
+        SUBS    diff, scale, order   @ diff = scale - order
+        MOVGT   scale,order
+        BGE     specialScaleCase     @ scale >= order (scale now clamped to order)
+        CMP     scale,#0
+        BEQ     specialScaleCase
+        B       generalScaleCase
+
+specialScaleCase:   @ scale = 0, or, scale = order && order > 3
+        TST     order, #2            @ Set input args to fft stages
+        MOVEQ   argDst,pDst
+        MOVNE   argDst,pOut
+        MOVNE   pOut,pDst            @ Pass the first stage destination in RN5
+        MOV     argTwiddle,pTwiddle
+
+        CMP     diff,#0
+        M_STR   diff, diffOnStack
+        BGE     scaleEqualsOrder
+
+        @ check for even or odd order.
+        @ NOTE: The following combination of BL's would work fine even though
+        @ the first BL would corrupt the flags. This is because the end of the
+        @ "grpZeroSetLoop" loop inside
+        @ armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets Z flag to EQ.
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
+        BLNE    armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
+
+        CMP     subFFTNum,#4
+        BLT     FFTEnd
+
+unscaledRadix4Loop:
+        BEQ     lastStageUnscaledRadix4
+        BL      armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
+        CMP     subFFTNum,#4
+        B       unscaledRadix4Loop
+
+lastStageUnscaledRadix4:
+        BL      armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+scaleEqualsOrder:
+        @ check for even or odd order
+        @ NOTE: The following combination of BL's would work fine even though
+        @ the first BL would corrupt the flags. This is because the end of the
+        @ "grpZeroSetLoop" loop inside
+        @ armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets Z flag to EQ.
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
+        BLNE    armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
+
+        CMP     subFFTNum,#4
+        BLT     FFTEnd
+
+scaledRadix4Loop:
+        BEQ     lastStageScaledRadix4
+        BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
+        CMP     subFFTNum,#4
+        B       scaledRadix4Loop
+
+lastStageScaledRadix4:
+        BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+generalScaleCase:                        @ 0 < scale < order and order > 3
+        @ Determine the correct destination buffer
+        SUB     diff,order,scale
+        TST     diff,#0x01
+        ADDEQ   count,scale,diff,LSR #1  @ count = scale + (order - scale)/2
+        MOVNE   count,order
+        TST     count,#0x01              @ Is count even or odd ?
+
+        MOVEQ   argDst,pDst              @ Set input args to fft stages
+        MOVNE   argDst,pOut
+        MOVNE   pOut,pDst                @ Pass 1st stage destination in RN5
+        MOV     argTwiddle,pTwiddle
+
+        CMP     diff,#1
+        M_STR   diff, diffOnStack
+        BEQ     scaleps                  @ scaling including a radix2_ps stage
+
+        MOV     argScale,scale           @ Put scale in RN4 to save and restore
+        BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1
+
+scaledRadix2Loop:
+        BLGT    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1     @ save, restore scale in scaled stages
+        BGT     scaledRadix2Loop
+        B       outScale
+
+scaleps:
+        SUB     argScale,scale,#1        @ order>3 and diff=1 => scale >= 3
+        BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1
+
+scaledRadix2psLoop:
+        BEQ     scaledRadix2psStage
+        BLGT    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
+        SUBS    argScale,argScale,#1     @ save, restore scale in scaled stages
+        BGE     scaledRadix2psLoop
+
+scaledRadix2psStage:
+        BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
+        B       generalLastStageUnscaledRadix2
+
+outScale:
+        M_LDR   diff, diffOnStack
+        @check for even or odd order
+        TST     diff,#0x00000001
+        BEQ     generalUnscaledRadix4Loop
+        B       unscaledRadix2Loop
+
+generalUnscaledRadix4Loop:
+        CMP     subFFTNum,#4
+        BEQ     generalLastStageUnscaledRadix4
+        BL      armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
+        B       generalUnscaledRadix4Loop
+
+generalLastStageUnscaledRadix4:
+        BL      armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
+        B       End
+
+unscaledRadix2Loop:
+        CMP     subFFTNum,#4
+        BEQ     generalLastTwoStagesUnscaledRadix2
+        BL      armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
+        B       unscaledRadix2Loop
+
+generalLastTwoStagesUnscaledRadix2:
+        BL      armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
+generalLastStageUnscaledRadix2:
+        BL      armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
+        B       End
+
+FFTEnd:     @ Does only the scaling
+        M_LDR   diff, diffOnStack
+        CMP     diff,#0
+        BLE     finalComplexToRealFixup
+
+        RSB     diff,diff,#0               @ for right shift by a variable
+        VDUP    qShift,diff
+
+        @ save subFFTSize and use subFFTSizeTmp in the following loop
+        MOV     subFFTSizeTmp,subFFTSize   @ subFFTSizeTmp same reg as subFFTNum
+
+        @ Use parallel loads for bigger FFT size.
+        CMP     subFFTSizeTmp, #8
+        BLT     scaleLessFFTData
+
+scaleFFTData:
+        VLD1    {qT0s, qT1s},[pSrc:256]    @ pSrc contains pDst pointer
+        SUBS    subFFTSizeTmp,subFFTSizeTmp,#8
+        VSHL    qT0s,qShift
+        VSHL    qT1s,qShift
+        VST1    {qT0s, qT1s},[pSrc:256]!
+        BGT     scaleFFTData
+        B       afterScaling
+
+scaleLessFFTData:
+        VLD1    {dX0S32[0]},[pSrc]         @ pSrc contains pDst pointer
+        SUBS    subFFTSizeTmp,subFFTSizeTmp,#1
+        VSHL    dX0,dShift
+        VST1    {dX0S32[0]},[pSrc]!
+        BGT     scaleLessFFTData
+
+afterScaling:
+        SUB     pSrc,pSrc,subFFTSize,LSL #2 @ reset pSrc for final fixup
+
+        @  change the logic so that output after scaling is in pOut and not in pDst
+        @  finally store from pOut to pDst
+        @  change branch "End" to branch "finalComplexToRealFixup" in the above
+        @  chk the code below for multiplication by j factor
+
+finalComplexToRealFixup:
+        @ F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
+        @ 1/2[(a+jb) + (a-jb)] - j  [(a+jb) - (a-jb)]
+        @ 1/2[2a+j0] - j [0+j2b]
+        @ (a+b, 0)
+
+        @ F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
+        @ 1/2[(a+jb) + (a-jb)] + j  [(a+jb) - (a-jb)]
+        @ 1/2[2a+j0] + j [0+j2b]
+        @ (a-b, 0)
+
+        CMP    subFFTSize,#4
+        BLE    smallFFTSize
+
+@ SubSize > 3:
+        @ F(0) and F(N/2)
+        VLD2    {dX0r[0],dX0i[0]},[pSrc]!
+        MOV     zero,#0
+        VMOV    dX0r[1],zero
+        MOV     step,subFFTSize,LSL #2        @ step = N/2 * 4 bytes
+        VMOV    dX0i[1],zero
+        SUB     twStep,step,subFFTSize        @ twStep = 3N/8 * 8 bytes
+
+        VADD    dY0r,dX0r,dX0i                @ F(0) = ((Z0.r+Z0.i) , 0)
+        MOV     step1,subFFTSize,LSL #1       @ step1 = N/2 * 2 bytes
+        VSUB    dY0i,dX0r,dX0i                @ F(N/2) = ((Z0.r-Z0.i) , 0)
+        SUBS    subFFTSize,subFFTSize,#2
+
+        VST1    dY0rS32[0],[argDst], step
+        ADD     pTwiddleTmp,argTwiddle,#4     @ W^2
+        VST1    dY0iS32[0],[argDst]!
+        ADD     argTwiddle,argTwiddle,twStep  @ W^1
+
+        VDUP    dzero,zero
+        SUB     argDst,argDst,step
+        SUB     step,step,#20
+        RSB     stepr, step, #16
+        SUB     step1,step1,#8                @ (N/4-1)*8 bytes
+        RSB     step1r,step1,#8
+
+        SUB     step2, step1, #4
+        RSB     step2r, step2, #8
+
+        @ F(k) = 1/2[Z(k) +  Z'(N/2-k)] -j*W^(k) [Z(k) -  Z'(N/2-k)]
+        @ Note: W^k is stored as negative values in the table.
+        @ Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1)
+        @ since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1).
+
+evenOddButterflyLoop:
+        VLD2    {dX0r,dX0i},[pSrc],step
+        VLD2    {dX1r,dX1i},[pSrc],stepr
+
+        VLD1    dW0r,[argTwiddle],step1
+        SUB     step1, step1, #16
+        VREV64  qX1,qX1
+
+        VLD1    dW1r,[argTwiddle],step1r
+        ADD     step1r, step1r, #16
+        VSUB    dT2,dX0r,dX1r                 @ a-c
+
+        VLD1    dW0i,[pTwiddleTmp],step2
+        SUB     step2, step2, #16
+        VADD    dT3,dX0i,dX1i                 @ b+d
+
+        VLD1    dW1i,[pTwiddleTmp],step2r
+        ADD     step2r, step2r, #16
+
+        VTRN    dW0r,dW0i
+        VZIP    dW1r, dW1i
+
+        SUBS    subFFTSize,subFFTSize,#8
+
+        VHADD   dT0,dX0r,dX1r                 @ (a+c)/2
+        VZIP    dW1iS32, dW1rS32
+        VHSUB   dT1,dX0i,dX1i                 @ (b-d)/2
+
+        VQDMULH dY0,dW1i,dT2
+        VQDMULH dY1,dW1r,dT3
+        VQDMULH dY2,dW1i,dT3
+        VQDMULH dY3,dW1r,dT2
+
+        VQDMULH d18s16,dW0r,dT2
+        VQDMULH d19s16,dW0i,dT3
+        VQDMULH d20s16,dW0r,dT3
+        VQDMULH d21s16,dW0i,dT2
+
+        VRHADD  dX1r, dY0, dY1
+        VHSUB   dX1i, dY2, dY3
+        VHSUB   dX0r, d18s16, d19s16
+        VADD    dY1i,dT1,dX1r
+        VRHADD  dX0i, d20s16, d21s16
+        VSUB    dY1r,dT0,dX1i                 @ F(N/2 -1)
+        VSUB    dY0r,dT0,dX0i                 @ F(1)
+        VADD    dY0i,dT1,dX0r
+
+        VNEG    dY1i,dY1i
+        VREV64  qY1, qY1
+
+        VST2    {dY0r,dY0i},[argDst],step
+        SUB     step,step,#32                 @ (N/2-4)*4 bytes
+        VST2    {dY1r,dY1i},[argDst],stepr
+        ADD     stepr,stepr,#32
+
+        BGT     evenOddButterflyLoop
+
+        SUB     pSrc,pSrc,#4                  @ points to the last element.
+        SUB     argDst,argDst,#4              @ points to the last element.
+
+        b lastElement
+
+smallFFTSize:
+
+        @ F(0) and F(N/2)
+        VLD2    {dX0r[0],dX0i[0]},[pSrc]!
+        MOV     zero,#0
+        VMOV    dX0r[1],zero
+        MOV     step,subFFTSize,LSL #2        @ step = N/2 * 4 bytes
+        VMOV    dX0i[1],zero
+        SUB     twStep,step,subFFTSize        @ twStep = 3N/8 * 8 bytes
+
+        VADD    dY0r,dX0r,dX0i                @ F(0) = ((Z0.r+Z0.i) , 0)
+        MOV     step1,subFFTSize,LSL #1       @ step1 = N/2 * 2 bytes
+        VSUB    dY0i,dX0r,dX0i                @ F(N/2) = ((Z0.r-Z0.i) , 0)
+        SUBS    subFFTSize,subFFTSize,#2
+
+
+        VST1    dY0rS32[0],[argDst], step
+        ADD     pTwiddleTmp,argTwiddle,#4     @ W^2
+        VST1    dY0iS32[0],[argDst]!
+        ADD     argTwiddle,argTwiddle,twStep  @ W^1
+
+        VDUP    dzero,zero
+        SUB     argDst,argDst,step
+
+        BLT     End
+        BEQ     lastElement
+
+        SUB     step,step,#12
+        SUB     step1,step1,#4                @ (N/4-1)*8 bytes
+
+        @ F(k) = 1/2[Z(k) +  Z'(N/2-k)] -j*W^(k) [Z(k) -  Z'(N/2-k)]
+
+butterflyLoopSubFFTSize4:
+        VLD1    dW0rS32[0], [argTwiddle],step1
+        VLD1    dW1rS32[0],[argTwiddle]!
+
+        VLD2    {dX0r[0],dX0i[0]},[pSrc]!
+        VLD2    {dX0r[1],dX0i[1]},[pSrc],step
+        SUB     pSrc,pSrc,#4
+        SUB     argTwiddle,argTwiddle,step1
+        VLD2    {dX1r[0],dX1i[0]},[pSrc]!
+        VLD2    {dX1r[1],dX1i[1]},[pSrc]!
+
+        SUB     step1,step1,#4                @ (N/4-2)*4 bytes
+        VLD1    dW0iS32[0],[pTwiddleTmp],step1
+        VLD1    dW1iS32[0],[pTwiddleTmp]!
+        SUB     pSrc,pSrc,step
+
+        SUB     pTwiddleTmp,pTwiddleTmp,step1
+        VREV32  dX1r,dX1r
+        VREV32  dX1i,dX1i
+        SUBS    subFFTSize,subFFTSize,#4
+
+        VSUB    dT2,dX0r,dX1r                 @ a-c
+        SUB     step1,step1,#4
+        VADD    dT3,dX0i,dX1i                 @ b+d
+        VADD    dT0,dX0r,dX1r                 @ a+c
+        VSUB    dT1,dX0i,dX1i                 @ b-d
+        VHADD   dT0,dT0,dzero
+        VHADD   dT1,dT1,dzero
+
+        VTRN    dW1r,dW1i
+        VTRN    dW0r,dW0i
+
+        VMULL   qT0,dW1r,dT2
+        VMLAL   qT0,dW1i,dT3
+        VMULL   qT1,dW1r,dT3
+        VMLSL   qT1,dW1i,dT2
+
+        VMULL   qT2,dW0r,dT2
+        VMLSL   qT2,dW0i,dT3
+        VMULL   qT3,dW0r,dT3
+        VMLAL   qT3,dW0i,dT2
+
+        VRSHRN  dX1r,qT0,#16
+        VRSHRN  dX1i,qT1,#16
+
+        VSUB    dY1r,dT0,dX1i                 @ F(N/2 -1)
+        VADD    dY1i,dT1,dX1r
+        VNEG    dY1i,dY1i
+
+        VREV32  dY1r,dY1r
+        VREV32  dY1i,dY1i
+
+        VRSHRN  dX0r,qT2,#16
+        VRSHRN  dX0i,qT3,#16
+
+        VSUB    dY0r,dT0,dX0i                 @ F(1)
+        VADD    dY0i,dT1,dX0r
+
+        VST2    {dY0r[0],dY0i[0]},[argDst]!
+        VST2    {dY0r[1],dY0i[1]},[argDst],step
+        SUB     argDst, #4
+        VST2    {dY1r[0],dY1i[0]},[argDst]!
+        VST2    {dY1r[1],dY1i[1]},[argDst]!
+        SUB     argDst,argDst,step
+        SUB     pSrc,pSrc,#4                  @ points to the last element.
+        SUB     argDst,argDst,#4              @ points to the last element.
+
+lastElement:
+        @ Last element can be expanded as follows
+        @ 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
+        @ 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
+        @ 1/2[2a+j0] + j (c+jd) [0+j2b]
+        @ (a-bc, -bd)
+        @ Since (c,d) = (0,1) for the last element, result is just (a,-b)
+
+        VLD1    dX0rS32[0],[pSrc]
+        VST1    dX0r[0],[argDst]!
+        VNEG    dX0r,dX0r
+        VST1    dX0r[1],[argDst]!
+
+End:
+        @ Set return value
+        MOV     result, #OMX_Sts_NoErr
+
+        @ Write function tail
+        M_END
+
+    .END
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S
index a742162e616..c1385c025ed 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S
@@ -29,8 +29,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
index 5deaf896c53..9c45b54cdc1 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
@@ -20,8 +20,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S16_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S16_Sfs_s.S
new file mode 100644
index 00000000000..311dba99e83
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S16_Sfs_s.S
@@ -0,0 +1,301 @@
+@
+@ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+@ Some code in this file was originally from file
+@ omxSP_FFTInv_CToC_SC16_Sfs_s.S which was licensed as follows.
+@ It has been relicensed with permission from the copyright holders.
+@
+
+@
+@ File Name:  omxSP_FFTInv_CToC_SC16_Sfs_s.s
+@ OpenMAX DL: v1.0.2
+@ Last Modified Revision:   6729
+@ Last Modified Date:       Tue, 17 Jul 2007
+@
+@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+@
+
+@
+@ Description:
+@ Compute an inverse FFT for a 16-bit real signal, with complex FFT routines.
+@
+
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
+
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
+.extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
+
+@ Register aliases used by omxSP_FFTInv_CCSToR_S16_Sfs.
+@ Several aliases map to the same physical register, so writing one alias
+@ destroys whatever the other aliases of that register held.
+@ Keep comments on their own lines: text after a #define value would become
+@ part of the macro expansion.
+
+@Input Registers
+#define pSrc            r0
+#define pDst            r1
+#define pFFTSpec        r2
+#define scale           r3
+
+@ Output registers
+@ result shares r0 with pSrc.
+#define result  r0
+
+@Local Scratch Registers
+@ argTwiddle shares r1 with pDst; argDst shares r2 with pFFTSpec.
+#define argTwiddle      r1
+#define argDst          r2
+@ argScale, pTwiddle, tmpOrder (and x0r below) all name r4.
+#define argScale        r4
+#define pTwiddle        r4
+#define tmpOrder        r4
+#define pOut            r5
+#define subFFTSize      r7
+@ subFFTNum and N both name r6.
+#define subFFTNum       r6
+#define N               r6
+@ order lives in r14 (the link register), so every BL overwrites it.
+#define order           r14
+#define diff            r9
+@ Total number of radix stages to complete the FFT
+#define count           r8
+#define x0r             r4
+#define x0i             r5
+#define diffMinusOne    r2
+#define round           r3
+#define pOut1           r2
+#define size            r7
+#define step            r8
+#define step1           r9
+#define twStep          r10
+#define pTwiddleTmp     r11
+#define argTwiddle1     r12
+#define zero            r14
+
+@ Neon registers
+@ The suffix after the dot is the element type attached to the register.
+@ qShift (Q0) is the D0/D1 pair, so dX0 (D0) is its low half and
+@ dShift (D1) its high half.
+#define dX0             D0.S32
+#define dShift          D1.S32
+#define qShift          Q0.s16
+#define dX1             D1.S32
+#define dY0             D2.S32
+#define dY1             D3.S32
+#define dX0r            D0.S32
+#define dX0i            D1.S32
+#define dX1r            D2.S32
+#define dX1i            D3.S32
+#define dW0r            D4.S32
+#define dW0i            D5.S32
+#define dW1r            D6.S32
+#define dW1i            D7.S32
+#define dT0             D8.S32
+#define dT1             D9.S32
+#define dT2             D10.S32
+#define dT3             D11.S32
+@ qT0/qT0s and qT1/qT1s are the same Q registers viewed as S64 or S16.
+#define qT0             Q6.S64
+#define qT1             Q7.S64
+#define qT0s            Q6.S16
+#define qT1s            Q7.S16
+#define qT2             Q8.S64
+#define qT3             Q9.S64
+@ dY0r..dY1i reuse D4..D7, the same registers as dW0r..dW1i.
+#define dY0r            D4.S32
+#define dY0i            D5.S32
+#define dY1r            D6.S32
+#define dY1i            D7.S32
+#define dzero           D20.S32
+#define dY2             D4.S32
+#define dY3             D5.S32
+#define dW0             D6.S32
+#define dW1             D7.S32
+#define dW0Tmp          D10.S32
+#define dW1Neg          D11.S32
+
+
+
+    @ omxSP_FFTInv_CCSToR_S16_Sfs: inverse FFT producing a 16-bit real signal,
+    @ built from a pre-twiddle stage followed by a half-size complex inverse
+    @ FFT and a final scaling pass.
+    @ On entry: r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale.
+    @ On exit:  r0 = OMX_Sts_NoErr.
+
+    @ Allocate stack memory required by the function
+    @ (one 4-byte slot, diffOnStack, holding the shift still owed at the end)
+        M_ALLOC4        diffOnStack, 4
+
+    @ Write function header
+    @ NOTE(review): r11 and d15 presumably name the highest core and NEON
+    @ registers the prologue must preserve - confirm against M_START.
+        M_START     omxSP_FFTInv_CCSToR_S16_Sfs,r11,d15
+
+@ Structure offsets for the FFTSpec
+@ (byte offsets of the fields read through pFFTSpec)
+        .set    ARMsFFTSpec_N, 0
+        .set    ARMsFFTSpec_pBitRev, 4
+        .set    ARMsFFTSpec_pTwiddle, 8
+        .set    ARMsFFTSpec_pBuf, 12
+
+        @ Define stack arguments
+
+        @ Read the size from structure (its log2 is taken later, at complexIFFT)
+        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
+
+        @ Read other structure parameters
+        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
+        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
+
+        @ Call the preTwiddle Radix2 stage before doing the complex IFFT
+
+        @ scale == 0 selects the unscaled pre-twiddle, scale > 0 the scaled one.
+        @ After the BLEQ call returns, BLGT is evaluated on the flags left by
+        @ the callee; the original note says its evenOddButterflyLoop ends with
+        @ the Z flag set, so the second call is then skipped.
+        @ NOTE(review): with a negative scale neither EQ nor GT holds and no
+        @ pre-twiddle routine is called - confirm callers never pass scale < 0.
+
+        CMP     scale,#0
+        BLEQ    armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe
+        BLGT    armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe
+
+@ Set up the N/2-point complex inverse FFT and handle orders 0..3 directly;
+@ larger orders branch to orderGreaterthan3.
+complexIFFT:
+
+        ASR     N,N,#1                              @ N/2 point complex IFFT
+        ADD     pSrc,pOut,N,LSL #2                  @ set pSrc as pOut1
+
+        CLZ     order,N                             @ N = 2^order
+        RSB     order,order,#31                     @ order = 31 - clz(N) = log2(N)
+        MOV     subFFTSize,#1
+
+        ADD     scale,scale,order                   @ FFTInverse has a final scaling factor by N
+
+        CMP     order,#3
+        BGT     orderGreaterthan3                   @ order > 3
+
+        CMP     order,#1
+        BGE     orderGreaterthan0                   @ order > 0
+        @ order = 0: a single element. Copy it to pDst and let FFTEnd apply
+        @ the whole of scale (saved in diffOnStack) as the final shift.
+        @ The LT condition always holds here because the BGE above fell through.
+        M_STR   scale, diffOnStack,LT               @ order = 0
+        LDRLT   x0r,[pSrc]
+        STRLT   x0r,[pDst]
+        MOVLT   pSrc,pDst
+        BLT     FFTEnd
+
+orderGreaterthan0:
+        @ set the buffers appropriately for various orders
+        @ order == 2: first stage writes to pOut and pDst is passed in r5;
+        @ otherwise the first stage writes to pDst.
+        CMP     order,#2
+        MOVNE   argDst,pDst
+        MOVEQ   argDst,pOut
+        MOVEQ   pOut,pDst                           @ Pass the first stage destination in RN5
+        @ argTwiddle is r1, so this overwrites pDst; it must come after the
+        @ moves above that read pDst.
+        MOV     argTwiddle,pTwiddle
+        @ Store the scale factor and scale at the end
+        @ SUB (no S suffix) leaves the flags of CMP order,#2 intact for the
+        @ BGE/BLLT below; M_STR is assumed not to touch the flags either.
+        SUB     diff,scale,order
+        M_STR   diff, diffOnStack
+        BGE     orderGreaterthan1                   @ order >= 2
+        BLLT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @ order = 1
+        B       FFTEnd
+
+
+orderGreaterthan1:
+        @ order is r14 (LR) and is destroyed by the BL below, so keep a copy
+        @ in r4. NOTE(review): the CMP after the call relies on the callee
+        @ leaving r4 unchanged - confirm against the radix-2 first stage.
+        MOV     tmpOrder,order                      @ tmpOrder = RN 4
+        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
+        CMP     tmpOrder,#2
+        BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe   @ order = 3 only
+        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+
+
+
+@ order > 3: run a radix-4 or radix-8 first stage followed by radix-4 stages.
+@ The scaled (Sfs) routines are used when diff >= 0, the unscaled ones
+@ otherwise. Falls through into FFTEnd after the last scaled stage.
+orderGreaterthan3:
+        @ diff = scale - order. scale was incremented by order in complexIFFT,
+        @ so diff equals the scale value the caller passed in.
+        SUB     diff, scale, order                  @ diff = scale - order
+
+        @ Choose the first-stage destination from bit 1 of order.
+        TST     order, #2                           @ Set input args to fft stages
+        MOVNE   argDst,pDst
+        MOVEQ   argDst,pOut
+        MOVEQ   pOut,pDst                           @ Pass the first stage destination in RN5
+        @ argTwiddle is r1, so pDst is overwritten here, after its last use.
+        MOV     argTwiddle,pTwiddle
+
+        @ The BGE tests the CMP result; M_STR is assumed to leave flags alone.
+        CMP     diff,#0
+        M_STR   diff, diffOnStack
+        BGE     scaleEqualsOrder
+
+        @ check for even or odd order
+        @ NOTE: The following combination of BL's would work fine even though the first
+        @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
+        @ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
+        BLNE    armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
+
+        @ NOTE(review): subFFTNum is presumably updated by each stage routine;
+        @ this file never writes it on this path.
+        CMP     subFFTNum,#4
+        BLT     FFTEnd
+
+        @ Loop entry and back edge both arrive with flags from CMP subFFTNum,#4:
+        @ run general radix-4 stages until subFFTNum == 4, then the last stage.
+unscaledRadix4Loop:
+        BEQ     lastStageUnscaledRadix4
+        BL      armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
+        CMP     subFFTNum,#4
+        B       unscaledRadix4Loop
+
+lastStageUnscaledRadix4:
+        BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
+        B       FFTEnd
+
+scaleEqualsOrder:
+        @ check for even or odd order
+        @ NOTE: The following combination of BL's would work fine even though the first
+        @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
+        @ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
+
+        TST     order,#0x00000001
+        BLEQ    armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
+        BLNE    armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
+
+        CMP     subFFTNum,#4
+        BLT     FFTEnd
+
+        @ Same loop shape as unscaledRadix4Loop, using the scaled stage routines.
+scaledRadix4Loop:
+        BEQ     lastStageScaledRadix4
+        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
+        CMP     subFFTNum,#4
+        B       scaledRadix4Loop
+
+lastStageScaledRadix4:
+        @ No branch after this call: execution continues into FFTEnd.
+        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
+
+@ Final scaling pass: shift every output sample right by the amount saved in
+@ diffOnStack (nothing to do when it is <= 0), then return OMX_Sts_NoErr.
+@ pSrc walks the output buffer; subFFTSize counts 32-bit words still to scale.
+FFTEnd:                                         @ Does only the scaling
+
+        M_LDR   diff, diffOnStack
+        CMP     diff,#0
+        BLE     End
+
+        @ VSHL/VRSHL by register shift right when the per-lane count is negative.
+        RSB     diff,diff,#0                    @ to use VRSHL for right shift by a variable
+        VDUP    qShift,diff                     @ every 16-bit lane of Q0 (D0,D1) = -diff
+
+        @ Use parallel loads for bigger FFT size.
+        CMP     subFFTSize, #8
+        BLT     scaleLessFFTData
+
+        @ Bulk path: 32 bytes (eight 32-bit words) per iteration, shifted as
+        @ 16-bit lanes without rounding.
+scaleFFTData:
+        VLD1    {qT0s, qT1s},[pSrc:256]         @ pSrc contains pDst pointer
+        SUBS    subFFTSize,subFFTSize,#8
+        VSHL    qT0s,qShift
+        VSHL    qT1s,qShift
+        VST1    {qT0s, qT1s},[pSrc:256]!
+        BGT     scaleFFTData
+        B       End
+
+        @ Small path: one 32-bit word (two packed 16-bit samples) per iteration.
+        @ The load overwrites D0, the low half of qShift; the shift counts are
+        @ read from D1, the untouched high half.
+scaleLessFFTData:                               @ N = subFFTSize  ; dataptr = pDst  ; scale = diff
+        VLD1    {dX0[0]},[pSrc]                 @ pSrc contains pDst pointer
+        SUBS    subFFTSize,subFFTSize,#1
+        @ Shift on 16-bit lanes, matching the bulk path. The previous 32-bit
+        @ lane shift (dX0/dShift are typed S32) moved bits of the upper sample
+        @ into the lower one. D0/D1 are the registers behind dX0/dShift.
+        @ NOTE(review): this path rounds (VRSHL) while the bulk path truncates
+        @ (VSHL); the rounding is kept as it was - confirm which is intended.
+        VRSHL   D0.S16,D1.S16
+        VST1    {dX0[0]},[pSrc]!
+        BGT     scaleLessFFTData
+
+End:
+        @ Set return value
+        MOV     result, #OMX_Sts_NoErr
+
+        @ Write function tail
+        M_END
+
+
+
+
+
+
+    .END
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S
index becc0327e7f..f2f2d025d22 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S
@@ -29,8 +29,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32_Sfs_s.S
index 003d666036d..10ce047dbff 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32_Sfs_s.S
@@ -29,8 +29,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
         
 @// Import symbols required from other files
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CToC_FC32_Sfs_s.S
index c2e86d2f7e8..73a6549f00c 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CToC_FC32_Sfs_s.S
@@ -20,8 +20,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CToC_SC16_Sfs_s.S
index ff85e2b5af6..2388d0f5811 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CToC_SC16_Sfs_s.S
@@ -29,8 +29,8 @@
 
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
 
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC32_Sfs_s.S b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CToC_SC32_Sfs_s.S
index 09c461cc78f..7df624301c3 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC32_Sfs_s.S
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/neon/omxSP_FFTInv_CToC_SC32_Sfs_s.S
@@ -28,8 +28,8 @@
         
 @// Include standard headers
 
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
+#include "dl/api/arm/armCOMM_s.h"
+#include "dl/api/arm/omxtypes_s.h"
         
 @// Import symbols required from other files
 @// (For example tables)
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_FC32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_C_FC32.c
index 081f23739dd..6ac9de85a90 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_FC32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_C_FC32.c
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC16.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_C_SC16.c
index 288c76ca614..1fc4fe2bd6f 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC16.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_C_SC16.c
@@ -25,7 +25,7 @@
  * Compute the size of the specification structure required
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_C_SC32.c
index 0ca3b5664b4..176586407cb 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_C_SC32.c
@@ -25,7 +25,7 @@
  * Compute the size of the specification structure required
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_F32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_F32.c
index 19b16bbd959..046d069d06e 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_F32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_F32.c
@@ -9,7 +9,7 @@
  *
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S16.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S16.c
new file mode 100644
index 00000000000..7ad27500dc0
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S16.c
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ * Some code in this file was originally from file omxSP_FFTGetBufSize_R_S32.c
+ * which was licensed as follows.
+ * It has been relicensed with permission from the copyright holders.
+ */
+
+/*
+ * OpenMAX DL: v1.0.2
+ * Last Modified Revision:
+ * Last Modified Date:
+ */
+
+#include "dl/api/arm/armOMX.h"
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/armSP.h"
+#include "dl/sp/api/omxSP.h"
+
+/**
+ * Function: omxSP_FFTGetBufSize_R_S16
+ *
+ * Description:
+ * Computes the size of the specification structure required for the length
+ * 2^order real FFT and IFFT functions.
+ *
+ * Remarks:
+ * This function is used in conjunction with the 16-bit functions
+ * <FFTFwd_RToCCS_S16_Sfs> and <FFTInv_CCSToR_S16_Sfs>.
+ *
+ * Parameters:
+ * [in]  order       base-2 logarithm of the length; valid in the range
+ *			   [1,12].
+ * [out] pSize	   pointer to the number of bytes required for the
+ *			   specification structure.
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxSP_FFTGetBufSize_R_S16(OMX_INT order, OMX_INT *pSize) {
+  OMX_INT     NBy2,N,twiddleSize;
+
+  /*
+   * Validate the arguments before using them:
+   *  - pSize must be a valid pointer because the result is stored through it;
+   *  - order must lie in the documented range [1,12]. Order zero is not
+   *    allowed, a negative order would make the shift below undefined, and
+   *    larger orders are outside the range this function is specified for.
+   */
+  if (!pSize || order < 1 || order > 12) {
+    return OMX_Sts_BadArgErr;
+  }
+
+  /* N = 2^order real points, processed as an N/2 point complex FFT. */
+  NBy2 = 1 << (order - 1);
+  N = NBy2 << 1;
+  twiddleSize = 5 * N / 8;  /* 3 / 4 (N / 2) + N / 4 */
+
+  /* 2 pointers to store bitreversed array and twiddle factor array */
+  *pSize = sizeof(ARMsFFTSpec_R_SC16)
+           /* Twiddle factors  */
+           + sizeof(OMX_SC16) * twiddleSize
+           /* Ping Pong buffer for doing the N/2 point complex FFT; */
+           /* extra size 'N' as a temporary buf for FFTInv_CCSToR_S16_Sfs */
+           + sizeof(OMX_S16) * (N << 1)
+           /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */
+           + 62 ;
+
+
+  return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S16S32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S16S32.c
index 846536386d9..6ebdae10c86 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S16S32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S16S32.c
@@ -25,7 +25,7 @@
  * Computes the size of the specification structure required.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S32.c
index d57294700e8..d5758d0a7ee 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTGetBufSize_R_S32.c
@@ -25,7 +25,7 @@
  * Computes the size of the specification structure required.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_C_FC32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_C_FC32.c
index cc53c5912f1..4a68b6f6b76 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_C_FC32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_C_FC32.c
@@ -11,7 +11,7 @@
  *  complex float instead of SC32.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC16.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_C_SC16.c
index f8248bbbf0b..0a23b8b7651 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC16.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_C_SC16.c
@@ -25,7 +25,7 @@
  * Initializes the specification structures required
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_C_SC32.c
index 9ea103f3d68..0b4b5371d5e 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_C_SC32.c
@@ -25,7 +25,7 @@
  * Initializes the specification structures required
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_F32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_F32.c
index 32d22230ed7..b5067833517 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_F32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_F32.c
@@ -11,7 +11,7 @@
  *  instead of S32.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S16.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S16.c
new file mode 100644
index 00000000000..e3fc2719e4d
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S16.c
@@ -0,0 +1,232 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ * Some code in this file was originally from file omxSP_FFTInit_R_S16S32.c
+ * which was licensed as follows.
+ * It has been relicensed with permission from the copyright holders.
+ */
+
+/*
+ * OpenMAX DL: v1.0.2
+ * Last Modified Revision:
+ * Last Modified Date:
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ */
+
+#include "dl/api/arm/armOMX.h"
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/armSP.h"
+#include "dl/sp/api/omxSP.h"
+
+/**
+ * Function: omxSP_FFTInit_R_S16
+ *
+ * Description:
+ * Initialize the real forward-FFT specification information struct.
+ *
+ * Remarks:
+ * This function is used to initialize the specification structures
+ * for functions <FFTFwd_RToCCS_S16_Sfs> and
+ * <FFTInv_CCSToR_S16_Sfs>. Memory for *pFFTSpec must be
+ * allocated prior to calling this function. The number of bytes
+ * required for *pFFTSpec can be determined using
+ * <FFTGetBufSize_R_S16>.
+ *
+ * Parameters:
+ * [in]  order       base-2 logarithm of the desired block length;
+ *			   valid in the range [1,12].
+ * [out] pFFTSpec    pointer to the initialized specification structure.
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxSP_FFTInit_R_S16(OMXFFTSpec_R_S16* pFFTSpec, OMX_INT order) {
+  OMX_INT i = 0, j = 0;
+  OMX_SC16 *pTwiddle = NULL, *pTwiddle1 = NULL, *pTwiddle2 = NULL;
+  OMX_SC16 *pTwiddle3 = NULL, *pTwiddle4 = NULL;
+  OMX_S16 *pBuf = NULL;
+  OMX_U16 *pBitRev = NULL;
+  OMX_U32 pTmp = 0;
+  OMX_INT Nby2 = 0, N = 0, M = 0, diff = 0, step = 0;
+  OMX_S16 x = 0, y = 0, xNeg = 0;
+  OMX_S32 xS32 = 0, yS32 = 0;
+  ARMsFFTSpec_R_SC16 *pFFTStruct = NULL;
+
+  /* Reject a NULL spec and any order outside the documented range [1,12] */
+  if (pFFTSpec == NULL || order < 1 || order > 12) {
+    return OMX_Sts_BadArgErr;
+  }
+
+  /* Do the initializations */
+  pFFTStruct = (ARMsFFTSpec_R_SC16*) pFFTSpec;
+  Nby2 = 1 << (order - 1);
+  N = Nby2 << 1;
+  pBitRev = NULL ;  /* optimized implementations don't use bitreversal */
+  pTwiddle = (OMX_SC16*) (sizeof(ARMsFFTSpec_R_SC16) + (OMX_S8*)pFFTSpec);
+
+  /* Align the twiddle table to a 32 byte boundary */
+  pTmp = ((OMX_U32)pTwiddle)&31;  /* (OMX_U32)pTwiddle % 32 */
+  if(pTmp != 0) {
+    pTwiddle = (OMX_SC16*) ((OMX_S8*)pTwiddle + (32 - pTmp));
+  }
+
+  pBuf = (OMX_S16*) (sizeof(OMX_SC16) * (5 * N / 8) + (OMX_S8*)pTwiddle);
+
+  /* Align the work buffer to a 32 byte boundary */
+  pTmp = ((OMX_U32)pBuf)&31;                 /* (OMX_U32)pBuf % 32 */
+  if(pTmp != 0) {
+    pBuf = (OMX_S16*)((OMX_S8*)pBuf + (32 - pTmp));
+  }
+
+  /*
+   * Filling Twiddle factors : exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2).
+   * N/2 point complex FFT is used to compute N point real FFT.
+   * The original twiddle table "armSP_FFT_S32TwiddleTable" is of size
+   * (MaxSize/8 + 1). Rest of the values i.e., up to MaxSize are calculated
+   * using the symmetries of sin and cos.
+   * The max size of the twiddle table needed is 3/4(N/2) for a radix-4 stage.
+   *
+   * W = (-2 * PI) / N
+   * N = 1 << order
+   * W = -PI >> (order - 1)
+   *
+   * Note we use S32 twiddle factor table and round the values to 16 bits.
+   */
+
+  M = Nby2 >> 3;
+  diff = 12 - (order - 1);
+  step = 1 << diff;  /* Step into the twiddle table for the current order */
+
+  xS32 = armSP_FFT_S32TwiddleTable[0];
+  yS32 = armSP_FFT_S32TwiddleTable[1];
+  x = (xS32 + 0x8000) >> 16;
+  y = (yS32 + 0x8000) >> 16;
+  xNeg = 0x7FFF;
+
+  if((order-1) >= 3) {
+    /* i = 0 case */
+    pTwiddle[0].Re = x;
+    pTwiddle[0].Im = y;
+    pTwiddle[2 * M].Re = -y;
+    pTwiddle[2 * M].Im = xNeg;
+    pTwiddle[4 * M].Re = xNeg;
+    pTwiddle[4 * M].Im = y;
+
+    for (i=1; i<=M; i++){
+      OMX_S16 x_neg = 0, y_neg = 0;
+      j = i * step;
+
+      xS32 = armSP_FFT_S32TwiddleTable[2 * j];
+      yS32 = armSP_FFT_S32TwiddleTable[2 * j + 1];
+      x = (xS32 + 0x8000) >> 16;
+      y = (yS32 + 0x8000) >> 16;
+      /* |x_neg = -x| doesn't work when x is 0x8000. */
+      x_neg = (-(xS32 + 0x8000)) >> 16;
+      y_neg = (-(yS32 + 0x8000)) >> 16;
+
+      pTwiddle[i].Re = x;
+      pTwiddle[i].Im = y;
+      pTwiddle[2 * M - i].Re = y_neg;
+      pTwiddle[2 * M - i].Im = x_neg;
+      pTwiddle[2 * M + i].Re = y;
+      pTwiddle[2 * M + i].Im = x_neg;
+      pTwiddle[4 * M - i].Re = x_neg;
+      pTwiddle[4 * M - i].Im = y;
+      pTwiddle[4 * M + i].Re = x_neg;
+      pTwiddle[4 * M + i].Im = y_neg;
+      pTwiddle[6 * M - i].Re = y;
+      pTwiddle[6 * M - i].Im = x;
+    }
+  }
+  else {
+    if ((order - 1) == 2) {
+      pTwiddle[0].Re = x;
+      pTwiddle[0].Im = y;
+      pTwiddle[1].Re = -y;
+      pTwiddle[1].Im = xNeg;
+      pTwiddle[2].Re = xNeg;
+      pTwiddle[2].Im = y;
+    }
+    if ((order-1) == 1) {
+      pTwiddle[0].Re = x;
+      pTwiddle[0].Im = y;
+    }
+  }
+
+  /*
+   * Now fill the last N/4 values : exp^(-j*2*PI*k/N);  k=1,3,5,...,N/2-1.
+   * These are used for the final twiddle fix-up for converting complex to
+   * real FFT.
+   */
+
+  M = N >> 3;
+  diff = 12 - order;
+  step = 1 << diff;
+
+  pTwiddle1 = pTwiddle + 3 * N / 8;
+  pTwiddle4 = pTwiddle1 + (N / 4 - 1);
+  pTwiddle3 = pTwiddle1 + N / 8;
+  pTwiddle2 = pTwiddle1 + (N / 8 - 1);
+
+  xS32 = armSP_FFT_S32TwiddleTable[0];
+  yS32 = armSP_FFT_S32TwiddleTable[1];
+  x = (xS32 + 0x8000) >> 16;
+  y = (yS32 + 0x8000) >> 16;
+  xNeg = 0x7FFF;
+
+  if((order) >= 3) {
+    for (i = 1; i <= M; i += 2 ) {
+      OMX_S16 x_neg = 0, y_neg = 0;
+
+      j = i*step;
+
+      xS32 = armSP_FFT_S32TwiddleTable[2 * j];
+      yS32 = armSP_FFT_S32TwiddleTable[2 * j + 1];
+      x = (xS32 + 0x8000) >> 16;
+      y = (yS32 + 0x8000) >> 16;
+      /* |x_neg = -x| doesn't work when x is 0x8000. */
+      x_neg = (-(xS32 + 0x8000)) >> 16;
+      y_neg = (-(yS32 + 0x8000)) >> 16;
+
+      pTwiddle1[0].Re = x;
+      pTwiddle1[0].Im = y;
+      pTwiddle1 += 1;
+      pTwiddle2[0].Re = y_neg;
+      pTwiddle2[0].Im = x_neg;
+      pTwiddle2 -= 1;
+      pTwiddle3[0].Re = y;
+      pTwiddle3[0].Im = x_neg;
+      pTwiddle3 += 1;
+      pTwiddle4[0].Re = x_neg;
+      pTwiddle4[0].Im = y;
+      pTwiddle4 -= 1;
+    }
+  }
+  else {
+    if (order == 2) {
+      pTwiddle1[0].Re = -y;
+      pTwiddle1[0].Im = xNeg;
+    }
+  }
+
+  /* Publish the computed layout in the specification structure */
+  pFFTStruct->N = N;
+  pFFTStruct->pTwiddle = pTwiddle;
+  pFFTStruct->pBitRev = pBitRev;
+  pFFTStruct->pBuf = pBuf;
+
+  return OMX_Sts_NoErr;
+}
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S16S32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S16S32.c
index d157b3457c4..9a66430c2df 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S16S32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S16S32.c
@@ -25,7 +25,7 @@
  * Initialize the real forward-FFT specification information struct.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S32.c b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S32.c
index 337f2a20b28..d55ab065095 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S32.c
+++ b/chromium/third_party/openmax_dl/dl/sp/src/arm/omxSP_FFTInit_R_S32.c
@@ -25,7 +25,7 @@
  * Initialize the real forward-FFT specification information struct.
  */
 
-#include "dl/api/armOMX.h"
+#include "dl/api/arm/armOMX.h"
 #include "dl/api/omxtypes.h"
 #include "dl/sp/api/armSP.h"
 #include "dl/sp/api/omxSP.h"
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/test/test_fft.gyp b/chromium/third_party/openmax_dl/dl/sp/src/test/test_fft.gyp
index 99b3774324f..99280b59c2d 100644
--- a/chromium/third_party/openmax_dl/dl/sp/src/test/test_fft.gyp
+++ b/chromium/third_party/openmax_dl/dl/sp/src/test/test_fft.gyp
@@ -17,7 +17,7 @@
     ],
     'dependencies' : [
       '../../../dl.gyp:openmax_dl',
-      'test_utilities'
+      'test_utilities',
     ],
     'conditions': [
       ['big_float_fft == 1', {
@@ -27,7 +27,110 @@
       }],
     ],
   },
+  'conditions': [
+    ['target_arch == "arm"', {
+      # Test programs supported on ARM
+      'targets': [
+        {
+          # Test complex fixed-point 16-bit FFT
+          'target_name': 'test_fft16',
+          'type': 'executable',
+          'sources': [
+            'test_fft16.c',
+          ],
+        },
+        {
+          # Test complex fixed-point 32-bit FFT
+          'target_name': 'test_fft32',
+          'type': 'executable',
+          'sources': [
+            'test_fft32.c',
+          ],
+        },
+        {
+          # Test real 32-bit fixed-point FFT
+          'target_name': 'test_rfft32',
+          'type': 'executable',
+          'sources': [
+            'test_rfft32.c',
+          ],
+        },
+        {
+          # Test real 16-bit fixed-point FFT implemented with S32 routines.
+          'target_name': 'test_rfft16_s32',
+          'type': 'executable',
+          'sources': [
+            'test_rfft16_s32.c',
+          ],
+        },
+        {
+          # Test real 16-bit fixed-point FFT implemented with S16 routines.
+          'target_name': 'test_rfft16_s16',
+          'type': 'executable',
+          'sources': [
+            'test_rfft16_s16.c',
+          ],
+        },
+        {
+          # Test complex floating-point FFT
+          'target_name': 'test_float_fft',
+          'type': 'executable',
+          'sources': [
+            'test_float_fft.c',
+            'support/float_fft_neon.c',
+          ],
+        },
+        # Non-NEON test programs
+        {
+          # Test complex floating-point FFT, non-NEON
+          'target_name': 'test_float_fft_armv7',
+          'type': 'executable',
+          'defines': [
+            'ARM_VFP_TEST'
+          ],
+          'sources': [
+            'test_float_fft.c',
+            'support/float_fft_armv7.c',
+          ],
+        },
+        {
+          # Test real floating-point FFT, non-NEON
+          'target_name': 'test_float_rfft_armv7',
+          'type': 'executable',
+          'sources': [
+            'test_float_rfft.c',
+            'support/float_rfft_armv7.c',
+            'support/float_rfft_thresholds.h',
+          ],
+        },
+        {
+          # Test real floating-point FFT, detecting NEON support
+          'target_name': 'test_float_rfft_detect',
+          'type': 'executable',
+          'sources': [
+            'test_float_rfft.c',
+            'support/float_rfft_detect.c',
+            'support/float_rfft_thresholds.h',
+          ],
+        },
+        {
+          # Simple timing test of FFTs, non-NEON
+          'target_name': 'test_fft_time_armv7',
+          'type': 'executable',
+          'defines': [
+            # Timing test for non-NEON is only supported for float FFTs.
+            'ARM_VFP_TEST',
+            'FLOAT_ONLY',
+          ],
+          'sources': [
+            'test_fft_time.c',
+          ],
+        },
+      ],
+    }],
+  ],
   'targets': [
+    # Targets that should be supported by all architectures
     {
       # Test utilities
       'target_name': 'test_utilities',
@@ -43,51 +146,24 @@
       ],
     },
     {
-      # Test complex fixed-point 16-bit FFT
-      'target_name': 'test_fft16',
-      'type': 'executable',
-      'sources': [
-        'test_fft16.c',
-      ],
-    },
-    {
-      # Test complex fixed-point 32-bit FFT
-      'target_name': 'test_fft32',
-      'type': 'executable',
-      'sources': [
-        'test_fft32.c',
-      ],
-    },
-    {
-      # Test real 32-bit fixed-point FFT
-      'target_name': 'test_rfft32',
-      'type': 'executable',
-      'sources': [
-        'test_rfft32.c',
-      ],
-    },
-    {
-      # Test real 16-bit fixed-point FFT
-      'target_name': 'test_rfft16',
-      'type': 'executable',
-      'sources': [
-        'test_rfft16.c',
-      ],
-    },
-    {
-      # Test complex floating-point FFT
-      'target_name': 'test_float_fft',
-      'type': 'executable',
-      'sources': [
-        'test_float_fft.c',
-      ],
-    },
-    {
       # Test real floating-point FFT
       'target_name': 'test_float_rfft',
       'type': 'executable',
       'sources': [
         'test_float_rfft.c',
+        'support/float_rfft_thresholds.h',
+      ],
+      'conditions': [
+        ['target_arch == "arm"', {
+          'sources': [
+            'support/float_rfft_neon.c',
+          ],
+        }],
+        ['target_arch == "ia32"', {
+          'sources': [
+            'support/float_rfft_x86.c',
+          ],
+        }],
       ],
     },
     {
@@ -97,18 +173,42 @@
       'sources': [
         'test_fft_time.c',
       ],
+      'conditions': [
+        ['target_arch == "ia32"', {
+          'defines': [
+            # Timing test only for float FFTs on x86
+            'FLOAT_ONLY',
+          ],
+        }],
+      ],
     },
     {
       # Build all test programs.
       'target_name': 'All',
       'type': 'none',
-      'dependencies': [
-        'test_fft16',
-        'test_fft32',
-        'test_float_fft',
+      'conditions' : [
+        ['target_arch == "arm"', {
+          # Supported test programs for ARM
+          'dependencies': [
+            'test_fft16',
+            'test_fft32',
+            'test_float_fft',
+            'test_float_rfft',
+            'test_rfft16_s32',
+            'test_rfft16_s16',
+            'test_rfft32',
+            # Non-Neon tests
+            'test_fft_time_armv7',
+            'test_float_fft_armv7',
+            'test_float_rfft_armv7',
+            # Tests with detection
+            'test_float_rfft_detect',
+          ],
+        }],
+      ],
+      'dependencies' : [
+        # All architectures must support at least the float rfft test
         'test_float_rfft',
-        'test_rfft16',
-        'test_rfft32',
         'test_fft_time',
       ],
     },
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTFwd_RToCCS_F32_Sfs.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTFwd_RToCCS_F32_Sfs.c
new file mode 100644
index 00000000000..b6d1c98279d
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTFwd_RToCCS_F32_Sfs.c
@@ -0,0 +1,228 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/omxSP.h"
+#include "dl/sp/api/x86SP.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+#include <stdbool.h>
+
+extern OMX_F32* x86SP_F32_radix2_kernel_OutOfPlace(
+    const OMX_F32 *src,
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    bool forward_fft);
+
+extern OMX_F32* x86SP_F32_radix4_kernel_OutOfPlace_sse(
+    const OMX_F32 *src,
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    bool forward_fft);
+
+/**
+ * A two-for-one algorithm is used here to do the real fft:
+ *
+ * Input x[n], (n = 0, ..., N - 1)
+ * Output X[k] = DFT(N, k){x}
+ * a[n] = x[2n], (n = 0, ..., N/2 - 1)
+ * b[n] = x[2n + 1], (n = 0, ..., N/2 - 1)
+ * z[n] = a[n] + j * b[n]
+ * Z[k] = DFT(N/2, k){z}
+ * Z' is the complex conjugate of Z
+ * A[k] = (Z[k] + Z'[N/2 - k]) / 2
+ * B[k] = -j * (Z[k] - Z'[N/2 - k]) / 2
+ * X[k] = A[k] + B[k] * W[k], (W = exp(-j*2*PI*k/N); k = 0, ..., N/2 - 1)
+ * X[k] = A[k] - B[k], (k = N/2)
+ * X' is complex conjugate of X
+ * X[k] = X'[N - k], (k = N/2 + 1, ..., N - 1)
+ */
+
+/**
+ * This function is the last permutation of two-for-one FFT algorithm.
+ * We move the division by 2 to the last step in the implementation, so:
+ * A[k] = (Z[k] + Z'[N/2 - k])
+ * B[k] = -j * (Z[k] - Z'[N/2 - k])
+ * X[k] = (A[k] + B[k] * W[k]) / 2, (k = 0, ..., N/2 - 1)
+ * X[k] = (A[k] - B[k]), (k = N/2)
+ * X[k] = X'[N - k], (k = N/2 + 1, ..., N - 1)
+ */
+static void RevbinPermuteFwd(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  // |in| is read in split format (one component in [0, n/2), the other in
+  // [n/2, n)); |out| is written as interleaved pairs, bin k at out[2k].
+  const OMX_INT half = n >> 1;
+  const OMX_INT quarter = n >> 2;
+  const OMX_F32 *in_hi = in + half;
+  OMX_INT lo;
+  OMX_INT hi;
+  OMX_FC32 sum;
+  OMX_FC32 dif;
+  OMX_FC32 rot;
+
+  // |lo| climbs from bin 1 while |hi| = N/2 - lo descends; each pass
+  // produces both of those output bins.
+  for (lo = 1, hi = half - 1; lo < quarter; ++lo, --hi) {
+    // A[k] = (Z[k] + Z'[N/2 - k])
+    sum.Re = in[lo] + in[hi];
+    sum.Im = in_hi[hi] - in_hi[lo];
+
+    // B[k] = -j * (Z[k] - Z'[N/2 - k])
+    dif.Re = in[hi] - in[lo];
+    dif.Im = in_hi[hi] + in_hi[lo];
+
+    // rot = B[k] * W[k]; W[k] is split across twiddle[k] and twiddle[k + n].
+    rot.Re = dif.Re * twiddle[lo] + dif.Im * twiddle[lo + n];
+    rot.Im = dif.Re * twiddle[lo + n] - dif.Im * twiddle[lo];
+
+    // X[k] = (A[k] + B[k] * W[k]) / 2, stored as an interleaved pair.
+    out[2 * lo] = 0.5f * (sum.Re - rot.Im);
+    out[2 * lo + 1] = 0.5f * (rot.Re - sum.Im);
+    // The mirrored bin N/2 - k is formed from the same A and B terms.
+    out[2 * hi] = 0.5f * (sum.Re + rot.Im);
+    out[2 * hi + 1] = 0.5f * (rot.Re + sum.Im);
+  }
+
+  // Bin N/4 (interleaved index N/2) is copied with its second part negated.
+  out[half] = in[quarter];
+  out[half + 1] = -in_hi[quarter];
+
+  // Bins 0 and N/2 are written with a zero second component.
+  out[0] = in[0] + in[half];
+  out[1] = 0;
+  out[n] = in[0] - in[half];
+  out[n + 1] = 0;
+}
+
+// SSE version of RevbinPermuteFwd; the loop advances four bins per pass.
+static void RevbinPermuteFwdSse(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  OMX_INT i;
+  OMX_INT j;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+
+  VC v_i;
+  VC v_j;
+  VC v_big_a;
+  VC v_big_b;
+  VC v_temp;
+  VC v_x0;
+  VC v_x1;
+  VC v_tw;
+
+  __m128 factor = _mm_set1_ps(0.5f);  // the final division by 2
+
+  for (i = 0, j = n_by_2 - 3; i < n_by_4; i += 4, j -= 4) {
+    VC_LOAD_SPLIT(&v_i, (in + i), n_by_2);  // forward group starting at bin i
+
+    VC_LOADU_SPLIT(&v_j, (in + j), n_by_2);  // unaligned group starting at j
+    VC_REVERSE(&v_j);  // presumably reverses lane order to pair with v_i
+
+    // A[k] = (Z[k] + Z'[N/2 - k])
+    VC_ADD_SUB(&v_big_a, &v_j, &v_i);
+
+    // B[k] = -j * (Z[k] - Z'[N/2 - k])
+    VC_SUB_ADD(&v_big_b, &v_j, &v_i);
+
+    // W[k], read in split format with the second half n floats further on
+    VC_LOAD_SPLIT(&v_tw, (twiddle + i), n);
+
+    // temp = B[k] * W[k]
+    VC_CONJ_MUL(&v_temp, &v_big_b, &v_tw);
+
+    VC_SUB_X(&v_x0, &v_big_a, &v_temp);
+    VC_ADD_X(&v_x1, &v_big_a, &v_temp);
+
+    VC_MUL_F(&v_x0, &v_x0, factor);
+    VC_MUL_F(&v_x1, &v_x1, factor);
+
+    // X[k] = (A[k] + B[k] * W[k]) / 2, stored interleaved at out[2 * i]
+    VC_STORE_INTERLEAVE((out + (i << 1)), &v_x0);
+
+    // Mirrored group, reversed again and stored (unaligned) at out[2 * j]
+    VC_REVERSE(&v_x1);
+    VC_STOREU_INTERLEAVE((out + (j << 1)), &v_x1);
+  }
+
+  out[n_by_2] = in[n_by_4];  // bin N/4, overwriting the loop's value if any
+  out[n_by_2 + 1] = -in[n_by_4 + n_by_2];
+
+  out[0] = in[0] + in[n_by_2];  // bin 0
+  out[1] = 0;
+  out[n] = in[0] - in[n_by_2];  // bin N/2
+  out[n + 1] = 0;
+}
+
+OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs(const OMX_F32 *pSrc, OMX_F32 *pDst,
+                                      const OMXFFTSpec_R_F32 *pFFTSpec) {
+  // All pointers are required; pSrc and pDst must be 32 byte aligned.
+  if (!pSrc || !pDst || !pFFTSpec ||
+      ((OMX_INT)pSrc & 31) || ((OMX_INT)pDst & 31))
+    return OMX_Sts_BadArgErr;
+
+  OMX_INT n;
+  OMX_INT n_by_2;
+  const OMX_F32 *twiddle;
+  OMX_F32 *buf;
+  const X86FFTSpec_R_FC32 *pFFTStruct = (const X86FFTSpec_R_FC32*) pFFTSpec;
+
+  n = pFFTStruct->N;
+
+  // Order 1 (n == 2) is computed directly, without kernel or work buffers.
+  if (n == 2) {
+    pDst[0] = (pSrc[0] + pSrc[1]);
+    pDst[1] = 0.0f;
+    pDst[2] = (pSrc[0] - pSrc[1]);
+    pDst[3] = 0.0f;
+    return OMX_Sts_NoErr;
+  }
+
+  n_by_2 = n >> 1;
+  buf = pFFTStruct->pBuf1;
+  twiddle = pFFTStruct->pTwiddle;
+
+  // n/2 point complex FFT; the pointer it returns is what the fix-up reads.
+  if(n_by_2 >= 16) {
+    buf = x86SP_F32_radix4_kernel_OutOfPlace_sse(
+        pSrc,
+        pFFTStruct->pBuf2,
+        buf,
+        twiddle,
+        n_by_2,
+        true);
+  } else {
+    buf = x86SP_F32_radix2_kernel_OutOfPlace(
+        pSrc,
+        pFFTStruct->pBuf2,
+        buf,
+        twiddle,
+        n_by_2,
+        true);
+  }
+
+  // Two-for-one fix-up into pDst; the SSE variant is used from n == 8 up.
+  if(n >= 8)
+    RevbinPermuteFwdSse(buf, pDst, twiddle, n);
+  else
+    RevbinPermuteFwd(buf, pDst, twiddle, n);
+
+  return OMX_Sts_NoErr;
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTGetBufSize_R_F32.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTGetBufSize_R_F32.c
new file mode 100644
index 00000000000..f686a7f2f58
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTGetBufSize_R_F32.c
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/x86SP.h"
+#include "dl/sp/api/omxSP.h"
+
+/**
+ * Function: omxSP_FFTGetBufSize_R_F32
+ *
+ * Description:
+ * Computes the size of the specification structure required for the length
+ * 2^order real FFT and IFFT functions.
+ *
+ * Remarks:
+ * This function is used in conjunction with the 32-bit functions
+ * <FFTFwd_RToCCS_F32_Sfs> and <FFTInv_CCSToR_F32_Sfs>.
+ *
+ * Parameters:
+ * [in]  order       base-2 logarithm of the length; valid in the range
+ *                    [1,12]. ([1,15] if BIG_FFT_TABLE is defined.)
+ * [out] pSize	   pointer to the number of bytes required for the
+ *			   specification structure.
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxSP_FFTGetBufSize_R_F32(OMX_INT order, OMX_INT *pSize) {
+  OMX_INT n;
+
+  if (!pSize || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
+    return OMX_Sts_BadArgErr;
+
+  n = 1 << order;
+
+  // Sized to match omxSP_FFTInit_R_F32, which places pBuf2 at pBuf1 + n + 4
+  // floats: each ping-pong buffer is (n + 4) floats (padding is in elements).
+  *pSize = sizeof(X86FFTSpec_R_FC32) +
+           // Twiddle factors.
+           sizeof(OMX_F32) * (n << 1) +
+           // Ping Pong buffer for doing the n/2 point complex FFT.
+           // pBuf1
+           sizeof(OMX_F32) * (n + 4) +
+           // pBuf2
+           sizeof(OMX_F32) * (n + 4) +
+           // Extra bytes to get 32 byte alignment of ptwiddle, pBuf1
+           62;
+
+  return OMX_Sts_NoErr;
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTInit_R_F32.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTInit_R_F32.c
new file mode 100644
index 00000000000..564f1666274
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTInit_R_F32.c
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ *  This is a modification of omxSP_FFTInit_R_S32.c to support float
+ *  instead of S32.
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/omxSP.h"
+#include "dl/sp/api/x86SP.h"
+
+/**
+ * Function: omxSP_FFTInit_R_F32
+ *
+ * Description:
+ * Initialize the real forward-FFT specification information struct.
+ *
+ * Remarks:
+ * This function is used to initialize the specification structures
+ * for functions |omxSP_FFTFwd_RToCCS_F32_Sfs| and
+ * |omxSP_FFTInv_CCSToR_F32_Sfs|. Memory for *pFFTSpec must be
+ * allocated prior to calling this function. The number of bytes
+ * required for *pFFTSpec can be determined using
+ * |omxSP_FFTGetBufSize_R_F32|.
+ *
+ * Parameters:
+ * [in]  order       base-2 logarithm of the desired block length;
+ *                         valid in the range [1,12].  ([1,15] if
+ *                         BIG_FFT_TABLE is defined.)
+ * [out] pFFTSpec    pointer to the initialized specification structure.
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxSP_FFTInit_R_F32(OMXFFTSpec_R_F32 *pFFTSpec, OMX_INT order)
+{
+  OMX_F32 *pTwiddle;
+  OMX_F32 *pBuf;
+  OMX_INT i;
+  OMX_INT j;
+  OMX_INT N;
+  OMX_INT NBy2;
+  OMX_INT NBy4;
+  OMX_INT diff;
+  OMX_U32 pTmp;
+  X86FFTSpec_R_FC32  *pFFTStruct = (X86FFTSpec_R_FC32 *) pFFTSpec;
+  OMX_F32 real;
+  OMX_F32 imag;
+
+  if (!pFFTSpec || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
+    return OMX_Sts_BadArgErr;
+
+  N = 1 << order;
+  NBy2 = N >> 1;
+
+  pTwiddle = (OMX_F32*) (sizeof(X86FFTSpec_R_FC32) + (OMX_S8*) pFFTSpec);
+
+  // The twiddle table starts right after the spec struct, rounded up to 32.
+  pTmp = ((OMX_U32)pTwiddle) & 31;  // only the low five address bits matter
+  if (pTmp)
+    pTwiddle = (OMX_F32*) ((OMX_S8*)pTwiddle + (32 - pTmp));
+
+  pBuf = (OMX_F32*) (sizeof(OMX_F32) * (N << 1) + (OMX_S8*) pTwiddle);
+
+  // pBuf follows the 2 * N float twiddle table, again rounded up to 32 bytes.
+  pTmp = ((OMX_U32)pBuf) & 31;
+  if (pTmp)
+    pBuf = (OMX_F32*) ((OMX_S8*)pBuf + (32 - pTmp));
+
+  // Stride, in floats, through armSP_FFT_F32TwiddleTable for this order.
+  diff = 1 << (TWIDDLE_TABLE_ORDER - order + 1);
+
+  // Split format for SSE: entry k keeps one component at pTwiddle[k] and the
+  // other at pTwiddle[k + N]; signs are folded in while filling.  Some indices
+  // coincide (e.g. NBy4 - i and NBy4 + i at i == 0); the later store wins.
+  if (order > 1) {
+    NBy4 = N >> 2;
+    for (i = 0, j = 0; i <= NBy4 >> 1; ++i, j += diff) {  // i = 0 .. N/8
+      real = armSP_FFT_F32TwiddleTable[j];
+      imag = armSP_FFT_F32TwiddleTable[j + 1];
+
+      pTwiddle[i] = -real;
+      pTwiddle[i + N] = -imag;
+
+      pTwiddle[NBy4 - i] = imag;
+      pTwiddle[NBy4 - i + N] = real;
+
+      pTwiddle[NBy4 + i] = -imag;
+      pTwiddle[NBy4 + i + N] = real;
+
+      pTwiddle[NBy2 - i] = real;
+      pTwiddle[NBy2 - i + N] = -imag;
+
+      pTwiddle[NBy2 + i] = real;
+      pTwiddle[NBy2 + i + N] = imag;
+
+      pTwiddle[NBy4 * 3 - i] = -imag;
+      pTwiddle[NBy4 * 3 - i + N] = -real;
+
+      pTwiddle[NBy4 * 3 + i] = imag;
+      pTwiddle[NBy4 * 3 + i + N] = -real;
+
+      pTwiddle[N - i - 1] = -real;
+      pTwiddle[(N << 1) - i - 1] = imag;
+    }
+  } else {  // order == 1 (N == 2): fill the four table entries directly
+    pTwiddle[0] = armSP_FFT_F32TwiddleTable[0];
+    pTwiddle[2] = armSP_FFT_F32TwiddleTable[1];
+    pTwiddle[1] = -pTwiddle[0];
+    pTwiddle[3] = pTwiddle[2];
+  }
+  pFFTStruct->N = N;
+  pFFTStruct->pTwiddle = pTwiddle;
+  pFFTStruct->pBuf1 = pBuf;
+  pFFTStruct->pBuf2 = pBuf + N + 4;  // second buffer starts N + 4 floats on
+
+  return OMX_Sts_NoErr;
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTInv_CCSToR_F32_Sfs.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTInv_CCSToR_F32_Sfs.c
new file mode 100644
index 00000000000..1733d665288
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/omxSP_FFTInv_CCSToR_F32_Sfs.c
@@ -0,0 +1,252 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/omxSP.h"
+#include "dl/sp/api/x86SP.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+// Complex FFT kernel defined in another translation unit. This file calls it
+// for the half-length transform when n / 2 < 16, passing 0 (false) for
+// |forward_fft|, and treats the returned pointer as the start of the result
+// in split format (real parts first, imaginary parts n / 2 floats later).
+// NOTE(review): how |buf1| and |buf2| are used internally is not visible
+// from this file -- confirm against the kernel's definition.
+extern OMX_F32* x86SP_F32_radix2_kernel_OutOfPlace(
+    const OMX_F32 *src,
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    bool forward_fft);
+
+// Same calling convention as the kernel above; this file selects it instead
+// when n / 2 >= 16.
+extern OMX_F32* x86SP_F32_radix4_kernel_OutOfPlace_sse(
+    const OMX_F32 *src,
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    bool forward_fft);
+
+/**
+ * A two-for-one algorithm is used here to do the real ifft:
+ *
+ * Input X[k], (k = 0, ..., N - 1)
+ * Output x[n] = IDFT(N, k){X}
+ * X' is complex conjugate of X
+ * A[k] = (X[k] + X'[N/2 - k]) / 2
+ * B[k] = (X[k] - X'[N/2 - k]) / 2 * W[k], (W = exp(j*2*PI*k/N);
+ *                                          k = 0, ..., N/2 - 1)
+ * Z[k] = A[k] + j * B[k], (k = 0, ..., N/2 - 1)
+ * z[n] = IDFT(N/2, k){Z}
+ * x[2n] = Re(z[n]), (n = 0, ..., N/2 - 1)
+ * x[2n + 1] = Im(z[n]), (n = 0, ..., N/2 - 1)
+ */
+
+/**
+ * This function is the first permutation of two-for-one IFFT algorithm.
+ * We move the division by 2 to the last step in the implementation, so:
+ * A[k] = (X[k] + X'[N/2 - k])
+ * B[k] = (X[k] - X'[N/2 - k]) * W[k], (k = 0, ..., N/2 - 1)
+ * Z[k] = (A[k] + j * B[k]) / 2, (k = 0, ..., N/2 - 1)
+ */
+// Scalar pre-processing pass of the two-for-one inverse real FFT.
+//
+// in:      interleaved (Re, Im) spectrum; floats up to index n are read.
+// out:     split-format Z[k]: real part at out[k], imaginary part at
+//          out[k + n / 2].
+// twiddle: split-format table; real part at twiddle[k], imaginary part at
+//          twiddle[k + n].
+// n:       length of the real sequence.
+static void RevbinPermuteInv(const OMX_F32 *in,
+                             OMX_F32 *out,
+                             const OMX_F32 *twiddle,
+                             OMX_INT n) {
+  const OMX_INT half = n >> 1;
+  const OMX_INT quarter = n >> 2;
+  OMX_INT k;
+
+  // Bin k (counted from the low end) is combined with its mirror bin.
+  for (k = 1; (k << 1) < half; ++k) {
+    const OMX_INT lo = k << 1;        // float index of X[k]
+    const OMX_INT hi = n - lo;        // float index of the mirrored bin
+    const OMX_INT mirror = hi >> 1;   // complex index of the mirrored bin
+    const OMX_F32 *w = twiddle + k;   // W[k]
+    OMX_F32 a_re;
+    OMX_F32 a_im;
+    OMX_F32 d_re;
+    OMX_F32 d_im;
+    OMX_F32 b_re;
+    OMX_F32 b_im;
+
+    // A[k] = X[k] + X'[N/2 - k]
+    a_re = in[lo] + in[hi];
+    a_im = in[lo + 1] - in[hi + 1];
+
+    // d = X[k] - X'[N/2 - k]
+    d_re = in[lo] - in[hi];
+    d_im = in[lo + 1] + in[hi + 1];
+
+    // B[k] = d * W[k]
+    b_re = d_re * w[0] + d_im * w[n];
+    b_im = d_re * w[n] - d_im * w[0];
+
+    // Z[k] = A[k] + j * B[k], stored in split format. The factor of 1/2 is
+    // folded into the final scaling in omxSP_FFTInv_CCSToR_F32_Sfs.
+    out[k] = a_re + b_im;
+    out[k + half] = b_re + a_im;
+    out[mirror] = a_re - b_im;
+    out[mirror + half] = b_re - a_im;
+  }
+
+  // Middle bin (complex index n / 4).
+  out[quarter] = 2.0f * in[half];
+  out[quarter + half] = -2.0f * in[half + 1];
+
+  // Bin 0, built from the real parts of X[0] and X[N/2].
+  out[0] = in[0] + in[n];
+  out[half] = in[0] - in[n];
+}
+
+// SSE version of RevbinPermuteInv. Each iteration combines four complex
+// values from the low end of the spectrum with four from the mirrored end.
+//
+// in:      interleaved (Re, Im) spectrum. The loop reads floats up to index
+//          n + 1 (pj[7] in the first iteration).
+// out:     split-format result; the scalar stores below put real parts at
+//          out[k] and imaginary parts at out[k + n / 2], and the store macros
+//          are given the same n / 2 offset.
+// twiddle: split-format twiddle table; VC_LOAD_SPLIT is given n as the
+//          offset between the two halves.
+// n:       length of the real sequence. The caller in this file only uses
+//          this version when n >= 8.
+static void RevbinPermuteInvSse(const OMX_F32 *in,
+                                OMX_F32 *out,
+                                const OMX_F32 *twiddle,
+                                OMX_INT n) {
+  OMX_INT i;
+  OMX_INT j;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  const OMX_F32 *tw;
+  const OMX_F32 *pi;
+  const OMX_F32 *pj;
+
+  VC v_i;
+  VC v_j;
+  VC v_big_a;
+  VC v_big_b;
+  VC v_temp;
+  VC v_tw;
+
+  // i is the complex index of the low block; j is the lowest complex index of
+  // the mirrored block (complex values j .. j + 3).
+  for (i = 0, j = n_by_2 - 3; i < n_by_4; i += 4, j -= 4) {
+    pi = in + (i << 1);
+    pj = in + (j << 1);
+    VC_LOAD_INTERLEAVE(&v_i, pi);
+
+    // _mm_set_ps takes its arguments from the highest lane down, so the
+    // mirrored block is loaded in reversed order: lane 0 holds complex value
+    // j + 3 and lane 3 holds complex value j.
+    v_j.real = _mm_set_ps(pj[0], pj[2], pj[4], pj[6]);
+    v_j.imag = _mm_set_ps(pj[1], pj[3], pj[5], pj[7]);
+
+    // A[k] = (X[k] + X'[N/2 - k])
+    VC_ADD_SUB(&v_big_a, &v_i, &v_j);
+
+    // temp = (X[k] - X'[N/2 - k])
+    VC_SUB_ADD(&v_temp, &v_i, &v_j);
+
+    // W[k]
+    tw = twiddle + i;
+    VC_LOAD_SPLIT(&v_tw, tw, n);
+
+    // B[k] = (X[k] - X'[N/2 - k]) * W[k]
+    VC_CONJ_MUL(&v_big_b, &v_temp, &v_tw);
+
+    // Convert split format to interleaved format.
+    // Z[k] = (A[k] + j * B[k]) (k = 0, ..., N/2 - 1)
+    // The scaling of 1/2 will be merged into the scaling in
+    // the last step before the output in omxSP_FFTInv_CCSToR_F32_Sfs.
+    VC_ADD_X_STORE_SPLIT((out + i), &v_big_a, &v_big_b, n_by_2);
+
+    // NOTE(review): out + j is not 16-byte aligned relative to out + i
+    // (j = n / 2 - 3 - i), which is presumably why the STOREU variant is
+    // used here -- confirm against the macro definition.
+    VC_SUB_X_INVERSE_STOREU_SPLIT((out + j), &v_big_a, &v_big_b, n_by_2);
+  }
+
+  // The scalar stores below run after the loop, so they replace whatever the
+  // vector code left in these four slots.
+
+  // The n_by_2 complex point
+  out[n_by_4] = 2.0f * in[n_by_2];
+  out[n_by_4 + n_by_2] = -2.0f * in[n_by_2 + 1];
+
+  // The first complex point
+  out[0] = in[0] + in[n];
+  out[n_by_2] = in[0] - in[n];
+}
+
+/**
+ * Inverse FFT of the spectrum of a real signal. Writes the n real
+ * time-domain samples to |pDst|, scaled by 1/n.
+ *
+ * pSrc     - spectrum as interleaved (Re, Im) floats; must be 32-byte
+ *            aligned. For n >= 8 the permutation step reads floats up to
+ *            index n + 1.
+ * pDst     - receives n floats; must be 32-byte aligned.
+ * pFFTSpec - FFT specification; N, pTwiddle, pBuf1 and pBuf2 are read here.
+ *
+ * Returns OMX_Sts_BadArgErr if any pointer is NULL or if pSrc / pDst is not
+ * 32-byte aligned, otherwise OMX_Sts_NoErr.
+ */
+OMXResult omxSP_FFTInv_CCSToR_F32_Sfs(const OMX_F32 *pSrc, OMX_F32 *pDst,
+                                      const OMXFFTSpec_R_F32 *pFFTSpec) {
+  const X86FFTSpec_R_FC32 *pFFTStruct = (const X86FFTSpec_R_FC32*) pFFTSpec;
+  const OMX_F32 *twiddle;
+  OMX_F32 *buf;
+  OMX_F32 factor;
+  OMX_INT n;
+  OMX_INT n_by_2;
+  OMX_INT i;
+
+  // The spec is dereferenced below, so it has to be validated together with
+  // the data pointers.
+  if (!pSrc || !pDst || !pFFTSpec)
+    return OMX_Sts_BadArgErr;
+
+  // Input and output must be 32 byte aligned. uintptr_t keeps the
+  // pointer-to-integer conversion well defined on 64-bit targets.
+  if (((uintptr_t)pSrc & 31) || ((uintptr_t)pDst & 31))
+    return OMX_Sts_BadArgErr;
+
+  n = pFFTStruct->N;
+
+  // This is to handle the case of order == 1.
+  if (n == 2) {
+    pDst[0] = (pSrc[0] + pSrc[2]) / 2;
+    pDst[1] = (pSrc[0] - pSrc[2]) / 2;
+    return OMX_Sts_NoErr;
+  }
+
+  n_by_2 = n >> 1;
+  buf = pFFTStruct->pBuf1;
+  twiddle = pFFTStruct->pTwiddle;
+
+  // Step 1: build the half-length complex sequence Z[k] in split format.
+  if (n < 8)
+    RevbinPermuteInv(pSrc, buf, twiddle, n);
+  else
+    RevbinPermuteInvSse(pSrc, buf, twiddle, n);
+
+  // Step 2: half-length inverse complex FFT. The kernel returns the buffer
+  // that holds the result.
+  if (n_by_2 < 16) {
+    buf = x86SP_F32_radix2_kernel_OutOfPlace(
+        buf,
+        pFFTStruct->pBuf2,
+        buf,
+        twiddle,
+        n_by_2,
+        false);
+  } else {
+    buf = x86SP_F32_radix4_kernel_OutOfPlace_sse(
+        buf,
+        pFFTStruct->pBuf2,
+        buf,
+        twiddle,
+        n_by_2,
+        false);
+  }
+
+  // Step 3: scale the result by 1/n. This includes the factor of 1/2 that
+  // RevbinPermuteInv/RevbinPermuteInvSse left out.
+  factor = 1.0f / n;
+
+  if (n < 8) {
+    // Split -> interleaved: x[2i] = Re(z[i]), x[2i + 1] = Im(z[i]).
+    for (i = 0; i < n_by_2; i++) {
+      pDst[i << 1] = buf[i] * factor;
+      pDst[(i << 1) + 1] = buf[i + n_by_2] * factor;
+    }
+  } else {
+    OMX_F32 *base;
+    OMX_F32 *dst;
+    VC temp0;
+    VC temp1;
+    __m128 mFactor = _mm_load1_ps(&factor);
+
+    // Two things are done in this loop:
+    // 1 Get the result scaled; 2 Change the format from split to interleaved.
+    for (i = 0; i < n_by_2; i += 4) {
+      base = buf + i;
+      dst = pDst + (i << 1);
+      VC_LOAD_SPLIT(&temp0, base, n_by_2);
+      VC_MUL_F(&temp1, &temp0, mFactor);
+      VC_STORE_INTERLEAVE(dst, &temp1);
+    }
+  }
+
+  return OMX_Sts_NoErr;
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_fs.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_fs.c
new file mode 100644
index 00000000000..6fa21cfb40d
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_fs.c
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+// Radix-2 butterflies without twiddle factors.
+//
+// in:  interleaved (Re, Im) data; the butterfly partner of a value lies n
+//      floats further on.
+// out: split-format result; real part at out[k], imaginary part at
+//      out[k + n]. Sums go to the first half, differences n / 2 later.
+void x86SP_FFT_CToC_FC32_Fwd_Radix2_fs(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    OMX_INT n) {
+  const OMX_INT half = n >> 1;
+  OMX_INT k;
+
+  for (k = 0; (k << 1) < n; ++k) {
+    const OMX_F32 *a = in + (k << 1);
+    const OMX_F32 *b = a + n;
+    OMX_F32 *sum = out + k;
+    OMX_F32 *diff = sum + half;
+
+    // sum = a + b
+    sum[0] = a[0] + b[0];
+    sum[n] = a[1] + b[1];
+
+    // diff = a - b
+    diff[0] = a[0] - b[0];
+    diff[n] = a[1] - b[1];
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls.c
new file mode 100644
index 00000000000..f4d991c85c3
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls.c
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+// Radix-2 butterflies on adjacent element pairs, with a twiddle multiply.
+//
+// in:      split-format data; real part at in[k], imaginary part at in[k + n].
+// out:     split-format result; sums at out[k], differences at
+//          out[k + n / 2], imaginary parts n floats after the real parts.
+// twiddle: real part at twiddle[p], imaginary part at twiddle[p + 2 * n];
+//          only even p are used.
+void x86SP_FFT_CToC_FC32_Fwd_Radix2_ls(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  const OMX_INT half = n >> 1;
+  const OMX_INT tw_imag = n << 1;
+  OMX_INT k;
+
+  for (k = 0; (k << 1) < n; ++k) {
+    const OMX_INT pos = k << 1;
+    const OMX_F32 *w = twiddle + pos;
+    const OMX_F32 *even = in + pos;
+    const OMX_F32 *odd = even + 1;
+    OMX_F32 *sum = out + k;
+    OMX_F32 *diff = sum + half;
+    OMX_F32 prod_re;
+    OMX_F32 prod_im;
+
+    // prod = w * odd
+    prod_re = w[0] * odd[0] - w[tw_imag] * odd[n];
+    prod_im = w[0] * odd[n] + w[tw_imag] * odd[0];
+
+    // sum = even + prod
+    sum[0] = even[0] + prod_re;
+    sum[n] = even[n] + prod_im;
+
+    // diff = even - prod
+    diff[0] = even[0] - prod_re;
+    diff[n] = even[n] - prod_im;
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse.c
new file mode 100644
index 00000000000..a712d96e4b3
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse.c
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+// SSE radix-2 stage that handles four butterflies per iteration.
+//
+// in:      split-format data; real parts are read from in + i and imaginary
+//          parts from in + i + n, eight floats each per iteration.
+// out:     split-format result; the store macros are given n as the split
+//          offset. One store goes to out0, the other to out0 + n / 2.
+// twiddle: real parts at tw[0], tw[2], tw[4], tw[6]; imaginary parts 2 * n
+//          floats further on.
+// n:       must be at least 8.
+void x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  OMX_F32 *out0 = out;
+  OMX_INT i;
+
+  // This function is used when n >= 8. The explicit return covers builds in
+  // which assert() is compiled out.
+  assert(n >= 8);
+  if (n < 8) return;
+
+  for (i = 0; i < n; i += 8) {
+    VC v_tw;
+    VC v_t0;
+    VC v_t1;
+    VC v_temp;
+
+    // Load twiddle: every second entry, lowest index in lane 0.
+    const OMX_F32 *tw = twiddle + i;
+    v_tw.real = _mm_set_ps(tw[6], tw[4], tw[2], tw[0]);
+    const OMX_F32 * twi = tw + (n << 1);
+    v_tw.imag = _mm_set_ps(twi[6], twi[4], twi[2], twi[0]);
+
+    // Load real part.
+    // NOTE(review): VC_LOAD_SHUFFLE presumably splits the eight floats into
+    // even-indexed (v_t0) and odd-indexed (v_t1) elements, matching the
+    // scalar _ls version -- confirm against the macro definition.
+    const OMX_F32 *t = in + i;
+    VC_LOAD_SHUFFLE(&(v_t0.real), &(v_t1.real), t);
+
+    // Load imag part
+    t = t + n;
+    VC_LOAD_SHUFFLE(&(v_t0.imag), &(v_t1.imag), t);
+
+    OMX_F32 *out1 = out0 + (n >> 1);
+    // v_temp = twiddle * v_t1
+    VC_MUL(&v_temp, &v_tw, &v_t1);
+
+    VC_SUB_STORE_SPLIT(out1, &v_t0, &v_temp, n);
+
+    VC_ADD_STORE_SPLIT(out0, &v_t0, &v_temp, n);
+
+    // Four results were written to each output half.
+    out0 += 4;
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ms.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ms.c
new file mode 100644
index 00000000000..37148775e25
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix2_ms.c
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+// Radix-2 stage over |sub_size| groups of |sub_num| elements. Each group has
+// sub_num / 2 butterflies that share one twiddle factor.
+//
+// in / out: split format, imaginary parts n floats after the real parts.
+// twiddle:  real part at twiddle[p], imaginary part at twiddle[p + 2 * n];
+//           group g uses p = g * sub_num.
+void x86SP_FFT_CToC_FC32_Fwd_Radix2_ms(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    OMX_INT sub_size,
+    OMX_INT sub_num) {
+  const OMX_INT half = n >> 1;
+  const OMX_INT tw_imag = n << 1;
+  const OMX_INT pairs = sub_num >> 1;
+  OMX_F32 *sum = out;
+  OMX_INT g;
+  OMX_INT s;
+
+  for (g = 0; g < sub_size; ++g) {
+    const OMX_INT base = g * sub_num;
+    const OMX_F32 *w = twiddle + base;
+
+    // Outputs are written consecutively across all groups.
+    for (s = 0; s < pairs; ++s, ++sum) {
+      const OMX_F32 *upper = in + s + base;
+      const OMX_F32 *lower = upper + pairs;
+      OMX_F32 *diff = sum + half;
+      OMX_F32 prod_re;
+      OMX_F32 prod_im;
+
+      // prod = w * lower
+      prod_re = w[0] * lower[0] - w[tw_imag] * lower[n];
+      prod_im = w[0] * lower[n] + w[tw_imag] * lower[0];
+
+      // sum = upper + prod
+      sum[0] = upper[0] + prod_re;
+      sum[n] = upper[n] + prod_im;
+
+      // diff = upper - prod
+      diff[0] = upper[0] - prod_re;
+      diff[n] = upper[n] - prod_im;
+    }
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs.c
new file mode 100644
index 00000000000..36a40d8a910
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs.c
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+// Radix-4 butterflies without twiddle factors.
+//
+// in:  n interleaved (Re, Im) complex values.
+// out: split-format result; real parts in out[0 .. n - 1], imaginary parts
+//      in out[n .. 2n - 1].
+void x86SP_FFT_CToC_FC32_Fwd_Radix4_fs(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    OMX_INT n) {
+  const OMX_INT quarter = n >> 2;
+  OMX_INT k;
+
+  // Pass 1: de-interleave the input into the output buffer.
+  for (k = 0; k < n; ++k) {
+    const OMX_F32 *pair = in + (k << 1);
+
+    out[k] = pair[0];
+    out[k + n] = pair[1];
+  }
+
+  // Pass 2: in-place butterflies on the four quarters of |out|. All inputs
+  // of a butterfly are read before any of its outputs is written.
+  for (k = 0; k < quarter; ++k) {
+    OMX_F32 *x0 = out + k;
+    OMX_F32 *x1 = x0 + quarter;
+    OMX_F32 *x2 = x1 + quarter;
+    OMX_F32 *x3 = x2 + quarter;
+
+    // a = x0 + x2, b = x0 - x2
+    const OMX_F32 a_re = x0[0] + x2[0];
+    const OMX_F32 a_im = x0[n] + x2[n];
+    const OMX_F32 b_re = x0[0] - x2[0];
+    const OMX_F32 b_im = x0[n] - x2[n];
+
+    // c = x1 + x3, d = x1 - x3
+    const OMX_F32 c_re = x1[0] + x3[0];
+    const OMX_F32 c_im = x1[n] + x3[n];
+    const OMX_F32 d_re = x1[0] - x3[0];
+    const OMX_F32 d_im = x1[n] - x3[n];
+
+    // x0 = a + c
+    x0[0] = a_re + c_re;
+    x0[n] = a_im + c_im;
+
+    // x2 = a - c
+    x2[0] = a_re - c_re;
+    x2[n] = a_im - c_im;
+
+    // x1 = b - j * d
+    x1[0] = b_re + d_im;
+    x1[n] = b_im - d_re;
+
+    // x3 = b + j * d
+    x3[0] = b_re - d_im;
+    x3[n] = b_im + d_re;
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse.c
new file mode 100644
index 00000000000..58908d3aa2b
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse.c
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+// SSE radix-4 stage without twiddle factors.
+//
+// in:  read at four positions that are n / 2 floats apart, eight floats from
+//      each per iteration.
+//      NOTE(review): presumably interleaved (Re, Im) data that
+//      VC_LOAD_SHUFFLE separates into real and imaginary vectors -- confirm
+//      against the macro definition.
+// out: four output positions n / 4 floats apart; the store macro is given n
+//      as the split offset.
+// n:   the loop advances i by 8 up to n / 2.
+void x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    OMX_INT n) {
+  OMX_INT i;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i += 8) {
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    // The four butterfly inputs, each n / 2 floats after the previous one.
+    const OMX_F32 *in0 = in + i;
+    const OMX_F32 *in1 = in0 + n_by_2;
+    const OMX_F32 *in2 = in1 + n_by_2;
+    const OMX_F32 *in3 = in2 + n_by_2;
+
+    // The four butterfly outputs, each n / 4 floats after the previous one.
+    OMX_F32 *out1 = out0 + n_by_4;
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    VC_LOAD_SHUFFLE(&(v_t0.real), &(v_t0.imag), in0);
+    VC_LOAD_SHUFFLE(&(v_t1.real), &(v_t1.imag), in1);
+    VC_LOAD_SHUFFLE(&(v_t2.real), &(v_t2.imag), in2);
+    VC_LOAD_SHUFFLE(&(v_t3.real), &(v_t3.imag), in3);
+
+    RADIX4_BUTTERFLY_FS(&v_t4, &v_t5, &v_t6, &v_t7,
+                        &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_FWD_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    // Eight input floats per position were consumed and out0 moves on by
+    // four floats.
+    out0 += 4;
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls.c
new file mode 100644
index 00000000000..08ab35bf86a
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls.c
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+// Radix-4 butterflies on four adjacent elements each, with twiddle
+// multiplication.
+//
+// in:      split-format data; imaginary parts n floats after the real parts.
+// out:     split-format result; the four butterfly outputs are n / 4 apart.
+// twiddle: real part at twiddle[p], imaginary part at twiddle[p + 2 * n];
+//          butterfly k uses p = 2k, 4k and 6k.
+void x86SP_FFT_CToC_FC32_Fwd_Radix4_ls(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  const OMX_INT limit = n >> 1;
+  const OMX_INT quarter = n >> 2;
+  const OMX_INT tw_imag = n << 1;
+  OMX_INT k;
+
+  for (k = 0; (k << 1) < limit; ++k) {
+    const OMX_INT pos = k << 1;
+    const OMX_F32 *w1 = twiddle + pos;
+    const OMX_F32 *w2 = w1 + pos;
+    const OMX_F32 *w3 = w2 + pos;
+    const OMX_F32 *x0 = in + (pos << 1);
+    const OMX_F32 *x1 = x0 + 1;
+    const OMX_F32 *x2 = x1 + 1;
+    const OMX_F32 *x3 = x2 + 1;
+    OMX_F32 *y0 = out + k;
+    OMX_F32 *y1 = y0 + quarter;
+    OMX_F32 *y2 = y1 + quarter;
+    OMX_F32 *y3 = y2 + quarter;
+
+    // p1 = w1 * x1, p2 = w2 * x2, p3 = w3 * x3
+    const OMX_F32 p1_re = w1[0] * x1[0] - w1[tw_imag] * x1[n];
+    const OMX_F32 p1_im = w1[0] * x1[n] + w1[tw_imag] * x1[0];
+    const OMX_F32 p2_re = w2[0] * x2[0] - w2[tw_imag] * x2[n];
+    const OMX_F32 p2_im = w2[0] * x2[n] + w2[tw_imag] * x2[0];
+    const OMX_F32 p3_re = w3[0] * x3[0] - w3[tw_imag] * x3[n];
+    const OMX_F32 p3_im = w3[0] * x3[n] + w3[tw_imag] * x3[0];
+
+    // a = x0 + p2, b = x0 - p2
+    const OMX_F32 a_re = x0[0] + p2_re;
+    const OMX_F32 a_im = x0[n] + p2_im;
+    const OMX_F32 b_re = x0[0] - p2_re;
+    const OMX_F32 b_im = x0[n] - p2_im;
+
+    // c = p1 + p3, d = p1 - p3
+    const OMX_F32 c_re = p1_re + p3_re;
+    const OMX_F32 c_im = p1_im + p3_im;
+    const OMX_F32 d_re = p1_re - p3_re;
+    const OMX_F32 d_im = p1_im - p3_im;
+
+    // y0 = a + c
+    y0[0] = a_re + c_re;
+    y0[n] = a_im + c_im;
+
+    // y2 = a - c
+    y2[0] = a_re - c_re;
+    y2[n] = a_im - c_im;
+
+    // y1 = b - j * d
+    y1[0] = b_re + d_im;
+    y1[n] = b_im - d_re;
+
+    // y3 = b + j * d
+    y3[0] = b_re - d_im;
+    y3[n] = b_im + d_re;
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse.c
new file mode 100644
index 00000000000..4fc34271809
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse.c
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+// SSE radix-4 stage with twiddle multiplication; four butterflies per
+// iteration.
+//
+// in:      sixteen consecutive floats starting at in + 2 * i are handed to
+//          VC_LOAD_MATRIX_TRANSPOSE as four rows of four, together with n.
+//          NOTE(review): n is presumably the offset to the imaginary parts
+//          -- confirm against the macro definition.
+// out:     four output positions n / 4 floats apart; the store macro is given
+//          n as the split offset.
+// twiddle: real part at twiddle[p], imaginary part at twiddle[p + 2 * n].
+// n:       the loop advances i by 8 up to n / 2.
+void x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+  OMX_INT i;
+
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i += 8) {
+    // tw1, tw2 and tw3 point at twiddle[i], twiddle[2 * i] and
+    // twiddle[3 * i].
+    const OMX_F32 *tw1 = twiddle + i;
+    const OMX_F32 *tw2 = tw1 + i;
+    const OMX_F32 *tw3 = tw2 + i;
+    const OMX_F32 *in0 = in + (i << 1);
+    const OMX_F32 *in1 = in0 + 4;
+    const OMX_F32 *in2 = in1 + 4;
+    const OMX_F32 *in3 = in2 + 4;
+    OMX_F32 *out1 = out0 + n_by_4;
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    VC v_tw1;
+    VC v_tw2;
+    VC v_tw3;
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    // Lane m (0..3) uses twiddle indices p, 2p and 3p with p = i + 2 * m.
+    // The gathers therefore stride by 2, 4 and 6. _mm_set_ps takes the
+    // highest lane first, so the last argument lands in lane 0.
+    v_tw1.real = _mm_set_ps(tw1[6], tw1[4], tw1[2], tw1[0]);
+    v_tw1.imag = _mm_set_ps(
+        tw1[6 + n_mul_2],
+        tw1[4 + n_mul_2],
+        tw1[2 + n_mul_2],
+        tw1[n_mul_2]);
+    v_tw2.real = _mm_set_ps(tw2[12], tw2[8], tw2[4], tw2[0]);
+    v_tw2.imag = _mm_set_ps(
+        tw2[12 + n_mul_2],
+        tw2[8 + n_mul_2],
+        tw2[4 + n_mul_2],
+        tw2[n_mul_2]);
+    v_tw3.real = _mm_set_ps(tw3[18], tw3[12], tw3[6], tw3[0]);
+    v_tw3.imag = _mm_set_ps(
+        tw3[18 + n_mul_2],
+        tw3[12 + n_mul_2],
+        tw3[6 + n_mul_2],
+        tw3[n_mul_2]);
+
+    VC_LOAD_MATRIX_TRANSPOSE(&v_t0, &v_t1, &v_t2, &v_t3, in0, in1, in2, in3, n);
+
+    RADIX4_FWD_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
+                         &v_tw1, &v_tw2, &v_tw3,
+                         &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_FWD_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms.c
new file mode 100644
index 00000000000..de2a1be7a9b
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms.c
@@ -0,0 +1,149 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+// Radix-4 stage over |sub_size| groups of |sub_num| elements. Each group has
+// sub_num / 4 butterflies that share the same three twiddle factors.
+//
+// in / out: split format, imaginary parts n floats after the real parts; the
+//           four butterfly outputs are n / 4 apart.
+// twiddle:  real part at twiddle[p], imaginary part at twiddle[p + 2 * n];
+//           group g uses p = q, 2q and 3q with q = g * (sub_num / 2).
+void x86SP_FFT_CToC_FC32_Fwd_Radix4_ms(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    OMX_INT sub_size,
+    OMX_INT sub_num) {
+  const OMX_INT tw_stride = sub_num >> 1;
+  const OMX_INT sets = sub_num >> 2;
+  const OMX_INT quarter = n >> 2;
+  const OMX_INT tw_imag = n << 1;
+  OMX_F32 *y0 = out;
+  OMX_INT g;
+  OMX_INT s;
+
+  // Group 0 is handled without any twiddle multiplication.
+  for (s = 0; s < sets; ++s, ++y0) {
+    const OMX_F32 *x0 = in + s;
+    const OMX_F32 *x1 = x0 + sets;
+    const OMX_F32 *x2 = x1 + sets;
+    const OMX_F32 *x3 = x2 + sets;
+    OMX_F32 *y1 = y0 + quarter;
+    OMX_F32 *y2 = y1 + quarter;
+    OMX_F32 *y3 = y2 + quarter;
+
+    // a = x0 + x2, b = x0 - x2
+    const OMX_F32 a_re = x0[0] + x2[0];
+    const OMX_F32 a_im = x0[n] + x2[n];
+    const OMX_F32 b_re = x0[0] - x2[0];
+    const OMX_F32 b_im = x0[n] - x2[n];
+
+    // c = x1 + x3, d = x1 - x3
+    const OMX_F32 c_re = x1[0] + x3[0];
+    const OMX_F32 c_im = x1[n] + x3[n];
+    const OMX_F32 d_re = x1[0] - x3[0];
+    const OMX_F32 d_im = x1[n] - x3[n];
+
+    // y0 = a + c
+    y0[0] = a_re + c_re;
+    y0[n] = a_im + c_im;
+
+    // y2 = a - c
+    y2[0] = a_re - c_re;
+    y2[n] = a_im - c_im;
+
+    // y3 = b + j * d
+    y3[0] = b_re - d_im;
+    y3[n] = b_im + d_re;
+
+    // y1 = b - j * d
+    y1[0] = b_re + d_im;
+    y1[n] = b_im - d_re;
+  }
+
+  // Remaining groups: inputs 1..3 are multiplied by the group's twiddles.
+  for (g = 1; g < sub_size; ++g) {
+    const OMX_INT tw_pos = g * tw_stride;
+    const OMX_F32 *w1 = twiddle + tw_pos;
+    const OMX_F32 *w2 = w1 + tw_pos;
+    const OMX_F32 *w3 = w2 + tw_pos;
+    const OMX_F32 *group_in = in + g * sub_num;
+
+    for (s = 0; s < sets; ++s, ++y0) {
+      const OMX_F32 *x0 = group_in + s;
+      const OMX_F32 *x1 = x0 + sets;
+      const OMX_F32 *x2 = x1 + sets;
+      const OMX_F32 *x3 = x2 + sets;
+      OMX_F32 *y1 = y0 + quarter;
+      OMX_F32 *y2 = y1 + quarter;
+      OMX_F32 *y3 = y2 + quarter;
+
+      // p1 = w1 * x1, p2 = w2 * x2, p3 = w3 * x3
+      const OMX_F32 p1_re = w1[0] * x1[0] - w1[tw_imag] * x1[n];
+      const OMX_F32 p1_im = w1[0] * x1[n] + w1[tw_imag] * x1[0];
+      const OMX_F32 p2_re = w2[0] * x2[0] - w2[tw_imag] * x2[n];
+      const OMX_F32 p2_im = w2[0] * x2[n] + w2[tw_imag] * x2[0];
+      const OMX_F32 p3_re = w3[0] * x3[0] - w3[tw_imag] * x3[n];
+      const OMX_F32 p3_im = w3[0] * x3[n] + w3[tw_imag] * x3[0];
+
+      // a = x0 + p2, b = x0 - p2
+      const OMX_F32 a_re = x0[0] + p2_re;
+      const OMX_F32 a_im = x0[n] + p2_im;
+      const OMX_F32 b_re = x0[0] - p2_re;
+      const OMX_F32 b_im = x0[n] - p2_im;
+
+      // c = p1 + p3, d = p1 - p3
+      const OMX_F32 c_re = p1_re + p3_re;
+      const OMX_F32 c_im = p1_im + p3_im;
+      const OMX_F32 d_re = p1_re - p3_re;
+      const OMX_F32 d_im = p1_im - p3_im;
+
+      // y0 = a + c
+      y0[0] = a_re + c_re;
+      y0[n] = a_im + c_im;
+
+      // y2 = a - c
+      y2[0] = a_re - c_re;
+      y2[n] = a_im - c_im;
+
+      // y1 = b - j * d
+      y1[0] = b_re + d_im;
+      y1[n] = b_im - d_re;
+
+      // y3 = b + j * d
+      y3[0] = b_re - d_im;
+      y3[n] = b_im + d_re;
+    }
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse.c
new file mode 100644
index 00000000000..286f842c464
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse.c
@@ -0,0 +1,215 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+// This function handles the case when set_count = 2, in which we cannot
+// unroll the set loop by 4 to meet the SSE requirement (4 elements).
+// Radix-4 stage for set_count == 2. One vector holds two sets from each of
+// two consecutive groups, so each iteration processes two groups.
+//
+// in:      split-format data; real parts at in[k], imaginary parts at
+//          in[k + n]. Loaded with _mm_load_ps, so in + 2 * i and
+//          in + n + 2 * i must be 16-byte aligned.
+// out:     four output positions n / 4 floats apart; the store macro is given
+//          n as the split offset.
+// twiddle: read-only table; real part at twiddle[p], imaginary part at
+//          twiddle[p + 2 * n].
+// n:       the loop advances i by 8 up to n / 2.
+static void InternalUnroll2Fwd(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n) {
+  OMX_INT i;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i += 8) {
+    // tw1/tw2/tw3 are the twiddles of the first group in this iteration
+    // (indices i, 2 * i, 3 * i); the "e" pointers are those of the second
+    // group (indices i + 4, 2 * i + 8, 3 * i + 12).
+    const OMX_F32 *tw1  = twiddle + i;
+    const OMX_F32 *tw2  = tw1 + i;
+    const OMX_F32 *tw3  = tw2 + i;
+    const OMX_F32 *tw1e = tw1 + 4;
+    const OMX_F32 *tw2e = tw2 + 8;
+    const OMX_F32 *tw3e = tw3 + 12;
+
+    VC v_tw1;
+    VC v_tw2;
+    VC v_tw3;
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    // shuffle(a, b, (0, 0, 0, 0)) yields (a0, a0, b0, b0): lanes 0-1 carry
+    // the first group's twiddle and lanes 2-3 the second group's.
+    v_tw1.real = _mm_shuffle_ps(_mm_load_ss(tw1),
+                                _mm_load_ss(tw1e),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw1.imag = _mm_shuffle_ps(_mm_load_ss(tw1 + n_mul_2),
+                                _mm_load_ss(tw1e + n_mul_2),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw2.real = _mm_shuffle_ps(_mm_load_ss(tw2),
+                                _mm_load_ss(tw2e),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw2.imag = _mm_shuffle_ps(_mm_load_ss(tw2 + n_mul_2),
+                                _mm_load_ss(tw2e + n_mul_2),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw3.real = _mm_shuffle_ps(_mm_load_ss(tw3),
+                                _mm_load_ss(tw3e),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw3.imag = _mm_shuffle_ps(_mm_load_ss(tw3 + n_mul_2),
+                                _mm_load_ss(tw3e + n_mul_2),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m128 xmm0;
+    __m128 xmm1;
+    __m128 xmm2;
+    __m128 xmm3;
+    __m128 xmm4;
+    __m128 xmm5;
+    __m128 xmm6;
+    __m128 xmm7;
+
+    // Sixteen real parts: floats 0-7 belong to the first group and floats
+    // 8-15 to the second. Within a group, butterfly input m occupies floats
+    // 2m and 2m + 1 (one per set). The shuffles gather input m of both
+    // groups into v_tm.
+    const OMX_F32 *in0 = in + (i << 1);
+    xmm0 = _mm_load_ps(in0);
+    xmm1 = _mm_load_ps(in0 + 4);
+    xmm2 = _mm_load_ps(in0 + 8);
+    xmm3 = _mm_load_ps(in0 + 12);
+    v_t0.real = _mm_shuffle_ps(xmm0, xmm2, _MM_SHUFFLE(1, 0, 1, 0));
+    v_t1.real = _mm_shuffle_ps(xmm0, xmm2, _MM_SHUFFLE(3, 2, 3, 2));
+    v_t2.real = _mm_shuffle_ps(xmm1, xmm3, _MM_SHUFFLE(1, 0, 1, 0));
+    v_t3.real = _mm_shuffle_ps(xmm1, xmm3, _MM_SHUFFLE(3, 2, 3, 2));
+
+    // Same gather for the imaginary parts, n floats further on.
+    xmm4 = _mm_load_ps(in0 + n);
+    xmm5 = _mm_load_ps(in0 + n + 4);
+    xmm6 = _mm_load_ps(in0 + n + 8);
+    xmm7 = _mm_load_ps(in0 + n + 12);
+    v_t0.imag = _mm_shuffle_ps(xmm4, xmm6, _MM_SHUFFLE(1, 0, 1, 0));
+    v_t1.imag = _mm_shuffle_ps(xmm4, xmm6, _MM_SHUFFLE(3, 2, 3, 2));
+    v_t2.imag = _mm_shuffle_ps(xmm5, xmm7, _MM_SHUFFLE(1, 0, 1, 0));
+    v_t3.imag = _mm_shuffle_ps(xmm5, xmm7, _MM_SHUFFLE(3, 2, 3, 2));
+
+    OMX_F32 *out1 = out0 + n_by_4;
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    RADIX4_FWD_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
+                         &v_tw1, &v_tw2,
+                         &v_tw3, &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_FWD_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;
+  }
+}
+
+// SSE radix-4 stage over |sub_size| groups of |sub_num| elements, four sets
+// per vector.
+//
+// in:      data handed to VC_LOAD_SPLIT with n as the split offset.
+// out:     four output positions n / 4 floats apart; the store macro is given
+//          n as the split offset.
+// twiddle: real part at twiddle[p], imaginary part at twiddle[p + 2 * n];
+//          group grp uses p = q, 2q and 3q with q = grp * (sub_num / 2).
+//          NOTE(review): never written through in this file, yet declared
+//          non-const unlike the sibling stage functions -- consider
+//          const-qualifying it together with its prototype.
+// NOTE(review): the set loops step by 4, so set_count is presumably always 2
+// or a multiple of 4 -- confirm against the caller.
+void x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse(
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    OMX_F32 *twiddle,
+    OMX_INT n,
+    OMX_INT sub_size,
+    OMX_INT sub_num) {
+  OMX_INT set;
+  OMX_INT grp;
+  OMX_INT step = sub_num >> 1;
+  OMX_INT set_count = sub_num >> 2;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+
+  OMX_F32 *out0 = out;
+
+  // Two sets per group cannot fill a four-wide vector, so that case is
+  // handled by the dedicated routine.
+  if (set_count == 2) {
+    InternalUnroll2Fwd(in, out, twiddle, n);
+    return;
+  }
+
+  // grp == 0: the butterfly macro used here takes no twiddle factors.
+  for (set = 0; set < set_count; set += 4) {
+    const OMX_F32 * in0 = in + set;
+    const OMX_F32 *in1 = in0 + set_count;
+    const OMX_F32 *in2 = in1 + set_count;
+    const OMX_F32 *in3 = in2 + set_count;
+
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    VC_LOAD_SPLIT(&v_t0, in0, n);
+    VC_LOAD_SPLIT(&v_t1, in1, n);
+    VC_LOAD_SPLIT(&v_t2, in2, n);
+    VC_LOAD_SPLIT(&v_t3, in3, n);
+
+    OMX_F32 *out1 = out0 + n_by_4;
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    RADIX4_BUTTERFLY_FS(&v_t4, &v_t5, &v_t6, &v_t7,
+                        &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_FWD_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;
+  }
+
+  // grp > 0: every set in a group shares the same three twiddle factors, so
+  // they are broadcast to all four lanes once per group.
+  for (grp = 1; grp < sub_size; ++grp) {
+    const OMX_F32 *tw1 = twiddle + grp * step;
+    const OMX_F32 *tw2 = tw1 + grp * step;
+    const OMX_F32 *tw3 = tw2 + grp * step;
+
+    VC v_tw1;
+    VC v_tw2;
+    VC v_tw3;
+
+    v_tw1.real = _mm_load1_ps(tw1);
+    v_tw1.imag = _mm_load1_ps(tw1 + n_mul_2);
+    v_tw2.real = _mm_load1_ps(tw2);
+    v_tw2.imag = _mm_load1_ps(tw2 + n_mul_2);
+    v_tw3.real = _mm_load1_ps(tw3);
+    v_tw3.imag = _mm_load1_ps(tw3 + n_mul_2);
+
+    for (set = 0; set < set_count; set += 4) {
+      // The four butterfly inputs of a set are set_count floats apart within
+      // the group's sub_num-float block.
+      const OMX_F32 *in0 = in + set + grp * sub_num;
+      const OMX_F32 *in1 = in0 + set_count;
+      const OMX_F32 *in2 = in1 + set_count;
+      const OMX_F32 *in3 = in2 + set_count;
+
+      VC v_t0;
+      VC v_t1;
+      VC v_t2;
+      VC v_t3;
+      VC v_t4;
+      VC v_t5;
+      VC v_t6;
+      VC v_t7;
+
+      VC_LOAD_SPLIT(&v_t0, in0, n);
+      VC_LOAD_SPLIT(&v_t1, in1, n);
+      VC_LOAD_SPLIT(&v_t2, in2, n);
+      VC_LOAD_SPLIT(&v_t3, in3, n);
+
+      OMX_F32 *out1 = out0 + n_by_4;
+      OMX_F32 *out2 = out1 + n_by_4;
+      OMX_F32 *out3 = out2 + n_by_4;
+
+      RADIX4_FWD_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
+                           &v_tw1, &v_tw2, &v_tw3,
+                           &v_t0, &v_t1, &v_t2, &v_t3);
+
+      RADIX4_FWD_BUTTERFLY_STORE(out0, out1, out2, out3,
+                                 &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+      // Outputs are written consecutively across all groups.
+      out0 += 4;
+    }
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_fs.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_fs.c
new file mode 100644
index 00000000000..9f17d61b757
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_fs.c
@@ -0,0 +1,37 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix2_fs(  // Radix-2 butterflies, no twiddles.
+    const OMX_F32 *in,   // Split complex input: real at [k], imag at [k + n].
+    OMX_F32 *out,        // Split complex output, same layout as |in|.
+    OMX_INT n) {         // Number of complex points; real-to-imag offset.
+  OMX_INT i;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i++) {  // One butterfly per iteration, n/2 in total.
+    const OMX_F32 *in0 = in + i;
+    const OMX_F32 *in1 = in0 + n_by_2;  // Partner sample, n/2 further on.
+    OMX_F32 *out1 = out0 + n_by_2;      // Difference lands n/2 past the sum.
+
+    // CADD out0, in0, in1  (complex sum)
+    out0[0] = in0[0] + in1[0];
+    out0[n] = in0[n] + in1[n];
+
+    // CSUB out1, in0, in1  (complex difference)
+    out1[0] = in0[0] - in1[0];
+    out1[n] = in0[n] - in1[n];
+
+    out0 += 1;  // Outputs are written to consecutive slots.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls.c
new file mode 100644
index 00000000000..ec545c5365a
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls.c
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix2_ls(  // Radix-2 butterflies with twiddles.
+    const OMX_F32 *in,       // Split complex input: real at [k], imag at [k + n].
+    OMX_F32 *out,            // Split complex output, same layout as |in|.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n) {             // Number of complex points; real-to-imag offset.
+  OMX_INT i;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n; i += 2) {  // One butterfly per adjacent input pair.
+    OMX_FC32 t;
+    const OMX_F32 *tw = twiddle + i;  // Only even-indexed twiddles are read.
+    const OMX_F32 *in0 = in + i;
+    const OMX_F32 *in1 = in0 + 1;     // Butterfly partners are adjacent.
+    OMX_F32 *out1 = out0 + (n >> 1);  // Difference lands n/2 past the sum.
+
+    // CMUL t, conj(tw), in1  (the twiddle's imaginary part enters negated)
+    t.Re = tw[0] * in1[0] + tw[n << 1] * in1[n];
+    t.Im = tw[0] * in1[n] - tw[n << 1] * in1[0];
+
+    // CADD out0, in0, t
+    out0[0] = in0[0] + t.Re;
+    out0[n] = in0[n] + t.Im;
+
+    // CSUB out1, in0, t
+    out1[0] = in0[0] - t.Re;
+    out1[n] = in0[n] - t.Im;
+
+    out0 += 1;  // Outputs are written to consecutive slots.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse.c
new file mode 100644
index 00000000000..abad0cc998d
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse.c
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse(  // SSE radix-2 pass with twiddles.
+    const OMX_F32 *in,       // Split complex input: real plane, imag plane at +n.
+    OMX_F32 *out,            // Split complex output; imag plane at +n.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n) {             // Number of complex points; real-to-imag offset.
+  OMX_F32 *out0 =out;
+  OMX_INT i;
+
+  for (i = 0; i < n; i += 8) {  // 8 inputs in, 4 butterflies out per iteration.
+  VC v_tw;
+  VC v_t0;
+  VC v_t1;
+  VC v_temp;
+
+    // Load twiddle: even-indexed entries only; tw[0] goes to the lowest lane.
+    const OMX_F32 *tw = twiddle + i;
+    v_tw.real = _mm_set_ps(tw[6], tw[4], tw[2], tw[0]);
+    const OMX_F32 * twi = tw + (n << 1);  // Imaginary twiddles sit 2 * n later.
+    v_tw.imag = _mm_set_ps(twi[6], twi[4], twi[2], twi[0]);
+
+    // Load real part (VC_LOAD_SHUFFLE is defined in x86SP_SSE_Math.h; it
+    const OMX_F32 *t = in + i;  // presumably de-interleaves 8 floats - confirm)
+    VC_LOAD_SHUFFLE(&(v_t0.real), &(v_t1.real), t);
+
+    // Load imag part
+    t = t + n;
+    VC_LOAD_SHUFFLE(&(v_t0.imag), &(v_t1.imag), t);
+
+    OMX_F32 *out1 = out0 + (n >> 1);  // Second output block starts n/2 later.
+    VC_CONJ_MUL(&v_temp, &v_tw, &v_t1);  // Presumably v_temp = conj(tw) * t1.
+
+    VC_SUB_STORE_SPLIT(out1, &v_t0, &v_temp, n);  // Presumably out1 = t0 - temp.
+
+    VC_ADD_STORE_SPLIT(out0, &v_t0, &v_temp, n);  // Presumably out0 = t0 + temp.
+
+    out0 += 4;  // Four outputs were produced per plane.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ms.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ms.c
new file mode 100644
index 00000000000..78bc9ebdb61
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix2_ms.c
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix2_ms(  // Grouped radix-2 pass with twiddles.
+    const OMX_F32 *in,       // Split complex input: real at [k], imag at [k + n].
+    OMX_F32 *out,            // Split complex output, same layout as |in|.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n,               // Number of complex points; real-to-imag offset.
+    OMX_INT sub_size,        // Number of groups (outer loop count).
+    OMX_INT sub_num) {       // Input elements per group.
+  OMX_INT grp;
+  OMX_F32 *out0 = out;
+  OMX_INT set_count = sub_num >> 1;  // Butterflies per group.
+
+  for (grp = 0; grp < sub_size; ++grp) {
+    OMX_INT set;
+    const OMX_F32 *tw = twiddle + grp * sub_num;  // One twiddle per group.
+
+    for (set = 0; set < set_count; ++set) {
+      OMX_FC32 t;
+      const OMX_F32 *in0 = in + set + grp * sub_num;
+      const OMX_F32 *in1 = in0 + set_count;  // Partner is half a group away.
+      OMX_F32 *out1 = out0 + (n >> 1);       // Difference lands n/2 past the sum.
+
+      // CMUL t, conj(tw), in1  (the twiddle's imaginary part enters negated)
+      t.Re = tw[0] * in1[0] + tw[n << 1] * in1[n];
+      t.Im = tw[0] * in1[n] - tw[n << 1] * in1[0];
+
+      // CADD out0, in0, t
+      out0[0] = in0[0] + t.Re;
+      out0[n] = in0[n] + t.Im;
+
+      // CSUB out1, in0, t
+      out1[0] = in0[0] - t.Re;
+      out1[n] = in0[n] - t.Im;
+
+      out0 += 1;  // Output advances continuously across sets and groups.
+    }
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs.c
new file mode 100644
index 00000000000..bb80fa30830
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs.c
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix4_fs(  // Radix-4 butterflies, no twiddles.
+    const OMX_F32 *in,   // Split complex input: real at [k], imag at [k + n].
+    OMX_F32 *out,        // Split complex output, same layout as |in|.
+    OMX_INT n) {         // Number of complex points; real-to-imag offset.
+  OMX_INT i;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_4; i++) {  // One 4-point butterfly per iteration.
+    const OMX_F32 *in0 = in + i;  // The four inputs sit n/4 apart.
+    const OMX_F32 *in1 = in0 + n_by_4;
+    const OMX_F32 *in2 = in1 + n_by_4;
+    const OMX_F32 *in3 = in2 + n_by_4;
+    OMX_F32 *out1 = out0 + n_by_4;  // The four outputs sit n/4 apart as well.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    OMX_FC32 t0;
+    OMX_FC32 t1;
+    OMX_FC32 t2;
+    OMX_FC32 t3;
+
+    // CADD t0, in0, in2
+    t0.Re = in0[0] + in2[0];
+    t0.Im = in0[n] + in2[n];
+
+    // CSUB t1, in0, in2
+    t1.Re = in0[0] - in2[0];
+    t1.Im = in0[n] - in2[n];
+
+    // CADD t2, in1, in3
+    t2.Re = in1[0] + in3[0];
+    t2.Im = in1[n] + in3[n];
+
+    // CSUB t3, in1, in3
+    t3.Re = in1[0] - in3[0];
+    t3.Im = in1[n] - in3[n];
+
+    // CADD out0, t0, t2
+    out0[0] = t0.Re + t2.Re;
+    out0[n] = t0.Im + t2.Im;
+
+    // CSUB out2, t0, t2
+    out2[0] = t0.Re - t2.Re;
+    out2[n] = t0.Im - t2.Im;
+
+    // CSUB_ADD_X out1, t1, t3  (out1 = t1 + j * t3)
+    out1[0] = t1.Re - t3.Im;
+    out1[n] = t1.Im + t3.Re;
+
+    // CADD_SUB_X out3, t1, t3  (out3 = t1 - j * t3)
+    out3[0] = t1.Re + t3.Im;
+    out3[n] = t1.Im - t3.Re;
+
+    out0 += 1;  // Outputs are written to consecutive slots.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse.c
new file mode 100644
index 00000000000..c3921bc46a4
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse.c
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse(  // SSE radix-4 pass, no twiddles.
+    const OMX_F32 *in,   // Split complex input; imag plane offset n is passed on.
+    OMX_F32 *out,        // Split complex output; imag plane offset n likewise.
+    OMX_INT n) {         // Number of complex points; real-to-imag offset.
+  OMX_INT i;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_4; i += 4) {  // Four butterflies per iteration.
+    VC v_t0;  // v_t0..v_t3 receive the loaded inputs.
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;  // v_t4..v_t7 receive the butterfly results.
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    const OMX_F32 *in0 = in + i;  // The four input streams sit n/4 apart.
+    const OMX_F32 *in1 = in0 + n_by_4;
+    const OMX_F32 *in2 = in1 + n_by_4;
+    const OMX_F32 *in3 = in2 + n_by_4;
+
+    OMX_F32 *out1 = out0 + n_by_4;  // The four output streams sit n/4 apart.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    VC_LOAD_SPLIT(&v_t0, in0, n);  // Macros come from x86SP_SSE_Math.h; this
+    VC_LOAD_SPLIT(&v_t1, in1, n);  // one presumably loads 4 reals from the
+    VC_LOAD_SPLIT(&v_t2, in2, n);  // pointer and 4 imaginaries from
+    VC_LOAD_SPLIT(&v_t3, in3, n);  // pointer + n - TODO confirm.
+
+    RADIX4_BUTTERFLY_FS(&v_t4, &v_t5, &v_t6, &v_t7,
+                        &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;  // Matches the four lanes handled above.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls.c
new file mode 100644
index 00000000000..705d9cbc342
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls.c
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix4_ls(  // Radix-4 butterflies with twiddles.
+    const OMX_F32 *in,       // Split complex input: real at [k], imag at [k + n].
+    OMX_F32 *out,            // Split complex output, same layout as |in|.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n) {             // Number of complex points; real-to-imag offset.
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+  OMX_INT i;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i += 2) {  // n/4 butterflies in total.
+    OMX_FC32 t0;
+    OMX_FC32 t1;
+    OMX_FC32 t2;
+    OMX_FC32 t3;
+    OMX_FC32 tt1;
+    OMX_FC32 tt2;
+    OMX_FC32 tt3;
+    const OMX_F32 *tw1 = twiddle + i;  // Twiddle indices i, 2 * i and 3 * i.
+    const OMX_F32 *tw2 = tw1 + i;
+    const OMX_F32 *tw3 = tw2 + i;
+    const OMX_F32 *in0 = in + (i << 1);  // Four adjacent inputs per butterfly.
+    const OMX_F32 *in1 = in0 + 1;
+    const OMX_F32 *in2 = in1 + 1;
+    const OMX_F32 *in3 = in2 + 1;
+    OMX_F32 *out1 = out0 + n_by_4;  // The four outputs sit n/4 apart.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    // CMUL tt1, conj(Tw1), in1  (the twiddle's imaginary part enters negated)
+    tt1.Re = tw1[0] * in1[0] + tw1[n_mul_2] * in1[n];
+    tt1.Im = tw1[0] * in1[n] - tw1[n_mul_2] * in1[0];
+
+    // CMUL tt2, conj(Tw2), in2
+    tt2.Re = tw2[0] * in2[0] + tw2[n_mul_2] * in2[n];
+    tt2.Im = tw2[0] * in2[n] - tw2[n_mul_2] * in2[0];
+
+    // CMUL tt3, conj(Tw3), in3
+    tt3.Re = tw3[0] * in3[0] + tw3[n_mul_2] * in3[n];
+    tt3.Im = tw3[0] * in3[n] - tw3[n_mul_2] * in3[0];
+
+    // CADD t0, in0, tt2
+    t0.Re = in0[0] + tt2.Re;
+    t0.Im = in0[n] + tt2.Im;
+
+    // CSUB t1, in0, tt2
+    t1.Re = in0[0] - tt2.Re;
+    t1.Im = in0[n] - tt2.Im;
+
+    // CADD t2, tt1, tt3
+    t2.Re = tt1.Re + tt3.Re;
+    t2.Im = tt1.Im + tt3.Im;
+
+    // CSUB t3, tt1, tt3
+    t3.Re = tt1.Re - tt3.Re;
+    t3.Im = tt1.Im - tt3.Im;
+
+    // CADD out0, t0, t2
+    out0[0] = t0.Re + t2.Re;
+    out0[n] = t0.Im + t2.Im;
+
+    // CSUB out2, t0, t2
+    out2[0] = t0.Re - t2.Re;
+    out2[n] = t0.Im - t2.Im;
+
+    // CSUB_ADD_X out1, t1, t3  (out1 = t1 + j * t3)
+    out1[0] = t1.Re - t3.Im;
+    out1[n] = t1.Im + t3.Re;
+
+    // CADD_SUB_X out3, t1, t3  (out3 = t1 - j * t3)
+    out3[0] = t1.Re + t3.Im;
+    out3[n] = t1.Im - t3.Re;
+
+    out0 += 1;  // Outputs are written to consecutive slots.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse.c
new file mode 100644
index 00000000000..2e245faf1a5
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse.c
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse(  // SSE radix-4 pass with twiddles.
+    const OMX_F32 *in,       // Split complex input; imag plane offset n is used.
+    OMX_F32 *out,            // Split complex output; imag plane offset n is used.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n) {             // Number of complex points; real-to-imag offset.
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+  OMX_INT i;
+
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i += 8) {  // 16 inputs, 4 butterflies per iteration.
+    const OMX_F32 *tw1 = twiddle + i;  // Bases i, 2 * i and 3 * i.
+    const OMX_F32 *tw2 = tw1 + i;
+    const OMX_F32 *tw3 = tw2 + i;
+    const OMX_F32 *in0 = in + (i << 1);  // Four consecutive 4-float rows.
+    const OMX_F32 *in1 = in0 + 4;
+    const OMX_F32 *in2 = in1 + 4;
+    const OMX_F32 *in3 = in2 + 4;
+    OMX_F32 *out1 = out0 + n_by_4;  // The four output streams sit n/4 apart.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    VC v_tw1;
+    VC v_tw2;
+    VC v_tw3;
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    v_tw1.real = _mm_set_ps(tw1[6], tw1[4], tw1[2], tw1[0]);  // Lane k: i + 2k.
+    v_tw1.imag = _mm_set_ps(
+        tw1[6 + n_mul_2],
+        tw1[4 + n_mul_2],
+        tw1[2 + n_mul_2],
+        tw1[n_mul_2]);
+    v_tw2.real = _mm_set_ps(tw2[12], tw2[8], tw2[4], tw2[0]);  // 2 * (i + 2k).
+    v_tw2.imag = _mm_set_ps(
+        tw2[12 + n_mul_2],
+        tw2[8 + n_mul_2],
+        tw2[4 + n_mul_2],
+        tw2[n_mul_2]);
+    v_tw3.real = _mm_set_ps(tw3[18], tw3[12], tw3[6], tw3[0]);  // 3 * (i + 2k).
+    v_tw3.imag = _mm_set_ps(
+        tw3[18 + n_mul_2],
+        tw3[12 + n_mul_2],
+        tw3[6 + n_mul_2],
+        tw3[n_mul_2]);
+
+    VC_LOAD_MATRIX_TRANSPOSE(&v_t0, &v_t1, &v_t2, &v_t3, in0, in1, in2, in3, n);
+
+    RADIX4_INV_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
+                         &v_tw1, &v_tw2, &v_tw3,
+                         &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;  // Matches the four lanes handled above.
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms.c
new file mode 100644
index 00000000000..499036b9347
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms.c
@@ -0,0 +1,149 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+
+void x86SP_FFT_CToC_FC32_Inv_Radix4_ms(  // Grouped radix-4 pass with twiddles.
+    const OMX_F32 *in,       // Split complex input: real at [k], imag at [k + n].
+    OMX_F32 *out,            // Split complex output, same layout as |in|.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n,               // Number of complex points; real-to-imag offset.
+    OMX_INT sub_size,        // Number of groups.
+    OMX_INT sub_num) {       // Input elements per group.
+  OMX_INT set;
+  OMX_INT grp;
+  OMX_INT step = sub_num >> 1;       // Twiddle stride between groups.
+  OMX_INT set_count = sub_num >> 2;  // Butterflies per group.
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+  OMX_F32 *out0 = out;
+
+  // grp == 0: no twiddle multiply is performed for this group.
+  for (set = 0; set < set_count; ++set) {
+    OMX_FC32 t0;
+    OMX_FC32 t1;
+    OMX_FC32 t2;
+    OMX_FC32 t3;
+
+    const OMX_F32 *in0 = in + set;  // The four inputs sit set_count apart.
+    const OMX_F32 *in1 = in0 + set_count;
+    const OMX_F32 *in2 = in1 + set_count;
+    const OMX_F32 *in3 = in2 + set_count;
+    OMX_F32 *out1 = out0 + n_by_4;  // The four outputs sit n/4 apart.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    // CADD t0, in0, in2
+    t0.Re = in0[0] + in2[0];
+    t0.Im = in0[n] + in2[n];
+
+    // CSUB t1, in0, in2
+    t1.Re = in0[0] - in2[0];
+    t1.Im = in0[n] - in2[n];
+
+    // CADD t2, in1, in3
+    t2.Re = in1[0] + in3[0];
+    t2.Im = in1[n] + in3[n];
+
+    // CSUB t3, in1, in3
+    t3.Re = in1[0] - in3[0];
+    t3.Im = in1[n] - in3[n];
+
+    // CADD out0, t0, t2
+    out0[0] = t0.Re + t2.Re;
+    out0[n] = t0.Im + t2.Im;
+
+    // CSUB out2, t0, t2
+    out2[0] = t0.Re - t2.Re;
+    out2[n] = t0.Im - t2.Im;
+
+    // CSUB_ADD_X out1, t1, t3  (out1 = t1 + j * t3)
+    out1[0] = t1.Re - t3.Im;
+    out1[n] = t1.Im + t3.Re;
+
+    // CADD_SUB_X out3, t1, t3  (out3 = t1 - j * t3)
+    out3[0] = t1.Re + t3.Im;
+    out3[n] = t1.Im - t3.Re;
+
+    out0 += 1;  // Output keeps advancing into the groups handled below.
+  }
+
+  // grp > 0: each group shares one twiddle triple across all of its sets.
+  for (grp = 1; grp < sub_size; ++grp) {
+    const OMX_F32 *tw1 = twiddle + grp * step;  // Indices 1x, 2x, 3x grp * step.
+    const OMX_F32 *tw2 = tw1 + grp * step;
+    const OMX_F32 *tw3 = tw2 + grp * step;
+
+    for (set = 0; set < set_count; ++set) {
+      OMX_FC32 t0;
+      OMX_FC32 t1;
+      OMX_FC32 t2;
+      OMX_FC32 t3;
+      OMX_FC32 tt1;
+      OMX_FC32 tt2;
+      OMX_FC32 tt3;
+
+      const OMX_F32 *in0 = in + set + grp * sub_num;  // Start of this group.
+      const OMX_F32 *in1 = in0 + set_count;
+      const OMX_F32 *in2 = in1 + set_count;
+      const OMX_F32 *in3 = in2 + set_count;
+      OMX_F32 *out1 = out0 + n_by_4;
+      OMX_F32 *out2 = out1 + n_by_4;
+      OMX_F32 *out3 = out2 + n_by_4;
+
+      // CMUL tt1, conj(Tw1), in1  (the twiddle's imaginary part enters negated)
+      tt1.Re = tw1[0] * in1[0] + tw1[n_mul_2] * in1[n];
+      tt1.Im = tw1[0] * in1[n] - tw1[n_mul_2] * in1[0];
+
+      // CMUL tt2, conj(Tw2), in2
+      tt2.Re = tw2[0] * in2[0] + tw2[n_mul_2] * in2[n];
+      tt2.Im = tw2[0] * in2[n] - tw2[n_mul_2] * in2[0];
+
+      // CMUL tt3, conj(Tw3), in3
+      tt3.Re = tw3[0] * in3[0] + tw3[n_mul_2] * in3[n];
+      tt3.Im = tw3[0] * in3[n] - tw3[n_mul_2] * in3[0];
+
+      // CADD t0, in0, tt2
+      t0.Re = in0[0] + tt2.Re;
+      t0.Im = in0[n] + tt2.Im;
+
+      // CSUB t1, in0, tt2
+      t1.Re = in0[0] - tt2.Re;
+      t1.Im = in0[n] - tt2.Im;
+
+      // CADD t2, tt1, tt3
+      t2.Re = tt1.Re + tt3.Re;
+      t2.Im = tt1.Im + tt3.Im;
+
+      // CSUB t3, tt1, tt3
+      t3.Re = tt1.Re - tt3.Re;
+      t3.Im = tt1.Im - tt3.Im;
+
+      // CADD out0, t0, t2
+      out0[0] = t0.Re + t2.Re;
+      out0[n] = t0.Im + t2.Im;
+
+      // CSUB out2, t0, t2
+      out2[0] = t0.Re - t2.Re;
+      out2[n] = t0.Im - t2.Im;
+
+      // CSUB_ADD_X out1, t1, t3  (out1 = t1 + j * t3)
+      out1[0] = t1.Re - t3.Im;
+      out1[n] = t1.Im + t3.Re;
+
+      // CADD_SUB_X out3, t1, t3  (out3 = t1 - j * t3)
+      out3[0] = t1.Re + t3.Im;
+      out3[n] = t1.Im - t3.Re;
+
+      out0 += 1;  // Outputs are written to consecutive slots.
+    }
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse.c
new file mode 100644
index 00000000000..703f316920f
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse.c
@@ -0,0 +1,215 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/src/x86/x86SP_SSE_Math.h"
+
+// Handles the set_count == 2 case, where the set loop cannot be unrolled by 4
+// to fill an SSE register; two groups of two sets share each vector instead.
+static void InternalUnroll2Inv(
+    const OMX_F32 *in,       // Split complex input; imag plane at +n.
+    OMX_F32 *out,            // Split complex output; imag plane offset n is used.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n) {             // Number of complex points; real-to-imag offset.
+  OMX_INT i;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+  OMX_F32 *out0 = out;
+
+  for (i = 0; i < n_by_2; i += 8) {  // 16 inputs (two groups) per iteration.
+    const OMX_F32 *tw1  = twiddle + i;  // First group: indices i, 2i, 3i.
+    const OMX_F32 *tw2  = tw1 + i;
+    const OMX_F32 *tw3  = tw2 + i;
+    const OMX_F32 *tw1e = tw1 + 4;   // Second group: indices i + 4,
+    const OMX_F32 *tw2e = tw2 + 8;   // 2 * (i + 4) and
+    const OMX_F32 *tw3e = tw3 + 12;  // 3 * (i + 4).
+
+    VC v_tw1;
+    VC v_tw2;
+    VC v_tw3;
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    v_tw1.real = _mm_shuffle_ps(_mm_load_ss(tw1),  // Lanes 0-1: first group's
+                                _mm_load_ss(tw1e),  // twiddle; lanes 2-3: the
+                                _MM_SHUFFLE(0, 0, 0, 0));  // second group's.
+    v_tw1.imag = _mm_shuffle_ps(_mm_load_ss(tw1 + n_mul_2),
+                                _mm_load_ss(tw1e + n_mul_2),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw2.real = _mm_shuffle_ps(_mm_load_ss(tw2),
+                                _mm_load_ss(tw2e),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw2.imag = _mm_shuffle_ps(_mm_load_ss(tw2 + n_mul_2),
+                                _mm_load_ss(tw2e + n_mul_2),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw3.real = _mm_shuffle_ps(_mm_load_ss(tw3),
+                                _mm_load_ss(tw3e),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+    v_tw3.imag = _mm_shuffle_ps(_mm_load_ss(tw3 + n_mul_2),
+                                _mm_load_ss(tw3e + n_mul_2),
+                                _MM_SHUFFLE(0, 0, 0, 0));
+
+    __m128 xmm0;
+    __m128 xmm1;
+    __m128 xmm2;
+    __m128 xmm3;
+    __m128 xmm4;
+    __m128 xmm5;
+    __m128 xmm6;
+    __m128 xmm7;
+
+    const OMX_F32 *in0 = in + (i << 1);
+    xmm0 = _mm_load_ps(in0);  // _mm_load_ps needs 16-byte aligned addresses.
+    xmm1 = _mm_load_ps(in0 + 4);
+    xmm2 = _mm_load_ps(in0 + 8);
+    xmm3 = _mm_load_ps(in0 + 12);
+    v_t0.real = _mm_shuffle_ps(xmm0, xmm2, _MM_SHUFFLE(1, 0, 1, 0));  // 0,1,8,9
+    v_t1.real = _mm_shuffle_ps(xmm0, xmm2, _MM_SHUFFLE(3, 2, 3, 2));  // 2,3,10,11
+    v_t2.real = _mm_shuffle_ps(xmm1, xmm3, _MM_SHUFFLE(1, 0, 1, 0));  // 4,5,12,13
+    v_t3.real = _mm_shuffle_ps(xmm1, xmm3, _MM_SHUFFLE(3, 2, 3, 2));  // 6,7,14,15
+
+    xmm4 = _mm_load_ps(in0 + n);  // Same gather again for the imaginary plane.
+    xmm5 = _mm_load_ps(in0 + n + 4);
+    xmm6 = _mm_load_ps(in0 + n + 8);
+    xmm7 = _mm_load_ps(in0 + n + 12);
+    v_t0.imag = _mm_shuffle_ps(xmm4, xmm6, _MM_SHUFFLE(1, 0, 1, 0));
+    v_t1.imag = _mm_shuffle_ps(xmm4, xmm6, _MM_SHUFFLE(3, 2, 3, 2));
+    v_t2.imag = _mm_shuffle_ps(xmm5, xmm7, _MM_SHUFFLE(1, 0, 1, 0));
+    v_t3.imag = _mm_shuffle_ps(xmm5, xmm7, _MM_SHUFFLE(3, 2, 3, 2));
+
+    OMX_F32 *out1 = out0 + n_by_4;  // The four output streams sit n/4 apart.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    RADIX4_INV_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
+                         &v_tw1, &v_tw2, &v_tw3,
+                         &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;  // Matches the four lanes handled above.
+  }
+}
+
+void x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse(  // SSE grouped radix-4 pass.
+    const OMX_F32 *in,       // Split complex input; imag plane offset n is used.
+    OMX_F32 *out,            // Split complex output; imag plane offset n is used.
+    const OMX_F32 *twiddle,  // Twiddles: real at [k], imag at [k + 2 * n].
+    OMX_INT n,               // Number of complex points; real-to-imag offset.
+    OMX_INT sub_size,        // Number of groups.
+    OMX_INT sub_num) {       // Input elements per group.
+  OMX_INT set;
+  OMX_INT grp;
+  OMX_INT step = sub_num >> 1;       // Twiddle stride between groups.
+  OMX_INT set_count = sub_num >> 2;  // Butterflies per group.
+  OMX_INT n_by_4 = n >> 2;
+  OMX_INT n_mul_2 = n << 1;
+
+  OMX_F32 *out0 = out;
+
+  if (set_count == 2) {  // Too few sets to fill four lanes; use the helper.
+    InternalUnroll2Inv(in, out, twiddle, n);
+    return;
+  }
+
+  // grp == 0: no twiddle multiply is performed for this group.
+  for (set = 0; set < set_count; set += 4) {  // Four sets per iteration.
+    const OMX_F32 * in0 = in + set;  // The four inputs sit set_count apart.
+    const OMX_F32 *in1 = in0 + set_count;
+    const OMX_F32 *in2 = in1 + set_count;
+    const OMX_F32 *in3 = in2 + set_count;
+
+    VC v_t0;
+    VC v_t1;
+    VC v_t2;
+    VC v_t3;
+    VC v_t4;
+    VC v_t5;
+    VC v_t6;
+    VC v_t7;
+
+    VC_LOAD_SPLIT(&v_t0, in0, n);  // Macros come from x86SP_SSE_Math.h.
+    VC_LOAD_SPLIT(&v_t1, in1, n);
+    VC_LOAD_SPLIT(&v_t2, in2, n);
+    VC_LOAD_SPLIT(&v_t3, in3, n);
+
+    OMX_F32 *out1 = out0 + n_by_4;  // The four output streams sit n/4 apart.
+    OMX_F32 *out2 = out1 + n_by_4;
+    OMX_F32 *out3 = out2 + n_by_4;
+
+    RADIX4_BUTTERFLY_FS(&v_t4, &v_t5, &v_t6, &v_t7,
+                        &v_t0, &v_t1, &v_t2, &v_t3);
+
+    RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3,
+                               &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+    out0 += 4;  // Output keeps advancing into the groups handled below.
+  }
+
+  for (grp = 1; grp < sub_size; ++grp) {  // Remaining groups use twiddles.
+    const OMX_F32 *tw1 = twiddle + grp * step;  // Indices 1x, 2x, 3x grp * step.
+    const OMX_F32 *tw2 = tw1 + grp * step;
+    const OMX_F32 *tw3 = tw2 + grp * step;
+
+    VC v_tw1;
+    VC v_tw2;
+    VC v_tw3;
+
+    v_tw1.real = _mm_load1_ps(tw1);  // Broadcast: all four lanes share the
+    v_tw1.imag = _mm_load1_ps(tw1 + n_mul_2);  // group's twiddle.
+    v_tw2.real = _mm_load1_ps(tw2);
+    v_tw2.imag = _mm_load1_ps(tw2 + n_mul_2);
+    v_tw3.real = _mm_load1_ps(tw3);
+    v_tw3.imag = _mm_load1_ps(tw3 + n_mul_2);
+
+    for (set = 0; set < set_count; set += 4) {  // Four sets per iteration.
+      const OMX_F32 *in0 = in + set + grp * sub_num;  // Start of this group.
+      const OMX_F32 *in1 = in0 + set_count;
+      const OMX_F32 *in2 = in1 + set_count;
+      const OMX_F32 *in3 = in2 + set_count;
+
+      VC v_t0;
+      VC v_t1;
+      VC v_t2;
+      VC v_t3;
+      VC v_t4;
+      VC v_t5;
+      VC v_t6;
+      VC v_t7;
+
+      VC_LOAD_SPLIT(&v_t0, in0, n);
+      VC_LOAD_SPLIT(&v_t1, in1, n);
+      VC_LOAD_SPLIT(&v_t2, in2, n);
+      VC_LOAD_SPLIT(&v_t3, in3, n);
+
+      OMX_F32 *out1 = out0 + n_by_4;
+      OMX_F32 *out2 = out1 + n_by_4;
+      OMX_F32 *out3 = out2 + n_by_4;
+
+      RADIX4_INV_BUTTERFLY(&v_t4, &v_t5, &v_t6, &v_t7,
+                           &v_tw1, &v_tw2, &v_tw3,
+                           &v_t0, &v_t1, &v_t2, &v_t3);
+
+      RADIX4_INV_BUTTERFLY_STORE(out0, out1, out2, out3,
+                                 &v_t4, &v_t5, &v_t6, &v_t7, n);
+
+      out0 += 4;  // Matches the four lanes handled above.
+    }
+  }
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_F32_radix2_kernel.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_F32_radix2_kernel.c
new file mode 100644
index 00000000000..0a3d816ffe4
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_F32_radix2_kernel.c
@@ -0,0 +1,99 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include "dl/api/omxtypes.h"
+#include <stdbool.h>
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix2_fs(  // Forward; run first.
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix2_fs(  // Inverse; run first.
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix2_ms(  // Forward; run in the loop.
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    OMX_INT sub_size,
+    OMX_INT sub_num);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix2_ms(  // Inverse; run in the loop.
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    OMX_INT sub_size,
+    OMX_INT sub_num);
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix2_ls(  // Forward; run last.
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix2_ls(  // Inverse; run last.
+    const OMX_F32 *in,
+    OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n);
+
+OMX_F32* x86SP_F32_radix2_kernel_OutOfPlace(  // Returns the result buffer.
+    const OMX_F32 *src,  // Input; only read by the first pass.
+    // Two Ping Pong buffers for out of place kernel.
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,  // Passed unchanged to the ms and ls passes.
+    OMX_INT n,               // Passed unchanged to every pass.
+    bool forward_fft) {      // true: Fwd_* passes; false: Inv_* passes.
+  OMX_INT sub_size;
+  OMX_INT sub_num;
+  OMX_INT n_by_2 = n >> 1;
+  OMX_F32 *in = buf1;
+  OMX_F32 *out = buf2;
+
+  if (forward_fft)  // First pass: src -> buf1 (held in |in| from here on).
+    x86SP_FFT_CToC_FC32_Fwd_Radix2_fs(src, in, n);
+  else
+    x86SP_FFT_CToC_FC32_Inv_Radix2_fs(src, in, n);
+
+  for (sub_size = 2, sub_num = n_by_2;  // Groups double, group length halves.
+       sub_size < n_by_2;
+       sub_size = sub_size << 1, sub_num = sub_num >> 1) {
+
+    if (forward_fft) {
+      x86SP_FFT_CToC_FC32_Fwd_Radix2_ms(in, out, twiddle,
+                                        n, sub_size, sub_num);
+    } else {
+      x86SP_FFT_CToC_FC32_Inv_Radix2_ms(in, out, twiddle,
+                                        n, sub_size, sub_num);
+    }
+
+    OMX_F32 *temp = out;  // Swap so |in| always names the freshest data.
+    out = in;
+    in = temp;
+  }
+
+  // If sub_num <= 1, no need to do the last stage; |in| already has the result.
+  if (sub_num <= 1)
+    return in;
+
+  if (forward_fft)  // Final pass: |in| -> |out|.
+    x86SP_FFT_CToC_FC32_Fwd_Radix2_ls(in, out, twiddle, n);
+  else
+    x86SP_FFT_CToC_FC32_Inv_Radix2_ls(in, out, twiddle, n);
+
+  return out;
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_F32_radix4_kernel.c b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_F32_radix4_kernel.c
new file mode 100644
index 00000000000..e7c7b892724
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_FFT_F32_radix4_kernel.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+#include "dl/api/omxtypes.h"
+#include <stdbool.h>
+
+/* First-stage kernels (no twiddle table is passed). */
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix4_fs(
+    const OMX_F32 *in, OMX_F32 *out,
+    OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix4_fs(
+    const OMX_F32 *in, OMX_F32 *out,
+    OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    OMX_INT n);
+
+/* Kernels for the stages between the first and the last one. */
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix4_ms(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n, OMX_INT sub_size, OMX_INT sub_num);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix4_ms(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n, OMX_INT sub_size, OMX_INT sub_num);
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n, OMX_INT sub_size, OMX_INT sub_num);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle,
+    OMX_INT n, OMX_INT sub_size, OMX_INT sub_num);
+
+/* Radix-4 last-stage kernels. */
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix4_ls(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix4_ls(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+/*
+ * Radix-2 last stage (sub_num == 2). These were called without a prototype,
+ * which C99 forbids. NOTE(review): the _sse signatures are assumed to match
+ * the Radix4 _ls_sse ones, as the call sites suggest; confirm.
+ */
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix2_ls(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix2_ls(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+extern void x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse(
+    const OMX_F32 *in, OMX_F32 *out,
+    const OMX_F32 *twiddle, OMX_INT n);
+
+/* Out-of-place radix-4 FFT driver using the kernels without the _sse
+ * suffix. Returns the buffer written by the last stage. */
+OMX_F32* x86SP_F32_radix4_kernel_OutOfPlace(
+    const OMX_F32 *src,
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    bool forward_fft) {
+  const OMX_INT quarter_n = n >> 2;
+  OMX_INT stage_size = 4;
+  OMX_INT stage_num = quarter_n;
+  OMX_F32 *cur = buf1;
+  OMX_F32 *next = buf2;
+
+  if (!forward_fft)
+    x86SP_FFT_CToC_FC32_Inv_Radix4_fs(src, cur, n);
+  else
+    x86SP_FFT_CToC_FC32_Fwd_Radix4_fs(src, cur, n);
+
+  /* Middle stages: the size grows 4x and the count shrinks 4x per pass. */
+  for (; stage_size < quarter_n; stage_size <<= 2, stage_num >>= 2) {
+    OMX_F32 *written = next;
+    if (!forward_fft) {
+      x86SP_FFT_CToC_FC32_Inv_Radix4_ms(cur, next, twiddle,
+                                        n, stage_size, stage_num);
+    } else {
+      x86SP_FFT_CToC_FC32_Fwd_Radix4_ms(cur, next, twiddle,
+                                        n, stage_size, stage_num);
+    }
+    next = cur;
+    cur = written;
+  }
+
+  /* A count of 2 selects the radix-2 last stage, anything else radix-4. */
+  if (stage_num == 2) {
+    if (forward_fft)
+      x86SP_FFT_CToC_FC32_Fwd_Radix2_ls(cur, next, twiddle, n);
+    else
+      x86SP_FFT_CToC_FC32_Inv_Radix2_ls(cur, next, twiddle, n);
+  } else {
+    if (forward_fft)
+      x86SP_FFT_CToC_FC32_Fwd_Radix4_ls(cur, next, twiddle, n);
+    else
+      x86SP_FFT_CToC_FC32_Inv_Radix4_ls(cur, next, twiddle, n);
+  }
+
+  return next;
+}
+
+/* Out-of-place radix-4 FFT driver using the _sse stage kernels.
+ * |forward_fft| is true for the forward and false for the inverse transform.
+ * Returns the buffer written by the last stage. */
+OMX_F32* x86SP_F32_radix4_kernel_OutOfPlace_sse(
+    const OMX_F32 *src,
+    OMX_F32 *buf1,
+    OMX_F32 *buf2,
+    const OMX_F32 *twiddle,
+    OMX_INT n,
+    bool forward_fft) {
+  const OMX_INT quarter_n = n >> 2;
+  OMX_INT stage_size = 4;
+  OMX_INT stage_num = quarter_n;
+  OMX_F32 *cur = buf1;
+  OMX_F32 *next = buf2;
+
+  /* First stage: reads |src| and fills |buf1|. */
+  if (!forward_fft)
+    x86SP_FFT_CToC_FC32_Inv_Radix4_fs_sse(src, cur, n);
+  else
+    x86SP_FFT_CToC_FC32_Fwd_Radix4_fs_sse(src, cur, n);
+
+  /* Middle stages: the size grows 4x and the count shrinks 4x per pass. */
+  for (; stage_size < quarter_n; stage_size <<= 2, stage_num >>= 2) {
+    OMX_F32 *written = next;
+    if (!forward_fft) {
+      x86SP_FFT_CToC_FC32_Inv_Radix4_ms_sse(cur, next, twiddle,
+                                            n, stage_size, stage_num);
+    } else {
+      x86SP_FFT_CToC_FC32_Fwd_Radix4_ms_sse(cur, next, twiddle,
+                                            n, stage_size, stage_num);
+    }
+    next = cur;
+    cur = written;
+  }
+
+  /* The count ends at 2 when n is not a power of 4: finish with radix-2. */
+  if (stage_num == 2) {
+    if (forward_fft)
+      x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse(cur, next, twiddle, n);
+    else
+      x86SP_FFT_CToC_FC32_Inv_Radix2_ls_sse(cur, next, twiddle, n);
+  } else {
+    if (forward_fft)
+      x86SP_FFT_CToC_FC32_Fwd_Radix4_ls_sse(cur, next, twiddle, n);
+    else
+      x86SP_FFT_CToC_FC32_Inv_Radix4_ls_sse(cur, next, twiddle, n);
+  }
+
+  return next;
+}
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_SSE_Math.h b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_SSE_Math.h
new file mode 100644
index 00000000000..d10a851ae7a
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/x86/x86SP_SSE_Math.h
@@ -0,0 +1,488 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include <emmintrin.h>
+#include <assert.h>
+
+/**
+ * Two data formats are used by the FFT routines, internally. The
+ * interface to the main external FFT routines use interleaved complex
+ * values where the real part is followed by the imaginary part.
+ *
+ * One is the split format where a complex vector of real and imaginary
+ * values are split such that all of the real values are placed in the
+ * first half of the vector and the corresponding imaginary values are
+ * placed in the second half, in the same order. The conversion from
+ * interleaved complex values to split format and back is transparent to the
+ * external FFT interface.
+ *
+ * VComplex uses split format.
+ */
+
+/** VComplex holds 4 complex float elements, with the real parts stored
+ * in real and the corresponding imaginary parts in imag.
+ */
+typedef struct VComplex {
+  __m128 real;
+  __m128 imag;
+} VC;
+
+/* out = a * b. out->real is written first, so out should not alias a or b. */
+static inline void VC_MUL(VC *out, VC *a, VC *b) {
+  __m128 lhs = _mm_mul_ps(a->real, b->real);
+  out->real = _mm_sub_ps(lhs, _mm_mul_ps(a->imag, b->imag));
+  lhs = _mm_mul_ps(a->real, b->imag);
+  out->imag = _mm_add_ps(lhs, _mm_mul_ps(a->imag, b->real));
+}
+
+/* out = conj(a) * b. out->real is written first: do not alias out with a/b. */
+static inline void VC_CONJ_MUL(VC *out, VC *a, VC *b) {
+  __m128 lhs = _mm_mul_ps(a->real, b->real);
+  out->real = _mm_add_ps(lhs, _mm_mul_ps(a->imag, b->imag));
+  lhs = _mm_mul_ps(a->real, b->imag);
+  out->imag = _mm_sub_ps(lhs, _mm_mul_ps(a->imag, b->real));
+}
+
+/* out = factor * a: both parts are scaled lane-wise by the real |factor|. */
+static inline void VC_MUL_F(VC *out, VC *a, __m128 factor) {
+  out->imag = _mm_mul_ps(factor, a->imag);
+  out->real = _mm_mul_ps(factor, a->real);
+}
+
+/* out = a + b (real and imaginary parts are summed independently). */
+static inline void VC_ADD(VC *out, VC *a, VC *b) {
+  out->imag = _mm_add_ps(a->imag, b->imag);
+  out->real = _mm_add_ps(a->real, b->real);
+}
+
+/* Cross add: out.real = a.real + b.imag, out.imag = b.real + a.imag.
+ * out->real is written before b->real is read: out should not alias b. */
+static inline void VC_ADD_X(VC *out, VC *a, VC *b) {
+  __m128 lane = _mm_add_ps(a->real, b->imag);
+  out->real = lane;
+  lane = _mm_add_ps(b->real, a->imag);
+  out->imag = lane;
+}
+
+/* Computes a + b and stores it in split form with aligned stores: the real
+ * lanes go to out[0..3], the imaginary lanes to out[offset..offset + 3].
+ */
+static inline void VC_ADD_STORE_SPLIT(
+    OMX_F32 *out, VC *a, VC *b, OMX_INT offset) {
+  OMX_F32 *out_imag = out + offset;
+  _mm_store_ps(out, _mm_add_ps(a->real, b->real));
+  _mm_store_ps(out_imag, _mm_add_ps(a->imag, b->imag));
+}
+
+/* out = a - b (real and imaginary parts are subtracted independently). */
+static inline void VC_SUB(VC *out, VC *a, VC *b) {
+  out->imag = _mm_sub_ps(a->imag, b->imag);
+  out->real = _mm_sub_ps(a->real, b->real);
+}
+
+/* Cross subtract: out.real = a.real - b.imag, out.imag = b.real - a.imag
+ * (note the operand order of the imaginary part). out should not alias b. */
+static inline void VC_SUB_X(VC *out, VC *a, VC *b) {
+  __m128 lane = _mm_sub_ps(a->real, b->imag);
+  out->real = lane;
+  lane = _mm_sub_ps(b->real, a->imag);
+  out->imag = lane;
+}
+
+/* Computes a - b and stores it in split form with aligned stores: the real
+ * lanes go to out[0..3], the imaginary lanes to out[offset..offset + 3].
+ */
+static inline void VC_SUB_STORE_SPLIT(
+    OMX_F32 *out, VC *a, VC *b, OMX_INT offset) {
+  OMX_F32 *out_imag = out + offset;
+  _mm_store_ps(out, _mm_sub_ps(a->real, b->real));
+  _mm_store_ps(out_imag, _mm_sub_ps(a->imag, b->imag));
+}
+
+/* out.real = a.real + b.real
+ * out.imag = a.imag - b.imag */
+static inline void VC_ADD_SUB(VC *out, VC *a, VC *b) {
+  const __m128 re = _mm_add_ps(a->real, b->real);
+  const __m128 im = _mm_sub_ps(a->imag, b->imag);
+  out->real = re;
+  out->imag = im;
+}
+
+/* out.real = a.real + b.imag, out.imag = a.imag - b.real (out = a - i * b).
+ * out->real is written before b->real is read: out should not alias b. */
+static inline void VC_ADD_SUB_X(VC *out, VC *a, VC *b) {
+  __m128 lane = _mm_add_ps(a->real, b->imag);
+  out->real = lane;
+  lane = _mm_sub_ps(a->imag, b->real);
+  out->imag = lane;
+}
+
+/* Stores (a.real + b.imag, a.imag - b.real) in split form with aligned
+ * stores: real lanes at out, imaginary lanes at out + offset.
+ */
+static inline void VC_ADD_SUB_X_STORE_SPLIT(
+    OMX_F32 *out, VC *a, VC *b, OMX_INT offset) {
+  OMX_F32 *out_imag = out + offset;
+  _mm_store_ps(out, _mm_add_ps(a->real, b->imag));
+  _mm_store_ps(out_imag, _mm_sub_ps(a->imag, b->real));
+}
+
+/* out.real = a.real - b.real
+ * out.imag = a.imag + b.imag */
+static inline void VC_SUB_ADD(VC *out, VC *a, VC *b) {
+  const __m128 re = _mm_sub_ps(a->real, b->real);
+  const __m128 im = _mm_add_ps(a->imag, b->imag);
+  out->real = re;
+  out->imag = im;
+}
+
+/* out.real = a.real - b.imag, out.imag = a.imag + b.real (out = a + i * b).
+ * out->real is written before b->real is read: out should not alias b. */
+static inline void VC_SUB_ADD_X(VC *out, VC *a, VC *b) {
+  __m128 lane = _mm_sub_ps(a->real, b->imag);
+  out->real = lane;
+  lane = _mm_add_ps(a->imag, b->real);
+  out->imag = lane;
+}
+
+/* Stores (a.real - b.imag, a.imag + b.real) in split form with aligned
+ * stores: real lanes at out, imaginary lanes at out + offset. */
+static inline void VC_SUB_ADD_X_STORE_SPLIT(
+    OMX_F32 *out, VC *a, VC *b, OMX_INT offset) {
+  OMX_F32 *out_imag = out + offset;
+  _mm_store_ps(out, _mm_sub_ps(a->real, b->imag));
+  _mm_store_ps(out_imag, _mm_add_ps(a->imag, b->real));
+}
+
+/**
+ * Aligned store of a complex vector in split form:
+ *   out[0..3]               = in.real
+ *   out[offset..offset + 3] = in.imag
+ */
+static inline void VC_STORE_SPLIT(
+    OMX_F32 *out, VC *in, OMX_INT offset) {
+  OMX_F32 *out_imag = out + offset;
+  _mm_store_ps(out, in->real);
+  _mm_store_ps(out_imag, in->imag);
+}
+
+/**
+ * Aligned load of a complex vector stored in split form:
+ *   out.real = in[0..3]
+ *   out.imag = in[offset..offset + 3]
+ */
+static inline void VC_LOAD_SPLIT(
+    VC *out, const OMX_F32 *in, OMX_INT offset) {
+  const OMX_F32 *in_imag = in + offset;
+  out->real = _mm_load_ps(in);
+  out->imag = _mm_load_ps(in_imag);
+}
+
+/* Unpack split format into interleaved order; out should not alias in. */
+static inline void VC_UNPACK(VC *out, VC *in) {
+  out->real = _mm_unpacklo_ps(in->real, in->imag);
+  out->imag = _mm_unpackhi_ps(in->real, in->imag);
+}
+
+/**
+ * Aligned load of 4 interleaved complex values (8 floats) into split form:
+ * out.real = [in[0].real, in[1].real, in[2].real, in[3].real]
+ * out.imag = [in[0].imag, in[1].imag, in[2].imag, in[3].imag]
+ */
+static inline void VC_LOAD_INTERLEAVE(VC *out, const OMX_F32 *in) {
+  const __m128 pair01 = _mm_load_ps(in);
+  const __m128 pair23 = _mm_load_ps(in + 4);
+  out->real = _mm_shuffle_ps(pair01, pair23, _MM_SHUFFLE(2, 0, 2, 0));
+  out->imag = _mm_shuffle_ps(pair01, pair23, _MM_SHUFFLE(3, 1, 3, 1));
+}
+/**
+ * Unaligned variant of the split-format load: |in| and |in + offset| do not
+ * have to be 16-byte aligned.
+ *   out.real = in[0..3], out.imag = in[offset..offset + 3]
+ */
+static inline void VC_LOADU_SPLIT(
+    VC *out, const OMX_F32 *in, OMX_INT offset) {
+  const OMX_F32 *in_imag = in + offset;
+  out->real = _mm_loadu_ps(in);
+  out->imag = _mm_loadu_ps(in_imag);
+}
+
+/* Reverses the lane order (0,1,2,3 -> 3,2,1,0) of both parts, in place. */
+static inline void VC_REVERSE(VC *v) {
+  v->imag = _mm_shuffle_ps(v->imag, v->imag, _MM_SHUFFLE(0, 1, 2, 3));
+  v->real = _mm_shuffle_ps(v->real, v->real, _MM_SHUFFLE(0, 1, 2, 3));
+}
+/*
+ * Aligned store of a complex vector as 4 interleaved complex values:
+ *   out[0], out[1] = in.real[0], in.imag[0]
+ *   out[2], out[3] = in.real[1], in.imag[1]
+ *   out[4], out[5] = in.real[2], in.imag[2]
+ *   out[6], out[7] = in.real[3], in.imag[3]
+ */
+static inline void VC_STORE_INTERLEAVE(OMX_F32 *out, VC *in) {
+  __m128 pairs;
+
+  pairs = _mm_unpacklo_ps(in->real, in->imag);
+  _mm_store_ps(out, pairs);
+  pairs = _mm_unpackhi_ps(in->real, in->imag);
+  _mm_store_ps(out + 4, pairs);
+}
+
+/* Same layout as VC_STORE_INTERLEAVE, but uses unaligned stores, so |out|
+ * does not have to be 16-byte aligned. */
+static inline void VC_STOREU_INTERLEAVE(OMX_F32 *out, VC *in) {
+  __m128 pairs = _mm_unpacklo_ps(in->real, in->imag);
+  _mm_storeu_ps(out, pairs);
+  pairs = _mm_unpackhi_ps(in->real, in->imag);
+  _mm_storeu_ps(out + 4, pairs);
+}
+
+/* Stores the VC_ADD_X result (a.real + b.imag, b.real + a.imag) in split
+ * form with aligned stores: real lanes at out, imaginary at out + offset. */
+static inline void VC_ADD_X_STORE_SPLIT(
+    OMX_F32 *out, VC *a, VC *b, OMX_INT offset) {
+  OMX_F32 *out_imag = out + offset;
+  _mm_store_ps(out, _mm_add_ps(a->real, b->imag));
+  _mm_store_ps(out_imag, _mm_add_ps(b->real, a->imag));
+}
+
+/**
+ * Computes the VC_SUB_X result (a.real - b.imag, b.real - a.imag), reverses
+ * the lane order of each part and stores the parts in split form at out and
+ * out + offset.
+ * Unaligned stores are used, so the addresses need not be 16-byte aligned.
+ */
+static inline void VC_SUB_X_INVERSE_STOREU_SPLIT(
+    OMX_F32 *out, VC *a, VC *b, OMX_INT offset) {
+  __m128 part = _mm_sub_ps(a->real, b->imag);
+  part = _mm_shuffle_ps(part, part, _MM_SHUFFLE(0, 1, 2, 3));
+  _mm_storeu_ps(out, part);
+  part = _mm_sub_ps(b->real, a->imag);
+  part = _mm_shuffle_ps(part, part, _MM_SHUFFLE(0, 1, 2, 3));
+  _mm_storeu_ps(out + offset, part);
+}
+
+/**
+ * Aligned load of 4 interleaved complex values, de-interleaved into two
+ * registers: *out0 receives the real parts, *out1 the imaginary parts.
+ */
+static inline void VC_LOAD_SHUFFLE(
+    __m128 *out0, __m128 *out1, const OMX_F32 *in) {
+  const __m128 pair01 = _mm_load_ps(in);
+  const __m128 pair23 = _mm_load_ps(in + 4);
+
+  /* Same shuffles as VC_LOAD_INTERLEAVE. */
+  *out0 = _mm_shuffle_ps(pair01, pair23, _MM_SHUFFLE(2, 0, 2, 0));
+  *out1 = _mm_shuffle_ps(pair01, pair23, _MM_SHUFFLE(3, 1, 3, 1));
+}
+
+/**
+ * Finishes the forward radix-4 butterfly and stores the four outputs in
+ * split form. Aligned stores are used and the imaginary lanes are placed
+ * |n| floats after the real lanes:
+ *   out0 = t0 + t2
+ *   out2 = t0 - t2
+ *   out1 = t1 - i * t3 = (t1.real + t3.imag, t1.imag - t3.real)
+ *   out3 = t1 + i * t3 = (t1.real - t3.imag, t1.imag + t3.real)
+ * The stores are issued in the order out0, out2, out1, out3.
+ */
+static inline void RADIX4_FWD_BUTTERFLY_STORE(
+    OMX_F32 *out0, OMX_F32 *out1,
+    OMX_F32 *out2, OMX_F32 *out3,
+    VC *t0, VC *t1, VC *t2, VC *t3,
+    OMX_INT n) {
+  /* Sum and difference of t0 and t2. */
+  VC_ADD_STORE_SPLIT(out0, t0, t2, n);
+  VC_SUB_STORE_SPLIT(out2, t0, t2, n);
+
+  /* Cross terms: t3 rotated by -i for out1 and by +i for out3. */
+  VC_ADD_SUB_X_STORE_SPLIT(out1, t1, t3, n);
+  VC_SUB_ADD_X_STORE_SPLIT(out3, t1, t3, n);
+}
+
+/**
+ * Finishes the inverse radix-4 butterfly and stores the four outputs in
+ * split form. Aligned stores are used and the imaginary lanes are placed
+ * |n| floats after the real lanes:
+ *   out0 = t0 + t2
+ *   out2 = t0 - t2
+ *   out1 = t1 + i * t3 = (t1.real - t3.imag, t1.imag + t3.real)
+ *   out3 = t1 - i * t3 = (t1.real + t3.imag, t1.imag - t3.real)
+ * The stores are issued in the order out0, out2, out1, out3.
+ */
+static inline void RADIX4_INV_BUTTERFLY_STORE(
+    OMX_F32 *out0, OMX_F32 *out1,
+    OMX_F32 *out2, OMX_F32 *out3,
+    VC *t0, VC *t1, VC *t2, VC *t3,
+    OMX_INT n) {
+  /* Sum and difference of t0 and t2. */
+  VC_ADD_STORE_SPLIT(out0, t0, t2, n);
+  VC_SUB_STORE_SPLIT(out2, t0, t2, n);
+
+  /* Cross terms: t3 rotated by +i for out1 and by -i for out3. */
+  VC_SUB_ADD_X_STORE_SPLIT(out1, t1, t3, n);
+  VC_ADD_SUB_X_STORE_SPLIT(out3, t1, t3, n);
+}
+
+/**
+ * First half of the forward radix-4 butterfly. All arguments point to
+ * 4-wide complex vectors (VC). T1..T3 are multiplied by the twiddles
+ * Tw1..Tw3 (T0 is used as is) and the products are combined:
+ *   t0 = T0 + Tw2 * T2
+ *   t1 = T0 - Tw2 * T2
+ *   t2 = Tw1 * T1 + Tw3 * T3
+ *   t3 = Tw1 * T1 - Tw3 * T3
+ *
+ * The outputs are written in the order t0, t1, t2, t3.
+ * t0 is written before T0 is read again for t1, so the outputs should not
+ * alias the inputs.
+ */
+static inline void RADIX4_FWD_BUTTERFLY(
+    VC *t0, VC *t1,
+    VC *t2, VC *t3,
+    VC *Tw1, VC *Tw2, VC *Tw3,
+    VC *T0, VC *T1,
+    VC *T2, VC *T3) {
+  VC w1_t1;
+  VC w2_t2;
+  VC w3_t3;
+
+  /* Twiddle products, kept in locals. */
+  VC_MUL(&w1_t1, Tw1, T1);
+  VC_MUL(&w2_t2, Tw2, T2);
+  VC_MUL(&w3_t3, Tw3, T3);
+
+  /* t0, t1: sum and difference of T0 and the Tw2 product. */
+  VC_ADD(t0, T0, &w2_t2);
+  VC_SUB(t1, T0, &w2_t2);
+
+  /* t2, t3: sum and difference of the Tw1 and Tw3 products. */
+  VC_ADD(t2, &w1_t1, &w3_t3);
+  VC_SUB(t3, &w1_t1, &w3_t3);
+}
+
+/**
+ * First half of the inverse radix-4 butterfly. All arguments point to
+ * 4-wide complex vectors (VC). T1..T3 are multiplied by the conjugated
+ * twiddles (T0 is used as is) and the products are combined:
+ *   t0 = T0 + conj(Tw2) * T2
+ *   t1 = T0 - conj(Tw2) * T2
+ *   t2 = conj(Tw1) * T1 + conj(Tw3) * T3
+ *   t3 = conj(Tw1) * T1 - conj(Tw3) * T3
+ *
+ * The outputs are written in the order t0, t1, t2, t3.
+ * t0 is written before T0 is read again for t1, so the outputs should not
+ * alias the inputs.
+ */
+static inline void RADIX4_INV_BUTTERFLY(
+    VC *t0, VC *t1,
+    VC *t2, VC *t3,
+    VC *Tw1, VC *Tw2, VC *Tw3,
+    VC *T0, VC *T1,
+    VC *T2, VC *T3) {
+  VC w1_t1;
+  VC w2_t2;
+  VC w3_t3;
+
+  /* Conjugate-twiddle products, kept in locals. */
+  VC_CONJ_MUL(&w1_t1, Tw1, T1);
+  VC_CONJ_MUL(&w2_t2, Tw2, T2);
+  VC_CONJ_MUL(&w3_t3, Tw3, T3);
+
+  /* t0, t1: sum and difference of T0 and the Tw2 product. */
+  VC_ADD(t0, T0, &w2_t2);
+  VC_SUB(t1, T0, &w2_t2);
+
+  /* t2, t3: sum and difference of the Tw1 and Tw3 products. */
+  VC_ADD(t2, &w1_t1, &w3_t3);
+  VC_SUB(t3, &w1_t1, &w3_t3);
+}
+
+/**
+ * First-stage radix-4 butterfly, shared by the forward and inverse
+ * transforms; no twiddle factors are applied:
+ *   t0 = T0 + T2
+ *   t1 = T0 - T2
+ *   t2 = T1 + T3
+ *   t3 = T1 - T3
+ * The outputs are written in the order t0, t1, t2, t3. t0 is written before
+ * T0 and T2 are read again for t1, so the outputs should not alias the
+ * inputs.
+ */
+static inline void RADIX4_BUTTERFLY_FS(
+    VC *t0, VC *t1, VC *t2, VC *t3,
+    VC *T0, VC *T1, VC *T2, VC *T3) {
+  /* Inputs 0 and 2. */
+  VC_ADD(t0, T0, T2);
+  VC_SUB(t1, T0, T2);
+
+  /* Inputs 1 and 3. */
+  VC_ADD(t2, T1, T3);
+  VC_SUB(t3, T1, T3);
+}
+
+/**
+ * Loads a 4 x 4 matrix of floats (four rows of 4) for each part and
+ * transposes it. Lanes are listed from the highest to the lowest:
+ * 3,  2,  1,  0                  12, 8,  4,  0
+ * 7,  6,  5,  4        =====>    13, 9,  5,  1
+ * 11, 10, 9,  8                  14, 10, 6,  2
+ * 15, 14, 13, 12                 15, 11, 7,  3
+ * The real rows are read from pT0..pT3 and the imaginary rows from the
+ * same pointers advanced by |n| floats; all eight loads are aligned loads.
+ * T0..T3 receive the transposed rows (real in ->real, imaginary in ->imag).
+ */
+static inline void VC_LOAD_MATRIX_TRANSPOSE(
+    VC *T0, VC *T1, VC *T2, VC *T3,
+    const OMX_F32 *pT0, const OMX_F32 *pT1,
+    const OMX_F32 *pT2, const OMX_F32 *pT3,
+    OMX_INT n) {
+  __m128 row0, row1;
+  __m128 row2, row3;
+  __m128 lo01, hi01;
+  __m128 lo23, hi23;
+
+  /* Real parts: load the four rows. */
+  row0 = _mm_load_ps(pT0);
+  row1 = _mm_load_ps(pT1);
+  row2 = _mm_load_ps(pT2);
+  row3 = _mm_load_ps(pT3);
+
+  /* Interleave the rows pairwise ... */
+  lo01 = _mm_unpacklo_ps(row0, row1);
+  hi01 = _mm_unpackhi_ps(row0, row1);
+  lo23 = _mm_unpacklo_ps(row2, row3);
+  hi23 = _mm_unpackhi_ps(row2, row3);
+
+  /* ... then gather: T0/T1 from the low pairs, T2/T3 from the high pairs. */
+  T0->real = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(1, 0, 1, 0));
+  T1->real = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(3, 2, 3, 2));
+  T2->real = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(1, 0, 1, 0));
+  T3->real = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(3, 2, 3, 2));
+
+  /* Imaginary parts: the same steps on the rows |n| floats further on. */
+  row0 = _mm_load_ps(pT0 + n);
+  row1 = _mm_load_ps(pT1 + n);
+  row2 = _mm_load_ps(pT2 + n);
+  row3 = _mm_load_ps(pT3 + n);
+
+  /* Pairwise interleave of the imaginary rows. */
+  lo01 = _mm_unpacklo_ps(row0, row1);
+  hi01 = _mm_unpackhi_ps(row0, row1);
+  lo23 = _mm_unpacklo_ps(row2, row3);
+  hi23 = _mm_unpackhi_ps(row2, row3);
+
+  /* Gather the transposed imaginary rows. */
+  T0->imag = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(1, 0, 1, 0));
+  T1->imag = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(3, 2, 3, 2));
+  T2->imag = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(1, 0, 1, 0));
+  T3->imag = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(3, 2, 3, 2));
+}
author	Andras Becsi <andras.becsi@digia.com>	2014-03-18 13:16:26 +0100
committer	Frederik Gladhorn <frederik.gladhorn@digia.com>	2014-03-20 15:55:39 +0100
commit	3f0f86b0caed75241fa71c95a5d73bc0164348c5 (patch)
tree	92b9fb00f2e9e90b0be2262093876d4f43b6cd13 /chromium/third_party/openmax_dl
parent	e90d7c4b152c56919d963987e2503f9909a666d2 (diff)
download	qtwebengine-chromium-3f0f86b0caed75241fa71c95a5d73bc0164348c5.tar.gz