diff options
| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2021-01-28 11:21:21 +0000 |
| committer | Tom Stellard <tstellar@redhat.com> | 2021-02-03 11:26:33 -0800 |
| commit | c1899cd5102dbdacd006fdb33db075319ccc933f (patch) | |
| tree | e8bdf7d1af645edaf501c38c5dca634622b8414c | |
| parent | 2a57ea296a4787828b52799564d7ddf02ec1c4f3 (diff) | |
| download | llvm-c1899cd5102dbdacd006fdb33db075319ccc933f.tar.gz | |
[X86][AVX] Add PR48908 shuffle test case
(cherry picked from commit da8845fc3d3bb0b0e133f020931440511fa72723)
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 979c365acfd7..3da83b25d363 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -436,6 +436,157 @@ entry: unreachable } +define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias %out2) { +; X86-AVX1-LABEL: PR48908: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] +; X86-AVX1-NEXT: vmovapd %ymm4, (%edx) +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX1-NEXT: vmovapd %ymm3, (%ecx) +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X86-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X86-AVX1-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX1-NEXT: vzeroupper +; X86-AVX1-NEXT: retl +; +; X86-AVX2-LABEL: PR48908: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} 
ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1] +; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1] +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3] +; X86-AVX2-NEXT: vmovapd %ymm3, (%edx) +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3] +; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0] +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX2-NEXT: vmovapd %ymm3, (%ecx) +; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X86-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X86-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X86-AVX2-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: retl +; +; X86-AVX512-LABEL: PR48908: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; X86-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; X86-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2] +; X86-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3] +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0] +; X86-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3 +; X86-AVX512-NEXT: vmovapd %ymm3, (%edx) +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0] +; X86-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4 +; X86-AVX512-NEXT: vmovapd %ymm4, (%ecx) +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u> +; X86-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 +; 
X86-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0] +; X86-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0 +; X86-AVX512-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX512-NEXT: vzeroupper +; X86-AVX512-NEXT: retl +; +; X64-AVX1-LABEL: PR48908: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] +; X64-AVX1-NEXT: vmovapd %ymm4, (%rdi) +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX1-NEXT: vmovapd %ymm3, (%rsi) +; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X64-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X64-AVX1-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX1-NEXT: vzeroupper +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: PR48908: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1] +; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1] +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3] +; X64-AVX2-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3] +; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0] +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX2-NEXT: vmovapd %ymm3, (%rsi) +; 
X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X64-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X64-AVX2-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: PR48908: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; X64-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; X64-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; X64-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2] +; X64-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3] +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,3,8,1] +; X64-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3 +; X64-AVX512-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1] +; X64-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4 +; X64-AVX512-NEXT: vmovapd %ymm4, (%rsi) +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,11,u,u> +; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3] +; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0 +; X64-AVX512-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %t0 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + %t1 = shufflevector <4 x double> %v1, <4 x double> %v2, <4 x i32> <i32 1, i32 2, i32 4, i32 5> + %r0 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 0, i32 3, i32 6, i32 1> + store <4 x double> %r0, <4 x double>* %out0, align 32 + %r1 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 4, i32 7, i32 2, i32 5> + store <4 x double> %r1, <4 x double>* %out1, align 32 + %t2 = shufflevector <4 x 
double> %v0, <4 x double> %v1, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef> + %r2 = shufflevector <4 x double> %t2, <4 x double> %v2, <4 x i32> <i32 6, i32 0, i32 1, i32 7> + store <4 x double> %r2, <4 x double>* %out2, align 32 + ret void +} + define <4 x i64> @concat_self_v4i64(<2 x i64> %x) { ; AVX1-LABEL: concat_self_v4i64: ; AVX1: # %bb.0: |