summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2020-09-01 12:14:32 -0700
committer: Craig Topper <craig.topper@intel.com> 2020-09-01 12:44:32 -0700
commit: 4783e2c9c603ed6aeacc76bb1177056a9d307bd1 (patch)
tree: 9bbabfbbdec5ab8ffddad5791fd841edbc6840d5
parent: 96f0b57568c3047fde0c1e4b4f52401ce34f2da2 (diff)
download: llvm-4783e2c9c603ed6aeacc76bb1177056a9d307bd1.tar.gz
[MachineCopyPropagation] In isNopCopy, check the destination registers match in addition to the source registers.
Previously, if the source registers matched, we asserted that the destination registers matched as well. But GPR <-> mask register copies on X86 can violate this, since we use the same K-registers for multiple sizes.
Fixes this ISPC issue: https://github.com/ispc/ispc/issues/1851
Differential Revision: https://reviews.llvm.org/D86507
-rw-r--r-- llvm/lib/CodeGen/MachineCopyPropagation.cpp | 4
-rw-r--r-- llvm/test/CodeGen/X86/machine-cp-mask-reg.mir | 59
2 files changed, 60 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 67d15129b904..e2d7827c5a2f 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -338,10 +338,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
unsigned Def, const TargetRegisterInfo *TRI) {
Register PreviousSrc = PreviousCopy.getOperand(1).getReg();
Register PreviousDef = PreviousCopy.getOperand(0).getReg();
- if (Src == PreviousSrc) {
- assert(Def == PreviousDef);
+ if (Src == PreviousSrc && Def == PreviousDef)
return true;
- }
if (!TRI->isSubRegister(PreviousSrc, Src))
return false;
unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src);
diff --git a/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir
new file mode 100644
index 000000000000..86a077e64764
--- /dev/null
+++ b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -run-pass=machine-cp -o - | FileCheck %s
+
+# machine-cp previously asserted trying to determine if the k0->eax copy below
+# could be combined with the k0->rax copy.
+
+--- |
+ ; ModuleID = 'test.ll'
+ source_filename = "test.ll"
+ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+ define i8 @foo(<64 x i8> %x, i64* %y, i64 %z) #0 {
+ %a = icmp eq <64 x i8> %x, zeroinitializer
+ %b = bitcast <64 x i1> %a to i64
+ %c = add i64 %b, %z
+ store i64 %c, i64* %y, align 8
+ %d = extractelement <64 x i1> %a, i32 0
+ %e = zext i1 %d to i8
+ ret i8 %e
+ }
+
+ attributes #0 = { "target-cpu"="skx" }
+
+...
+---
+name: foo
+alignment: 16
+tracksRegLiveness: true
+liveins:
+ - { reg: '$zmm0' }
+ - { reg: '$rdi' }
+ - { reg: '$rsi' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $rdi, $rsi, $zmm0
+
+ ; CHECK-LABEL: name: foo
+ ; CHECK: liveins: $rdi, $rsi, $zmm0
+ ; CHECK: renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0
+ ; CHECK: renamable $rax = COPY renamable $k0
+ ; CHECK: renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags
+ ; CHECK: MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y)
+ ; CHECK: renamable $eax = COPY killed renamable $k0
+ ; CHECK: renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax
+ ; CHECK: $al = KILL renamable $al, implicit killed $eax
+ ; CHECK: RET 0, $al
+ renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0
+ renamable $rax = COPY renamable $k0
+ renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags
+ MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y)
+ renamable $eax = COPY killed renamable $k0
+ renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax
+ $al = KILL renamable $al, implicit killed $eax
+ RET 0, $al
+
+...