diff options
author | Craig Topper <craig.topper@intel.com> | 2020-09-01 12:14:32 -0700 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2020-09-01 12:44:32 -0700 |
commit | 4783e2c9c603ed6aeacc76bb1177056a9d307bd1 (patch) | |
tree | 9bbabfbbdec5ab8ffddad5791fd841edbc6840d5 | |
parent | 96f0b57568c3047fde0c1e4b4f52401ce34f2da2 (diff) | |
download | llvm-4783e2c9c603ed6aeacc76bb1177056a9d307bd1.tar.gz |
[MachineCopyPropagation] In isNopCopy, check the destination registers match in addition to the source registers.
Previously, if the source registers matched, we asserted that the destination
registers matched as well. But GPR <-> mask register copies on X86 can violate
this, since we use the same K-registers for multiple sizes.
Fixes this ISPC issue: https://github.com/ispc/ispc/issues/1851
Differential Revision: https://reviews.llvm.org/D86507
-rw-r--r-- | llvm/lib/CodeGen/MachineCopyPropagation.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/machine-cp-mask-reg.mir | 59 |
2 files changed, 60 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 67d15129b904..e2d7827c5a2f 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -338,10 +338,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, unsigned Def, const TargetRegisterInfo *TRI) { Register PreviousSrc = PreviousCopy.getOperand(1).getReg(); Register PreviousDef = PreviousCopy.getOperand(0).getReg(); - if (Src == PreviousSrc) { - assert(Def == PreviousDef); + if (Src == PreviousSrc && Def == PreviousDef) return true; - } if (!TRI->isSubRegister(PreviousSrc, Src)) return false; unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src); diff --git a/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir new file mode 100644 index 000000000000..86a077e64764 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -run-pass=machine-cp -o - | FileCheck %s + +# machine-cp previously asserted trying to determine if the k0->eax copy below +# could be combined with the k0->rax copy. + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + + define i8 @foo(<64 x i8> %x, i64* %y, i64 %z) #0 { + %a = icmp eq <64 x i8> %x, zeroinitializer + %b = bitcast <64 x i1> %a to i64 + %c = add i64 %b, %z + store i64 %c, i64* %y, align 8 + %d = extractelement <64 x i1> %a, i32 0 + %e = zext i1 %d to i8 + ret i8 %e + } + + attributes #0 = { "target-cpu"="skx" } + +... 
+--- +name: foo +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$zmm0' } + - { reg: '$rdi' } + - { reg: '$rsi' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi, $zmm0 + + ; CHECK-LABEL: name: foo + ; CHECK: liveins: $rdi, $rsi, $zmm0 + ; CHECK: renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0 + ; CHECK: renamable $rax = COPY renamable $k0 + ; CHECK: renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags + ; CHECK: MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y) + ; CHECK: renamable $eax = COPY killed renamable $k0 + ; CHECK: renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax + ; CHECK: $al = KILL renamable $al, implicit killed $eax + ; CHECK: RET 0, $al + renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0 + renamable $rax = COPY renamable $k0 + renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags + MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y) + renamable $eax = COPY killed renamable $k0 + renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax + $al = KILL renamable $al, implicit killed $eax + RET 0, $al + +... |