summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Bill Wendling <morbo@google.com> 2022-12-13 15:06:29 -0800
committer: Tom Stellard <tstellar@redhat.com> 2023-01-09 12:09:00 -0800
commit: 74d3ba1af5c09b85331c90105c461484762ee3e4 (patch)
tree: bab9d06d5fa091d6313ed4559ff89558fe202d6a
parent: a8af9f679231a55b8a0f5707d8727679a98f4b06 (diff)
download: llvm-74d3ba1af5c09b85331c90105c461484762ee3e4.tar.gz
[X86] Don't zero out %eax if both %al and %ah are used
The iterator over super and sub registers doesn't include both 8-bit registers in its list. So if both registers are used and only one of them is live on return, then we need to make sure that the other 8-bit register is also marked as live and not zeroed out.

Reviewed By: nickdesaulniers

Differential Revision: https://reviews.llvm.org/D139679

(cherry picked from commit 14d4cddc5506fb0fd3c4ac556b4edd970aa151eb)
-rw-r--r-- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 8
-rw-r--r-- llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll | 112
2 files changed, 119 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 85d051cfdbe7..a8d40edd88d3 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1237,7 +1237,13 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
if (!MO.isReg())
continue;
- for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
+ MCRegister Reg = MO.getReg();
+
+ // This picks up sibling registers (e.g. %al -> %ah).
+ for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
+ RegsToZero.reset(*Unit);
+
+ for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
RegsToZero.reset(SReg);
}
}
diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
new file mode 100644
index 000000000000..33e501ca8503
--- /dev/null
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386
+;
+; Make sure we don't zero out %eax when both %ah and %al are used.
+;
+; PR1766: https://github.com/ClangBuiltLinux/linux/issues/1766
+
+%struct.maple_subtree_state = type { ptr }
+
+@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4
+@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4
+@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1
+
+define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind {
+; I386-LABEL: test1:
+; I386: # %bb.0:
+; I386-NEXT: pushl %ebx
+; I386-NEXT: subl $24, %esp
+; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I386-NEXT: movl (%eax), %eax
+; I386-NEXT: movzbl (%eax), %ebx
+; I386-NEXT: calll bar
+; I386-NEXT: testb %al, %al
+; I386-NEXT: # implicit-def: $al
+; I386-NEXT: # kill: killed $al
+; I386-NEXT: je .LBB0_6
+; I386-NEXT: # %bb.1:
+; I386-NEXT: cmpl $0, mas_data_end_type
+; I386-NEXT: je .LBB0_3
+; I386-NEXT: # %bb.2:
+; I386-NEXT: movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax
+; I386-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT: jmp .LBB0_6
+; I386-NEXT: .LBB0_3:
+; I386-NEXT: movb mt_pivots_0, %ah
+; I386-NEXT: movb %ah, %al
+; I386-NEXT: decb %al
+; I386-NEXT: movl mas_data_end___trans_tmp_2, %ecx
+; I386-NEXT: movsbl %al, %edx
+; I386-NEXT: cmpl $0, (%ecx,%edx,4)
+; I386-NEXT: je .LBB0_5
+; I386-NEXT: # %bb.4:
+; I386-NEXT: movb %al, %ah
+; I386-NEXT: .LBB0_5:
+; I386-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT: .LBB0_6:
+; I386-NEXT: movb mt_slots_0, %bh
+; I386-NEXT: leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT: movl %eax, (%esp)
+; I386-NEXT: calll baz
+; I386-NEXT: subl $4, %esp
+; I386-NEXT: cmpb %bh, %bl
+; I386-NEXT: jae .LBB0_8
+; I386-NEXT: # %bb.7:
+; I386-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; I386-NEXT: movl %eax, (%esp)
+; I386-NEXT: calll gaz
+; I386-NEXT: .LBB0_8:
+; I386-NEXT: movb $1, %al
+; I386-NEXT: addl $24, %esp
+; I386-NEXT: popl %ebx
+; I386-NEXT: xorl %ecx, %ecx
+; I386-NEXT: xorl %edx, %edx
+; I386-NEXT: retl
+ %2 = alloca %struct.maple_subtree_state, align 4
+ %3 = load ptr, ptr %0, align 4
+ %4 = load i8, ptr %3, align 1
+ %5 = tail call zeroext i1 @bar()
+ br i1 %5, label %6, label %20
+
+6: ; preds = %1
+ %7 = load i32, ptr @mas_data_end_type, align 4
+ %8 = icmp eq i32 %7, 0
+ br i1 %8, label %11, label %9
+
+9: ; preds = %6
+ %10 = load i8, ptr @ma_meta_end_mn_0_0_0_0_0_0, align 1
+ br label %20
+
+11: ; preds = %6
+ %12 = load i8, ptr @mt_pivots_0, align 1
+ %13 = add i8 %12, -1
+ %14 = load ptr, ptr @mas_data_end___trans_tmp_2, align 4
+ %15 = sext i8 %13 to i32
+ %16 = getelementptr inbounds [1 x i32], ptr %14, i32 0, i32 %15
+ %17 = load i32, ptr %16, align 4
+ %18 = icmp eq i32 %17, 0
+ %19 = select i1 %18, i8 %12, i8 %13
+ br label %20
+
+20: ; preds = %11, %9, %1
+ %21 = phi i8 [ undef, %1 ], [ %10, %9 ], [ %19, %11 ]
+ %22 = load i8, ptr @mt_slots_0, align 1
+ call void @baz(ptr nonnull sret(%struct.maple_subtree_state) align 4 %2)
+ %23 = icmp ult i8 %4, %22
+ br i1 %23, label %24, label %25
+
+24: ; preds = %20
+ call void @gaz(i8 noundef signext %21)
+ br label %25
+
+25: ; preds = %20, %24
+ ret i1 true
+}
+
+declare dso_local zeroext i1 @bar(...) local_unnamed_addr
+
+declare dso_local void @baz(ptr sret(%struct.maple_subtree_state) align 4, ...) local_unnamed_addr
+
+declare dso_local void @gaz(i8 noundef signext) local_unnamed_addr