summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYonghong Song <yhs@fb.com>2020-06-10 19:13:37 -0700
committerTom Stellard <tstellar@redhat.com>2020-06-23 14:43:44 -0700
commitf5bad9cac5d38abdc3d9cc1f44bb1ab8198cc1df (patch)
tree30b02fa27694994ff9d1deff9ef0c6587fe0efd1
parent0777c907268a8197de102f3770432b0f185de30a (diff)
downloadllvm-f5bad9cac5d38abdc3d9cc1f44bb1ab8198cc1df.tar.gz
[BPF] fix incorrect type in BPFISelDAGToDAG readonly load optimization
In BPF Instruction Selection DAGToDAG transformation phase, BPF backend had an optimization to turn load from readonly data section to direct load of the values. This phase is implemented before libbpf has readonly section support and before alu32 is supported. This phase however may generate incorrect type when alu32 is enabled. The following is an example, -bash-4.4$ cat ~/tmp2/t.c struct t { unsigned char a; unsigned char b; unsigned char c; }; extern void foo(void *); int test() { struct t v = { .b = 2, }; foo(&v); return 0; } The compiler will turn local variable "v" into a readonly section. During instruction selection phase, the compiler generates two loads from readonly section, one 2 byte load or 1 byte load, e.g., for 2 loads, t8: i32,ch = load<(dereferenceable load 2 from `i8* getelementptr inbounds (%struct.t, %struct.t* @__const.test.v, i64 0, i32 0)`, align 1), anyext from i16> t3, GlobalAddress:i64<%struct.t* @__const.test.v> 0, undef:i64 t9: ch = store<(store 2 into %ir.v1.sub1), trunc to i16> t3, t8, FrameIndex:i64<0>, undef:i64 BPF backend changed t8 to i64 = Constant<2> and eventually the generated machine IR: t10: i64 = MOV_ri TargetConstant:i64<2> t40: i32 = SLL_ri_32 t10, TargetConstant:i32<8> t41: i32 = OR_ri_32 t40, TargetConstant:i64<0> t9: ch = STH32<Mem:(store 2 into %ir.v1.sub1)> t41, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t3 Note that t10 in the above is not correct. The type should be i32 and instruction should be MOV_ri_32. The reason for incorrect insn selection is BPF insn selection generated an i64 constant instead of an i32 constant as specified in the original load instruction. Such incorrect insn sequence eventually caused the following fatal error when a COPY insn tries to copy a 64bit register to a 32bit subregister. Impossible reg-to-reg copy UNREACHABLE executed at ../lib/Target/BPF/BPFInstrInfo.cpp:42! This patch fixed the issue by using the load result type instead of always i64 when doing readonly load optimization. Differential Revision: https://reviews.llvm.org/D81630 (cherry picked from commit 4db1878158a3f481ff673fef2396c12b7a53d280)
-rw-r--r--llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--llvm/test/CodeGen/BPF/rodata_5.ll50
2 files changed, 51 insertions, 1 deletions
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 6f5f58554d09..d407edfbd966 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
<< val << '\n');
- SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+ SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
// After replacement, the current node is dead, we need to
// go backward one step to make iterator still work
diff --git a/llvm/test/CodeGen/BPF/rodata_5.ll b/llvm/test/CodeGen/BPF/rodata_5.ll
new file mode 100644
index 000000000000..144a703ad0b2
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/rodata_5.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=bpfel -mattr=+alu32 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=bpfeb -mattr=+alu32 -verify-machineinstrs | FileCheck %s
+;
+; Source Code:
+; struct t {
+; unsigned char a;
+; unsigned char b;
+; unsigned char c;
+; };
+; extern void foo(void *);
+; int test() {
+; struct t v = {
+; .b = 2,
+; };
+; foo(&v);
+; return 0;
+; }
+; Compilation flag:
+; clang -target bpf -O2 -S -emit-llvm t.c
+
+%struct.t = type { i8, i8, i8 }
+
+@__const.test.v = private unnamed_addr constant %struct.t { i8 0, i8 2, i8 0 }, align 1
+
+; Function Attrs: nounwind
+define dso_local i32 @test() local_unnamed_addr {
+entry:
+ %v1 = alloca [3 x i8], align 1
+ %v1.sub = getelementptr inbounds [3 x i8], [3 x i8]* %v1, i64 0, i64 0
+ call void @llvm.lifetime.start.p0i8(i64 3, i8* nonnull %v1.sub)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %v1.sub, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds (%struct.t, %struct.t* @__const.test.v, i64 0, i32 0), i64 3, i1 false)
+ call void @foo(i8* nonnull %v1.sub)
+ call void @llvm.lifetime.end.p0i8(i64 3, i8* nonnull %v1.sub)
+ ret i32 0
+}
+; CHECK-NOT: w{{[0-9]+}} = *(u16 *)
+; CHECK-NOT: w{{[0-9]+}} = *(u8 *)
+; CHECK: *(u16 *)(r10 - 4) = w{{[0-9]+}}
+; CHECK: *(u8 *)(r10 - 2) = w{{[0-9]+}}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
+
+declare dso_local void @foo(i8*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)