[PowerPC] Unaligned FP default should apply to scalars only

As reported in PR45186, we can be in a situation where unaligned memory accesses should not be handled for FP scalars even though VSX (which allows unaligned access for vectors) is available. Change the default so that it applies only to scalars.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=45186

(cherry picked from commit 099a875f28d0131a6ae85af91b9eb8627917fbbe)
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2020-05-26 09:58:25 -0500
committer: Tom Stellard <tstellar@redhat.com> 2020-06-23 13:57:14 -0700
commit: 177a9ac3c6bcb89ca2f900e20caad92fdeabe9d2 (patch)
tree: aaa5c93304a7ab605b87af7c54f2eee7063042e6
parent: 8f299fd2cff7d99c1aacf602ee50e96ded59d706 (diff)
download: llvm-177a9ac3c6bcb89ca2f900e20caad92fdeabe9d2.tar.gz
2 files changed, 134 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cdefb38ec0ae..ca1649fae258 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15279,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   if (!VT.isSimple())
     return false;
 
-  if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+  if (VT.isFloatingPoint() && !VT.isVector() &&
+      !Subtarget.allowsUnalignedFPAccess())
     return false;
 
   if (VT.getSimpleVT().isVector()) {
diff --git a/llvm/test/CodeGen/PowerPC/pr45186.ll b/llvm/test/CodeGen/PowerPC/pr45186.ll
new file mode 100644
index 000000000000..92f748e3ef5a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr45186.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64-- -mattr=+vsx \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+%struct.anon = type { i64, i64 }
+
+@d = local_unnamed_addr global %struct.anon zeroinitializer, align 8
+
+; Function Attrs: norecurse nounwind readonly
+define i64 @e(i8* nocapture readonly %f) local_unnamed_addr #0 {
+; CHECK-LABEL: e:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ldx r3, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i8, i8* %f, align 1
+  %conv = zext i8 %0 to i64
+  %shl = shl nuw i64 %conv, 56
+  %arrayidx1 = getelementptr inbounds i8, i8* %f, i64 1
+  %1 = load i8, i8* %arrayidx1, align 1
+  %conv2 = zext i8 %1 to i64
+  %shl3 = shl nuw nsw i64 %conv2, 48
+  %or = or i64 %shl3, %shl
+  %arrayidx4 = getelementptr inbounds i8, i8* %f, i64 2
+  %2 = load i8, i8* %arrayidx4, align 1
+  %conv5 = zext i8 %2 to i64
+  %shl6 = shl nuw nsw i64 %conv5, 40
+  %or7 = or i64 %or, %shl6
+  %arrayidx8 = getelementptr inbounds i8, i8* %f, i64 3
+  %3 = load i8, i8* %arrayidx8, align 1
+  %conv9 = zext i8 %3 to i64
+  %shl10 = shl nuw nsw i64 %conv9, 32
+  %or11 = or i64 %or7, %shl10
+  %arrayidx12 = getelementptr inbounds i8, i8* %f, i64 4
+  %4 = load i8, i8* %arrayidx12, align 1
+  %conv13 = zext i8 %4 to i64
+  %shl14 = shl nuw nsw i64 %conv13, 24
+  %or15 = or i64 %or11, %shl14
+  %arrayidx16 = getelementptr inbounds i8, i8* %f, i64 5
+  %5 = load i8, i8* %arrayidx16, align 1
+  %conv17 = zext i8 %5 to i64
+  %shl18 = shl nuw nsw i64 %conv17, 16
+  %or20 = or i64 %or15, %shl18
+  %arrayidx21 = getelementptr inbounds i8, i8* %f, i64 6
+  %6 = load i8, i8* %arrayidx21, align 1
+  %conv22 = zext i8 %6 to i64
+  %shl23 = shl nuw nsw i64 %conv22, 8
+  %or25 = or i64 %or20, %shl23
+  %arrayidx26 = getelementptr inbounds i8, i8* %f, i64 7
+  %7 = load i8, i8* %arrayidx26, align 1
+  %conv27 = zext i8 %7 to i64
+  %or28 = or i64 %or25, %conv27
+  ret i64 %or28
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @g() local_unnamed_addr #0 {
+; CHECK-LABEL: g:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    addis r4, r2, .LC1@toc@ha
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    ld r4, .LC1@toc@l(r4)
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    stxvd2x vs0, 0, r4
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i8, i8* getelementptr inbounds (i8, i8* bitcast (void ()* @g to i8*), i64 8), align 1
+  %conv.i = zext i8 %0 to i64
+  %shl.i = shl nuw i64 %conv.i, 56
+  %1 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 9), align 1
+  %conv2.i = zext i8 %1 to i64
+  %shl3.i = shl nuw nsw i64 %conv2.i, 48
+  %or.i = or i64 %shl3.i, %shl.i
+  %2 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 10), align 1
+  %conv5.i = zext i8 %2 to i64
+  %shl6.i = shl nuw nsw i64 %conv5.i, 40
+  %or7.i = or i64 %or.i, %shl6.i
+  %3 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 11), align 1
+  %conv9.i = zext i8 %3 to i64
+  %shl10.i = shl nuw nsw i64 %conv9.i, 32
+  %or11.i = or i64 %or7.i, %shl10.i
+  %4 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 12), align 1
+  %conv13.i = zext i8 %4 to i64
+  %shl14.i = shl nuw nsw i64 %conv13.i, 24
+  %or15.i = or i64 %or11.i, %shl14.i
+  %5 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 13), align 1
+  %conv17.i = zext i8 %5 to i64
+  %shl18.i = shl nuw nsw i64 %conv17.i, 16
+  %or20.i = or i64 %or15.i, %shl18.i
+  %6 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 14), align 1
+  %conv22.i = zext i8 %6 to i64
+  %shl23.i = shl nuw nsw i64 %conv22.i, 8
+  %or25.i = or i64 %or20.i, %shl23.i
+  %7 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 15), align 1
+  %conv27.i = zext i8 %7 to i64
+  %or28.i = or i64 %or25.i, %conv27.i
+  store i64 %or28.i, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i64 0, i32 1), align 8
+  %8 = load i8, i8* bitcast (void ()* @g to i8*), align 1
+  %conv.i2 = zext i8 %8 to i64
+  %shl.i3 = shl nuw i64 %conv.i2, 56
+  %9 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 1), align 1
+  %conv2.i4 = zext i8 %9 to i64
+  %shl3.i5 = shl nuw nsw i64 %conv2.i4, 48
+  %or.i6 = or i64 %shl3.i5, %shl.i3
+  %10 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 2), align 1
+  %conv5.i7 = zext i8 %10 to i64
+  %shl6.i8 = shl nuw nsw i64 %conv5.i7, 40
+  %or7.i9 = or i64 %or.i6, %shl6.i8
+  %11 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 3), align 1
+  %conv9.i10 = zext i8 %11 to i64
+  %shl10.i11 = shl nuw nsw i64 %conv9.i10, 32
+  %or11.i12 = or i64 %or7.i9, %shl10.i11
+  %12 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 4), align 1
+  %conv13.i13 = zext i8 %12 to i64
+  %shl14.i14 = shl nuw nsw i64 %conv13.i13, 24
+  %or15.i15 = or i64 %or11.i12, %shl14.i14
+  %13 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 5), align 1
+  %conv17.i16 = zext i8 %13 to i64
+  %shl18.i17 = shl nuw nsw i64 %conv17.i16, 16
+  %or20.i18 = or i64 %or15.i15, %shl18.i17
+  %14 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 6), align 1
+  %conv22.i19 = zext i8 %14 to i64
+  %shl23.i20 = shl nuw nsw i64 %conv22.i19, 8
+  %or25.i21 = or i64 %or20.i18, %shl23.i20
+  %15 = load i8, i8* getelementptr (i8, i8* bitcast (void ()* @g to i8*), i64 7), align 1
+  %conv27.i22 = zext i8 %15 to i64
+  %or28.i23 = or i64 %or25.i21, %conv27.i22
+  store i64 %or28.i23, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i64 0, i32 0), align 8
+  ret void
+}
+
+attributes #0 = { nounwind }
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2020-05-26 09:58:25 -0500
committer	Tom Stellard <tstellar@redhat.com>	2020-06-23 13:57:14 -0700
commit	177a9ac3c6bcb89ca2f900e20caad92fdeabe9d2 (patch)
tree	aaa5c93304a7ab605b87af7c54f2eee7063042e6
parent	8f299fd2cff7d99c1aacf602ee50e96ded59d706 (diff)
download	llvm-177a9ac3c6bcb89ca2f900e20caad92fdeabe9d2.tar.gz