summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Spickett <david.spickett@linaro.org>2022-03-23 10:51:08 +0000
committerTom Stellard <tstellar@redhat.com>2022-04-18 19:37:41 -0700
commit571c7d8f6dae1a8797ae3271c0c09fc648b1940b (patch)
tree1e9deaf1f2e5a9ba2a3ba21aa7f67ead4dd1f8a3
parent0f56ce0fb2079c8f1fdcb7f7199d7313f81a863d (diff)
downloadllvm-571c7d8f6dae1a8797ae3271c0c09fc648b1940b.tar.gz
Reland "[llvm][AArch64] Insert "bti j" after call to setjmp"
Cherry-picked from c3b98194df5572bc9b33024b48457538a7213b4c which was originally reviewed as https://reviews.llvm.org/D121707. This reverts commit edb7ba714acba1d18a20d9f4986d2e38aee1d109. This changes BLR_BTI to take variable_ops meaning that we can accept a register or a label. The pattern still expects one argument so we'll never get more than one. Then later we can check the type of the operand to choose BL or BLR to emit. (this is what BLR_RVMARKER does but I missed this detail of it first time around) Also require NoSLSBLRMitigation which I missed in the first version.
-rw-r--r--clang/docs/ClangCommandLineReference.rst2
-rw-r--r--clang/docs/ReleaseNotes.rst6
-rw-r--r--clang/include/clang/Driver/Options.td2
-rw-r--r--clang/lib/Driver/ToolChains/Arch/AArch64.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp34
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp8
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td11
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h6
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp12
-rw-r--r--llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll51
-rw-r--r--llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll83
-rw-r--r--llvm/test/CodeGen/AArch64/setjmp-bti.ll55
15 files changed, 284 insertions, 6 deletions
diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index 8ae7d7f49f16..e41d00c2d4c3 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -3293,7 +3293,7 @@ Work around VLLDM erratum CVE-2021-35465 (ARM only)
.. option:: -mno-bti-at-return-twice
-Do not add a BTI instruction after a setjmp or other return-twice construct (Arm only)
+Do not add a BTI instruction after a setjmp or other return-twice construct (AArch32/AArch64 only)
.. option:: -mno-movt
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5821e41fc733..0c50e168bf48 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -381,6 +381,11 @@ Arm and AArch64 Support in Clang
- The ``attribute((target("branch-protection=...)))`` attributes will now also
work for the ARM backend.
+- When using ``-mbranch-protection=bti`` with AArch64, calls to setjmp will
+ now be followed by a BTI instruction. This is done to be compatible with
+ setjmp implementations that return with a br instead of a ret. You can
+ disable this behaviour using the ``-mno-bti-at-return-twice`` option.
+
SPIR-V Support in Clang
-----------------------
@@ -391,7 +396,6 @@ SPIR-V Support in Clang
- Added linking of separate object files in SPIR-V format using external
``spirv-link`` tool.
-
Floating Point Support in Clang
-------------------------------
- The default setting of FP contraction (FMA) is now -ffp-contract=on (for
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 602586f94603..e0d215840714 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3372,7 +3372,7 @@ def mmark_bti_property : Flag<["-"], "mmark-bti-property">,
def mno_bti_at_return_twice : Flag<["-"], "mno-bti-at-return-twice">,
Group<m_arm_Features_Group>,
HelpText<"Do not add a BTI instruction after a setjmp or other"
- " return-twice construct (Arm only)">;
+ " return-twice construct (Arm/AArch64 only)">;
foreach i = {1-31} in
def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group<m_Group>,
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index ca0ca4bf4eea..53610f0909a2 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -592,4 +592,7 @@ fp16_fml_fallthrough:
// Enabled A53 errata (835769) workaround by default on android
Features.push_back("+fix-cortex-a53-835769");
}
+
+ if (Args.getLastArg(options::OPT_mno_bti_at_return_twice))
+ Features.push_back("+no-bti-at-return-twice");
}
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 70a79864d2c5..70c7b7b3f5dc 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -464,6 +464,11 @@ def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
"FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
+def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
+ "NoBTIAtReturnTwice", "true",
+ "Don't place a BTI instruction "
+ "after a return-twice">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b0f739cc26e6..910f8cdede75 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -86,6 +86,7 @@ private:
unsigned N);
bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
};
@@ -759,6 +760,37 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
return true;
}
+bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ // Expand CALL_BTI pseudo to:
+ // - a branch to the call target
+ // - a BTI instruction
+ // Mark the sequence as a bundle, to avoid passes moving other code in
+ // between.
+
+ MachineInstr &MI = *MBBI;
+ MachineOperand &CallTarget = MI.getOperand(0);
+ assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+ "invalid operand for regular call");
+ unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+ MachineInstr *Call =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
+ Call->addOperand(CallTarget);
+
+ MachineInstr *BTI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
+ // BTI J so that setjmp can BR to this.
+ .addImm(36)
+ .getInstr();
+
+ if (MI.shouldUpdateCallSiteInfo())
+ MBB.getParent()->moveCallSiteInfo(&MI, Call);
+
+ MI.eraseFromParent();
+ finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
+ return true;
+}
+
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
Register CtxReg = MBBI->getOperand(0).getReg();
@@ -1238,6 +1270,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
case AArch64::BLR_RVMARKER:
return expandCALL_RVMARKER(MBB, MBBI);
+ case AArch64::BLR_BTI:
+ return expandCALL_BTI(MBB, MBBI);
case AArch64::StoreSwiftAsyncContext:
return expandStoreSwiftAsyncContext(MBB, MBBI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index c67fa62c7a92..dc5e6807945d 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
#include "AArch64.h"
#include "AArch64CallingConvention.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -3127,6 +3128,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!Callee && !Symbol)
return false;
+ // Allow SelectionDAG isel to handle calls to functions like setjmp that need
+ // a bti instruction following the call.
+ if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget->noBTIAtReturnTwice() &&
+ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+ return false;
+
// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 77260c89df11..4d1cb0720a5a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2278,6 +2278,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
+ MAKE_CASE(AArch64ISD::CALL_BTI)
}
#undef MAKE_CASE
return nullptr;
@@ -6106,6 +6107,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
+ bool GuardWithBTI = false;
+
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget->noBTIAtReturnTwice()) {
+ GuardWithBTI = FuncInfo->branchTargetEnforcement();
+ }
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
@@ -6540,7 +6547,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
Ops.insert(Ops.begin() + 1, GA);
- }
+ } else if (GuardWithBTI)
+ CallOpc = AArch64ISD::CALL_BTI;
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 0ffaf08b8bbe..80b7e84872cd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -55,6 +55,8 @@ enum NodeType : unsigned {
// x29, x29` marker instruction.
CALL_RVMARKER,
+ CALL_BTI, // Function call followed by a BTI instruction.
+
// Produces the full sequence of instructions for getting the thread pointer
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1316161f05f1..2680b5ac094e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -473,6 +473,11 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -2320,6 +2325,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
PseudoInstExpansion<(BLR GPR64:$Rn)>;
def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
Sched<[WriteBrReg]>;
+ def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
+ Sched<[WriteBrReg]>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
@@ -2333,6 +2340,10 @@ def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
(BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
+def : Pat<(AArch64call_bti GPR64:$Rn),
+ (BLR_BTI GPR64:$Rn)>,
+ Requires<[NoSLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 7b2bbad30f85..061db926ee2b 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -243,6 +243,10 @@ protected:
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
+ // Do not place a BTI instruction after a call to a return twice function like
+ // setjmp.
+ bool NoBTIAtReturnTwice = false;
+
bool OutlineAtomics = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -588,6 +592,8 @@ public:
bool fixCortexA53_835769() const { return FixCortexA53_835769; }
+ bool noBTIAtReturnTwice() const { return NoBTIAtReturnTwice; }
+
bool addrSinkUsingGEPs() const override {
// Keeping GEPs inbounds is important for exploiting AArch64
// addressing-modes in ILP32 mode.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 097b93e4fcca..bb6c7938791e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1127,14 +1127,22 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ unsigned Opc = 0;
+ // A call to a returns twice function like setjmp must be followed by a bti
+ // instruction.
+ if (Info.CB && Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget.noBTIAtReturnTwice() &&
+ MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+ Opc = AArch64::BLR_BTI;
+ else
+ Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);
// Tell the call which registers are clobbered.
const uint32_t *Mask;
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const auto *TRI = Subtarget.getRegisterInfo();
AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
diff --git a/llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll b/llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll
new file mode 100644
index 000000000000..cd48245e0a22
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel < %s | FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+
+; Same as setjmp-bti.ll except that we do not enable branch target enforcement for this
+; module. There should be no combination of options that leads to a bti being emitted.
+
+; C source
+; --------
+; extern int setjmp(void*);
+; extern void notsetjmp(void);
+;
+; void bbb(void) {
+; setjmp(0);
+; int (*fnptr)(void*) = setjmp;
+; fnptr(0);
+; notsetjmp();
+; }
+
+define void @bbb() {
+; NOBTI-LABEL: bbb:
+; NOBTI: bl setjmp
+; NOBTI-NOT: hint #36
+; NOBTI: blr x{{[0-9]+}}
+; NOBTI-NOT: hint #36
+; NOBTI: bl notsetjmp
+; NOBTI-NOT: hint #36
+
+entry:
+ %fnptr = alloca i32 (i8*)*, align 8
+ %call = call i32 @setjmp(i8* noundef null) #0
+ store i32 (i8*)* @setjmp, i32 (i8*)** %fnptr, align 8
+ %0 = load i32 (i8*)*, i32 (i8*)** %fnptr, align 8
+ %call1 = call i32 %0(i8* noundef null) #0
+ call void @notsetjmp()
+ ret void
+}
+
+declare i32 @setjmp(i8* noundef) #0
+declare void @notsetjmp()
+
+attributes #0 = { returns_twice }
+
+; !llvm.module.flags = !{!0}
+; !0 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll b/llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll
new file mode 100644
index 000000000000..f9c5416d6daa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-machine-outliner < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -enable-machine-outliner < %s | \
+; RUN: FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -enable-machine-outliner < %s | \
+; RUN: FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+
+; Check that the outliner does not split up the call to setjmp and the bti after it.
+; When we do not insert a bti, it is allowed to move the setjmp call into an outlined function.
+
+; C source
+; --------
+; extern int setjmp(void*);
+;
+; int f(int a, int b, int c, int d) {
+; setjmp(0);
+; return 1 + a * (a + b) / (c + d);
+; }
+;
+; int g(int a, int b, int c, int d) {
+; setjmp(0);
+; return 2 + a * (a + b) / (c + d);
+; }
+
+define i32 @f(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) {
+; BTI-LABEL: f:
+; BTI: bl OUTLINED_FUNCTION_1
+; BTI-NEXT: bl setjmp
+; BTI-NEXT: hint #36
+; BTI-NEXT: bl OUTLINED_FUNCTION_0
+
+; NOBTI: f:
+; NOBTI: bl OUTLINED_FUNCTION_0
+; NOBTI-NEXT: bl OUTLINED_FUNCTION_1
+
+entry:
+ %call = call i32 @setjmp(i8* noundef null) #0
+ %add = add nsw i32 %b, %a
+ %mul = mul nsw i32 %add, %a
+ %add1 = add nsw i32 %d, %c
+ %div = sdiv i32 %mul, %add1
+ %add2 = add nsw i32 %div, 1
+ ret i32 %add2
+}
+
+declare i32 @setjmp(i8* noundef) #0
+
+define i32 @g(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) {
+; BTI-LABEL: g:
+; BTI: bl OUTLINED_FUNCTION_1
+; BTI-NEXT: bl setjmp
+; BTI-NEXT: hint #36
+; BTI-NEXT: bl OUTLINED_FUNCTION_0
+
+; NOBTI: g:
+; NOBTI: bl OUTLINED_FUNCTION_0
+; NOBTI-NEXT: bl OUTLINED_FUNCTION_1
+
+entry:
+ %call = call i32 @setjmp(i8* noundef null) #0
+ %add = add nsw i32 %b, %a
+ %mul = mul nsw i32 %add, %a
+ %add1 = add nsw i32 %d, %c
+ %div = sdiv i32 %mul, %add1
+ %add2 = add nsw i32 %div, 2
+ ret i32 %add2
+}
+
+; NOBTI-LABEL: OUTLINED_FUNCTION_0:
+; NOBTI: b setjmp
+; NOBTI: OUTLINED_FUNCTION_1:
+; NOBTI-LABEL: ret
+
+attributes #0 = { returns_twice }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/AArch64/setjmp-bti.ll b/llvm/test/CodeGen/AArch64/setjmp-bti.ll
new file mode 100644
index 000000000000..ceea80c839a8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setjmp-bti.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+
+; C source
+; --------
+; extern int setjmp(void*);
+; extern void notsetjmp(void);
+;
+; void bbb(void) {
+; setjmp(0);
+; int (*fnptr)(void*) = setjmp;
+; fnptr(0);
+; notsetjmp();
+; }
+
+define void @bbb() {
+; BTI-LABEL: bbb:
+; BTI: bl setjmp
+; BTI-NEXT: hint #36
+; BTI: blr x{{[0-9]+}}
+; BTI-NEXT: hint #36
+; BTI: bl notsetjmp
+; BTI-NOT: hint #36
+
+; NOBTI-LABEL: bbb:
+; NOBTI: bl setjmp
+; NOBTI-NOT: hint #36
+; NOBTI: blr x{{[0-9]+}}
+; NOBTI-NOT: hint #36
+; NOBTI: bl notsetjmp
+; NOBTI-NOT: hint #36
+entry:
+ %fnptr = alloca i32 (i8*)*, align 8
+ %call = call i32 @setjmp(i8* noundef null) #0
+ store i32 (i8*)* @setjmp, i32 (i8*)** %fnptr, align 8
+ %0 = load i32 (i8*)*, i32 (i8*)** %fnptr, align 8
+ %call1 = call i32 %0(i8* noundef null) #0
+ call void @notsetjmp()
+ ret void
+}
+
+declare i32 @setjmp(i8* noundef) #0
+declare void @notsetjmp()
+
+attributes #0 = { returns_twice }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}