summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang/lib/AST/ASTContext.cpp4
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp10
-rw-r--r--clang/test/CodeGenCUDA/device-var-linkage.cu31
-rw-r--r--clang/test/CodeGenCUDA/kernel-in-anon-ns.cu46
-rw-r--r--clang/test/CodeGenCUDA/managed-var.cu12
-rw-r--r--clang/test/CodeGenCUDA/static-device-var-rdc.cu18
6 files changed, 91 insertions, 30 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index b554cf833b44..e4b3827b8714 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12267,7 +12267,9 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
// anonymous name space needs to be externalized to avoid duplicate symbols.
return (IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
- (D->hasAttr<CUDAGlobalAttr>() && D->isInAnonymousNamespace());
+ (D->hasAttr<CUDAGlobalAttr>() &&
+ basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) ==
+ GVA_Internal);
}
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 65b9f4e40dc1..2777fc22600d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -6647,6 +6647,12 @@ bool CodeGenModule::stopAutoInit() {
void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const {
- OS << (isa<VarDecl>(D) ? "__static__" : ".anon.")
- << getContext().getCUIDHash();
+ StringRef Tag;
+ // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
+ // postfix beginning with '.' since the symbol name can be demangled.
+ if (LangOpts.HIP)
+ Tag = (isa<VarDecl>(D) ? ".static." : ".intern.");
+ else
+ Tag = (isa<VarDecl>(D) ? "__static__" : "__intern__");
+ OS << Tag << getContext().getCUIDHash();
}
diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu
index d830802c8206..2c3f6023acae 100644
--- a/clang/test/CodeGenCUDA/device-var-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -1,15 +1,18 @@
-// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,NORDC %s
-// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,RDC %s
-// RUN: %clang_cc1 -triple nvptx \
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
-// RUN: %clang_cc1 -triple nvptx \
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefixes=CUDA %s
#include "Inputs/cuda.h"
@@ -24,7 +27,9 @@ __constant__ int v2;
// DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
// NORDC-H-DAG: @v3 = internal externally_initialized global i32* null
// RDC-H-DAG: @v3 = externally_initialized global i32* null
+#if __HIP__
__managed__ int v3;
+#endif
// DEV-DAG: @ev1 = external addrspace(1) global i32
// HOST-DAG: @ev1 = external global i32
@@ -34,25 +39,35 @@ extern __device__ int ev1;
extern __constant__ int ev2;
// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)*
// HOST-DAG: @ev3 = external externally_initialized global i32*
+#if __HIP__
extern __managed__ int ev3;
+#endif
// NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
+// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
static __device__ int sv1;
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
+// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
+// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
static __constant__ int sv2;
// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
+#if __HIP__
static __managed__ int sv3;
+#endif
__device__ __host__ int work(int *x);
__device__ __host__ int fun1() {
- return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3);
+ return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2)
+#if __HIP__
+ + work(&ev3) + work(&sv3)
+#endif
+ ;
}
// HOST: hipRegisterVar({{.*}}@v1
diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
index 4243cec796a8..bc753d76e5c1 100644
--- a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
+++ b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
@@ -6,19 +6,53 @@
// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.host
-// RUN: cat %t.dev %t.host | FileCheck %s
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
+
+// RUN: echo "GPU binary" > %t.fatbin
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
+// RUN: -emit-llvm -o - %s > %t.dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
+// RUN: -emit-llvm -o - %s > %t.host
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
#include "Inputs/cuda.h"
-// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]](
-// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00"
-// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]]
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
+
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
+
+// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
+// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
+// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"
+
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]]
+
+
+template <typename T>
+__global__ void tempKern(T x) {}
namespace {
-__global__ void kernel() {
-}
+ __global__ void kernel() {}
+ struct X {};
+ X x;
+ auto lambda = [](){};
}
void test() {
kernel<<<1, 1>>>();
+
+ tempKern<<<1, 1>>>(x);
+
+ tempKern<<<1, 1>>>(lambda);
}
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 96657f0f7a13..3b8540377ab2 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -1,5 +1,3 @@
-// REQUIRES: x86-registered-target, amdgpu-registered-target
-
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
// RUN: -check-prefixes=COMMON,DEV,NORDC-D %s
@@ -52,15 +50,15 @@ extern __managed__ int ex;
// NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4
// NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
-// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
+// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// HOST-DAG: @_ZL2sx.managed = internal global i32 1
// HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
// NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00"
-// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00"
+// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00"
-// POSTFIX: @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00"
+// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00"
static __managed__ int sx = 1;
// DEV-DAG: @llvm.compiler.used
diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
index bb750bd91a92..56ec2faf1e1a 100644
--- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -40,6 +40,11 @@
// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
// RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s
+// Check postfix for CUDA.
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \
+// RUN: -check-prefixes=CUDA %s
#include "Inputs/cuda.h"
@@ -55,11 +60,12 @@
// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
// Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00"
+// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00"
+// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00"
+// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00"
static __device__ int x;
@@ -73,8 +79,8 @@ static __device__ int x2;
// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
// Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
-// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00"
+// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
static __constant__ int y;