5 files changed, 85 insertions, 52 deletions
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 0305094d9f..8a9e4a86e4 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -10150,6 +10150,16 @@ public:
   bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
   bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD);
 
+  // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In
+  // case of error emits appropriate diagnostic and invalidates \p Var.
+  //
+  // \details CUDA allows only empty constructors as initializers for global
+  // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
+  // __shared__ variables whether they are local or not (they all are implicitly
+  // static in CUDA). One exception is that CUDA allows constant initializers
+  // for __constant__ and __device__ variables.
+  void checkAllowedCUDAInitializer(VarDecl *Var);
+
   /// Check whether NewFD is a valid overload for CUDA. Emits
   /// diagnostics and invalidates NewFD if not.
   void checkCUDATargetOverload(FunctionDecl *NewFD,
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index 8224bd85f1..df828c0575 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -471,6 +471,59 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
   return true;
 }
 
+void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
+  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
+    return;
+  const Expr *Init = VD->getInit();
+  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
+      VD->hasAttr<CUDASharedAttr>()) {
+    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
+    bool AllowedInit = false;
+    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
+      AllowedInit =
+          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
+    // We'll allow constant initializers even if it's a non-empty
+    // constructor according to CUDA rules. This deviates from NVCC,
+    // but allows us to handle things like constexpr constructors.
+    if (!AllowedInit &&
+        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
+      AllowedInit = VD->getInit()->isConstantInitializer(
+          Context, VD->getType()->isReferenceType());
+
+    // Also make sure that destructor, if there is one, is empty.
+    if (AllowedInit)
+      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
+        AllowedInit =
+            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
+
+    if (!AllowedInit) {
+      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
+                                  ? diag::err_shared_var_init
+                                  : diag::err_dynamic_var_init)
+          << Init->getSourceRange();
+      VD->setInvalidDecl();
+    }
+  } else {
+    // This is a host-side global variable.  Check that the initializer is
+    // callable from the host side.
+    const FunctionDecl *InitFn = nullptr;
+    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
+      InitFn = CE->getConstructor();
+    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
+      InitFn = CE->getDirectCallee();
+    }
+    if (InitFn) {
+      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
+      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
+        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
+            << InitFnTarget << InitFn;
+        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
+        VD->setInvalidDecl();
+      }
+    }
+  }
+}
+
 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
 // treated as implicitly __host__ __device__, unless:
 //  * it is a variadic function (device-side variadic functions are not
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index 295d89a40d..0502e75c69 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -11629,58 +11629,8 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) {
   // 7.5). We must also apply the same checks to all __shared__
   // variables whether they are local or not. CUDA also allows
   // constant initializers for __constant__ and __device__ variables.
-  if (getLangOpts().CUDA) {
-    const Expr *Init = VD->getInit();
-    if (Init && VD->hasGlobalStorage()) {
-      if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
-          VD->hasAttr<CUDASharedAttr>()) {
-        assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
-        bool AllowedInit = false;
-        if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
-          AllowedInit =
-              isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
-        // We'll allow constant initializers even if it's a non-empty
-        // constructor according to CUDA rules. This deviates from NVCC,
-        // but allows us to handle things like constexpr constructors.
-        if (!AllowedInit &&
-            (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
-          AllowedInit = VD->getInit()->isConstantInitializer(
-              Context, VD->getType()->isReferenceType());
-
-        // Also make sure that destructor, if there is one, is empty.
-        if (AllowedInit)
-          if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
-            AllowedInit =
-                isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
-
-        if (!AllowedInit) {
-          Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
-                                      ? diag::err_shared_var_init
-                                      : diag::err_dynamic_var_init)
-              << Init->getSourceRange();
-          VD->setInvalidDecl();
-        }
-      } else {
-        // This is a host-side global variable.  Check that the initializer is
-        // callable from the host side.
-        const FunctionDecl *InitFn = nullptr;
-        if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
-          InitFn = CE->getConstructor();
-        } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
-          InitFn = CE->getDirectCallee();
-        }
-        if (InitFn) {
-          CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
-          if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
-            Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
-                << InitFnTarget << InitFn;
-            Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
-            VD->setInvalidDecl();
-          }
-        }
-      }
-    }
-  }
+  if (getLangOpts().CUDA)
+    checkAllowedCUDAInitializer(VD);
 
   // Grab the dllimport or dllexport attribute off of the VarDecl.
   const InheritableAttr *DLLAttr = getDLLAttr(VD);
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index a7883c67b8..709b4a18cb 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -4221,6 +4221,9 @@ void Sema::InstantiateVariableInitializer(
 
     ActOnUninitializedDecl(Var);
   }
+
+  if (getLangOpts().CUDA)
+    checkAllowedCUDAInitializer(Var);
 }
 
 /// \brief Instantiate the definition of the given variable from its
diff --git a/test/SemaCUDA/device-var-init.cu b/test/SemaCUDA/device-var-init.cu
index 71f2352843..46cb90da2e 100644
--- a/test/SemaCUDA/device-var-init.cu
+++ b/test/SemaCUDA/device-var-init.cu
@@ -225,3 +225,20 @@ inline __host__ __device__ void hd_emitted_host_only() {
   static int x = 42; // no error on device because this is never codegen'ed there.
 }
 void call_hd_emitted_host_only() { hd_emitted_host_only(); }
+
+// Verify that we also check field initializers in instantiated structs.
+struct NontrivialInitializer {
+  __host__ __device__ NontrivialInitializer() : x(43) {}
+  int x;
+};
+
+template <typename T>
+__global__ void bar() {
+  __shared__ T bad;
+// expected-error@-1 {{initialization is not supported for __shared__ variables.}}
+}
+
+void instantiate() {
+  bar<NontrivialInitializer><<<1, 1>>>();
+// expected-note@-1 {{in instantiation of function template specialization 'bar<NontrivialInitializer>' requested here}}
+}