diff options
-rw-r--r-- | Source/cmMakefileTargetGenerator.cxx | 20 | ||||
-rw-r--r-- | Source/cmNinjaNormalTargetGenerator.cxx | 10 |
2 files changed, 15 insertions, 15 deletions
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 6324b2ef83..98c61fe8be 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -1519,22 +1519,13 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( // Link device code for each architecture. for (const std::string& architectureKind : architectures) { - // Clang always generates real code, so strip the specifier. - const std::string architecture = - architectureKind.substr(0, architectureKind.find('-')); - const std::string cubin = - cmStrCat(objectDir, "sm_", architecture, ".cubin"); - - profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinaryDepends.emplace_back(cubin); - std::string registerFileCmd; // The generated register file contains macros that when expanded // register the device routines. Because the routines are the same for // all architectures the register file will be the same too. Thus // generate it only on the first invocation to reduce overhead. - if (fatbinaryDepends.size() == 1) { + if (fatbinaryDepends.empty()) { std::string const registerFileRel = cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); registerFileCmd = @@ -1542,6 +1533,15 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( cleanFiles.push_back(registerFileRel); } + // Clang always generates real code, so strip the specifier. + const std::string architecture = + architectureKind.substr(0, architectureKind.find('-')); + const std::string cubin = + cmStrCat(objectDir, "sm_", architecture, ".cubin"); + + profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinaryDepends.emplace_back(cubin); + std::string command = cmStrCat( this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 5a4c6521d8..493bd4ac85 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( const std::string cubin = cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin"); - fatbinary.Variables["PROFILES"] += - cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinary.ExplicitDeps.emplace_back(cubin); - cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config)); dlink.ExplicitDeps = explicitDeps; dlink.Outputs = { cubin }; @@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( // the device routines. Because the routines are the same for all // architectures the register file will be the same too. Thus generate it // only on the first invocation to reduce overhead. - if (fatbinary.ExplicitDeps.size() == 1) { + if (fatbinary.ExplicitDeps.empty()) { dlink.Variables["REGISTER"] = cmStrCat( "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h"); } + fatbinary.Variables["PROFILES"] += + cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinary.ExplicitDeps.emplace_back(cubin); + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink); } |