summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrad King <brad.king@kitware.com>2023-03-14 13:34:42 +0000
committerKitware Robot <kwrobot@kitware.com>2023-03-14 09:35:00 -0400
commitdb4f4ad24e9a0ec8e0cb22b6b0204173d06e1cf8 (patch)
tree371c95db87ee1b9956ad84fd9f889196d027dc60
parent9b68949a27e6962423fb75296b06f4d6d1e6cf73 (diff)
parent2def6a874b52ef70157f101cbca9ee9b92a5a7f5 (diff)
downloadcmake-db4f4ad24e9a0ec8e0cb22b6b0204173d06e1cf8.tar.gz
Merge topic 'support_cubin_fatbin_optix_cuda_output'
2def6a874b CUDA: Add support for CUBIN, FATBIN, and OPTIXIR compilation Acked-by: Kitware Robot <kwrobot@kitware.com> Acked-by: buildbot <buildbot@kitware.com> Merge-request: !8259
-rw-r--r--Auxiliary/vim/syntax/cmake.vim3
-rw-r--r--Help/manual/cmake-properties.7.rst3
-rw-r--r--Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst14
-rw-r--r--Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst14
-rw-r--r--Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst14
-rw-r--r--Help/release/dev/cuda-support-new-compile-modes.rst14
-rw-r--r--Modules/CMakeCUDAInformation.cmake1
-rw-r--r--Modules/Compiler/NVIDIA-CUDA.cmake5
-rw-r--r--Source/cmGeneratorTarget.cxx28
-rw-r--r--Source/cmMakefileTargetGenerator.cxx23
-rw-r--r--Source/cmNinjaTargetGenerator.cxx43
-rw-r--r--Source/cmTarget.cxx43
-rw-r--r--Source/cmVisualStudio10TargetGenerator.cxx25
-rw-r--r--Tests/CudaOnly/CMakeLists.txt3
-rw-r--r--Tests/CudaOnly/CUBIN/CMakeLists.txt21
-rw-r--r--Tests/CudaOnly/CUBIN/kernelA.cu7
-rw-r--r--Tests/CudaOnly/CUBIN/kernelB.cu7
-rw-r--r--Tests/CudaOnly/CUBIN/kernelC.cu7
-rw-r--r--Tests/CudaOnly/CUBIN/main.cu56
-rw-r--r--Tests/CudaOnly/Fatbin/CMakeLists.txt25
-rw-r--r--Tests/CudaOnly/Fatbin/main.cu56
-rw-r--r--Tests/CudaOnly/OptixIR/CMakeLists.txt33
-rw-r--r--Tests/CudaOnly/OptixIR/main.cu53
23 files changed, 463 insertions, 35 deletions
diff --git a/Auxiliary/vim/syntax/cmake.vim b/Auxiliary/vim/syntax/cmake.vim
index f303bd421e..1a47f678ef 100644
--- a/Auxiliary/vim/syntax/cmake.vim
+++ b/Auxiliary/vim/syntax/cmake.vim
@@ -128,7 +128,10 @@ syn keyword cmakeProperty contained
\ CPACK_WIX_ACL
\ CROSSCOMPILING_EMULATOR
\ CUDA_ARCHITECTURES
+ \ CUDA_CUBIN_COMPILATION
\ CUDA_EXTENSIONS
+ \ CUDA_FATBIN_COMPILATION
+ \ CUDA_OPTIX_COMPILATION
\ CUDA_PTX_COMPILATION
\ CUDA_RESOLVE_DEVICE_SYMBOLS
\ CUDA_RUNTIME_LIBRARY
diff --git a/Help/manual/cmake-properties.7.rst b/Help/manual/cmake-properties.7.rst
index 01c9ce8346..8559b0b2a1 100644
--- a/Help/manual/cmake-properties.7.rst
+++ b/Help/manual/cmake-properties.7.rst
@@ -175,7 +175,10 @@ Properties on Targets
/prop_tgt/CONFIG_POSTFIX
/prop_tgt/CROSSCOMPILING_EMULATOR
/prop_tgt/CUDA_ARCHITECTURES
+ /prop_tgt/CUDA_CUBIN_COMPILATION
/prop_tgt/CUDA_EXTENSIONS
+ /prop_tgt/CUDA_FATBIN_COMPILATION
+ /prop_tgt/CUDA_OPTIX_COMPILATION
/prop_tgt/CUDA_PTX_COMPILATION
/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS
/prop_tgt/CUDA_RUNTIME_LIBRARY
diff --git a/Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst b/Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst
new file mode 100644
index 0000000000..f8860aeda3
--- /dev/null
+++ b/Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst
@@ -0,0 +1,14 @@
+CUDA_CUBIN_COMPILATION
+----------------------
+
+.. versionadded:: 3.27
+
+Compile CUDA sources to ``.cubin`` files instead of ``.obj`` files
+within :ref:`Object Libraries`.
+
+For example:
+
+.. code-block:: cmake
+
+ add_library(mycubin OBJECT a.cu b.cu)
+ set_property(TARGET mycubin PROPERTY CUDA_CUBIN_COMPILATION ON)
diff --git a/Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst b/Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst
new file mode 100644
index 0000000000..3d3c715339
--- /dev/null
+++ b/Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst
@@ -0,0 +1,14 @@
+CUDA_FATBIN_COMPILATION
+-----------------------
+
+.. versionadded:: 3.27
+
+Compile CUDA sources to ``.fatbin`` files instead of ``.obj`` files
+within :ref:`Object Libraries`.
+
+For example:
+
+.. code-block:: cmake
+
+ add_library(myfbins OBJECT a.cu b.cu)
+ set_property(TARGET myfbins PROPERTY CUDA_FATBIN_COMPILATION ON)
diff --git a/Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst b/Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst
new file mode 100644
index 0000000000..c2a06a88b7
--- /dev/null
+++ b/Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst
@@ -0,0 +1,14 @@
+CUDA_OPTIX_COMPILATION
+----------------------
+
+.. versionadded:: 3.27
+
+Compile CUDA sources to ``.optixir`` files instead of ``.obj`` files
+within :ref:`Object Libraries`.
+
+For example:
+
+.. code-block:: cmake
+
+ add_library(myoptix OBJECT a.cu b.cu)
+ set_property(TARGET myoptix PROPERTY CUDA_OPTIX_COMPILATION ON)
diff --git a/Help/release/dev/cuda-support-new-compile-modes.rst b/Help/release/dev/cuda-support-new-compile-modes.rst
new file mode 100644
index 0000000000..2d24c167a4
--- /dev/null
+++ b/Help/release/dev/cuda-support-new-compile-modes.rst
@@ -0,0 +1,14 @@
+cuda-support-new-compile-modes
+------------------------------
+
+* A :prop_tgt:`CUDA_CUBIN_COMPILATION` target property was added to
+ :ref:`Object Libraries` to support compiling to ``.cubin`` files
+ instead of host object files. Currently only supported with NVIDIA.
+
+* A :prop_tgt:`CUDA_FATBIN_COMPILATION` target property was added to
+ :ref:`Object Libraries` to support compiling to ``.fatbin`` files
+ instead of host object files. Currently only supported with NVIDIA.
+
+* A :prop_tgt:`CUDA_OPTIX_COMPILATION` target property was added to
+ :ref:`Object Libraries` to support compiling to ``.optixir`` files
+ instead of host object files. Currently only supported with NVIDIA.
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index dea721ebb9..e774088811 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -134,7 +134,6 @@ include(CMakeCommonLanguageInclude)
# CMAKE_CUDA_CREATE_SHARED_LIBRARY
# CMAKE_CUDA_CREATE_SHARED_MODULE
# CMAKE_CUDA_COMPILE_WHOLE_COMPILATION
-# CMAKE_CUDA_COMPILE_PTX_COMPILATION
# CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
# CMAKE_CUDA_LINK_EXECUTABLE
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 0823954302..c839d1cbbb 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -8,6 +8,11 @@ set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu")
set(_CMAKE_CUDA_WHOLE_FLAG "-c")
set(_CMAKE_CUDA_RDC_FLAG "-rdc=true")
set(_CMAKE_CUDA_PTX_FLAG "-ptx")
+set(_CMAKE_CUDA_CUBIN_FLAG "-cubin")
+set(_CMAKE_CUDA_FATBIN_FLAG "-fatbin")
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0")
+ set(_CMAKE_CUDA_OPTIX_FLAG "-optix-ir")
+endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89)
# The -forward-unknown-to-host-compiler flag was only
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index 54cd8ba907..4cfa1d76e7 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -3,6 +3,7 @@
#include "cmGeneratorTarget.h"
#include <algorithm>
+#include <array>
#include <cassert>
#include <cerrno>
#include <cstddef>
@@ -1011,12 +1012,27 @@ const std::string& cmGeneratorTarget::GetObjectName(cmSourceFile const* file)
const char* cmGeneratorTarget::GetCustomObjectExtension() const
{
- static std::string extension;
- const bool has_ptx_extension =
- this->GetPropertyAsBool("CUDA_PTX_COMPILATION");
- if (has_ptx_extension) {
- extension = ".ptx";
- return extension.c_str();
+ struct compiler_mode
+ {
+ std::string variable;
+ std::string extension;
+ };
+ static std::array<compiler_mode, 4> const modes{
+ { { "CUDA_PTX_COMPILATION", ".ptx" },
+ { "CUDA_CUBIN_COMPILATION", ".cubin" },
+ { "CUDA_FATBIN_COMPILATION", ".fatbin" },
+ { "CUDA_OPTIX_COMPILATION", ".optixir" } }
+ };
+
+ std::string const& compiler =
+ this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
+ if (!compiler.empty()) {
+ for (const auto& m : modes) {
+ const bool has_extension = this->GetPropertyAsBool(m.variable);
+ if (has_extension) {
+ return m.extension.c_str();
+ }
+ }
}
return nullptr;
}
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx
index e217dd92fe..2ead7399a0 100644
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -3,6 +3,7 @@
#include "cmMakefileTargetGenerator.h"
#include <algorithm>
+#include <array>
#include <cassert>
#include <cstdio>
#include <iterator>
@@ -977,11 +978,23 @@ void cmMakefileTargetGenerator::WriteObjectRuleFiles(
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " ");
}
- if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
- const std::string& ptxFlag =
- this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG");
- cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag);
- } else {
+
+ static std::array<cm::string_view, 4> const compileModes{
+ { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
+ };
+ bool useNormalCompileMode = true;
+ for (cm::string_view mode : compileModes) {
+ auto propName = cmStrCat("CUDA_", mode, "_COMPILATION");
+ auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG");
+ if (this->GeneratorTarget->GetPropertyAsBool(propName)) {
+ const std::string& flag =
+ this->Makefile->GetRequiredDefinition(defName);
+ cudaCompileMode = cmStrCat(cudaCompileMode, flag);
+ useNormalCompileMode = false;
+ break;
+ }
+ }
+ if (useNormalCompileMode) {
const std::string& wholeFlag =
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag);
diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx
index 8663f46b5d..dc56142dc7 100644
--- a/Source/cmNinjaTargetGenerator.cxx
+++ b/Source/cmNinjaTargetGenerator.cxx
@@ -3,6 +3,7 @@
#include "cmNinjaTargetGenerator.h"
#include <algorithm>
+#include <array>
#include <cassert>
#include <functional>
#include <iterator>
@@ -859,11 +860,22 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang,
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " ");
}
- if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
- const std::string& ptxFlag =
- this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG");
- cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag);
- } else {
+ static std::array<cm::string_view, 4> const compileModes{
+ { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
+ };
+ bool useNormalCompileMode = true;
+ for (cm::string_view mode : compileModes) {
+ auto propName = cmStrCat("CUDA_", mode, "_COMPILATION");
+ auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG");
+ if (this->GeneratorTarget->GetPropertyAsBool(propName)) {
+ const std::string& flag =
+ this->Makefile->GetRequiredDefinition(defName);
+ cudaCompileMode = cmStrCat(cudaCompileMode, flag);
+ useNormalCompileMode = false;
+ break;
+ }
+ }
+ if (useNormalCompileMode) {
const std::string& wholeFlag =
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag);
@@ -1789,11 +1801,22 @@ void cmNinjaTargetGenerator::ExportObjectCompileCommand(
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " ");
}
- if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
- const std::string& ptxFlag =
- this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG");
- cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag);
- } else {
+ static std::array<cm::string_view, 4> const compileModes{
+ { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
+ };
+ bool useNormalCompileMode = true;
+ for (cm::string_view mode : compileModes) {
+ auto propName = cmStrCat("CUDA_", mode, "_COMPILATION");
+ auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG");
+ if (this->GeneratorTarget->GetPropertyAsBool(propName)) {
+ const std::string& flag =
+ this->Makefile->GetRequiredDefinition(defName);
+ cudaCompileMode = cmStrCat(cudaCompileMode, flag);
+ useNormalCompileMode = false;
+ break;
+ }
+ }
+ if (useNormalCompileMode) {
const std::string& wholeFlag =
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag);
diff --git a/Source/cmTarget.cxx b/Source/cmTarget.cxx
index ec872718a6..f06e26b50f 100644
--- a/Source/cmTarget.cxx
+++ b/Source/cmTarget.cxx
@@ -1775,6 +1775,9 @@ MAKE_PROP(COMPILE_FEATURES);
MAKE_PROP(COMPILE_OPTIONS);
MAKE_PROP(PRECOMPILE_HEADERS);
MAKE_PROP(PRECOMPILE_HEADERS_REUSE_FROM);
+MAKE_PROP(CUDA_CUBIN_COMPILATION);
+MAKE_PROP(CUDA_FATBIN_COMPILATION);
+MAKE_PROP(CUDA_OPTIX_COMPILATION);
MAKE_PROP(CUDA_PTX_COMPILATION);
MAKE_PROP(EXPORT_NAME);
MAKE_PROP(IMPORTED);
@@ -1910,14 +1913,38 @@ void cmTarget::StoreProperty(const std::string& prop, ValueType value)
value ? value
: std::string{})) { // NOLINT(bugprone-branch-clone)
/* error was reported by check method */
- } else if (prop == propCUDA_PTX_COMPILATION &&
- this->GetType() != cmStateEnums::OBJECT_LIBRARY) {
- std::ostringstream e;
- e << "CUDA_PTX_COMPILATION property can only be applied to OBJECT "
- "targets (\""
- << this->impl->Name << "\")\n";
- this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e.str());
- return;
+ } else if (prop == propCUDA_CUBIN_COMPILATION ||
+ prop == propCUDA_FATBIN_COMPILATION ||
+ prop == propCUDA_OPTIX_COMPILATION ||
+ prop == propCUDA_PTX_COMPILATION) {
+ auto const& compiler =
+ this->impl->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
+ auto const& compilerVersion =
+ this->impl->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_VERSION");
+ if (this->GetType() != cmStateEnums::OBJECT_LIBRARY) {
+ auto e =
+ cmStrCat(prop, " property can only be applied to OBJECT targets(",
+ this->impl->Name, ")\n");
+ this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e);
+ return;
+ }
+ const bool flag_found =
+ (prop == propCUDA_PTX_COMPILATION &&
+ this->impl->Makefile->GetDefinition("_CMAKE_CUDA_PTX_FLAG")) ||
+ (prop == propCUDA_CUBIN_COMPILATION &&
+ this->impl->Makefile->GetDefinition("_CMAKE_CUDA_CUBIN_FLAG")) ||
+ (prop == propCUDA_FATBIN_COMPILATION &&
+ this->impl->Makefile->GetDefinition("_CMAKE_CUDA_FATBIN_FLAG")) ||
+ (prop == propCUDA_OPTIX_COMPILATION &&
+ this->impl->Makefile->GetDefinition("_CMAKE_CUDA_OPTIX_FLAG"));
+ if (flag_found) {
+ this->impl->Properties.SetProperty(prop, value);
+ } else {
+ auto e = cmStrCat(prop, " property is not supported by ", compiler,
+ " compiler version ", compilerVersion, ".");
+ this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e);
+ return;
+ }
} else if (prop == propPRECOMPILE_HEADERS_REUSE_FROM) {
if (this->GetProperty("PRECOMPILE_HEADERS")) {
std::ostringstream e;
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index ef0fcf3162..8926f9ed15 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3597,13 +3597,13 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) {
cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true");
}
- bool notPtx = true;
+ bool notPtxLike = true;
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
cudaOptions.AddFlag("NvccCompilation", "ptx");
// We drop the %(Extension) component as CMake expects all PTX files
// to not have the source file extension at all
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
- notPtx = false;
+ notPtxLike = false;
if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL,
cudaVersion, "9.0") &&
@@ -3618,9 +3618,24 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
"%(BaseCommandLineTemplate) [CompileOut] [FastMath] "
"[Defines] \"%(FullPath)\"");
}
- }
-
- if (notPtx &&
+ } else if (this->GeneratorTarget->GetPropertyAsBool(
+ "CUDA_CUBIN_COMPILATION")) {
+ cudaOptions.AddFlag("NvccCompilation", "cubin");
+ cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).cubin");
+ notPtxLike = false;
+ } else if (this->GeneratorTarget->GetPropertyAsBool(
+ "CUDA_FATBIN_COMPILATION")) {
+ cudaOptions.AddFlag("NvccCompilation", "fatbin");
+ cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).fatbin");
+ notPtxLike = false;
+ } else if (this->GeneratorTarget->GetPropertyAsBool(
+ "CUDA_OPTIX_COMPILATION")) {
+ cudaOptions.AddFlag("NvccCompilation", "optix-ir");
+ cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).optixir");
+ notPtxLike = false;
+ }
+
+ if (notPtxLike &&
cmSystemTools::VersionCompareGreaterEq(
"8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) {
// Explicitly state that we want this file to be treated as a
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index db08076d26..aa25c4cb09 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -27,6 +27,9 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode.
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
+ add_cuda_test_macro(CudaOnly.CUBIN CudaOnlyCUBIN)
+ add_cuda_test_macro(CudaOnly.Fatbin CudaOnlyFatbin)
+ add_cuda_test_macro(CudaOnly.OptixIR CudaOnlyOptixIR)
endif()
add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
diff --git a/Tests/CudaOnly/CUBIN/CMakeLists.txt b/Tests/CudaOnly/CUBIN/CMakeLists.txt
new file mode 100644
index 0000000000..464714bbba
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 3.18)
+project(CudaCUBIN LANGUAGES CUDA)
+
+
+set(CMAKE_CUDA_ARCHITECTURES all-major)
+
+add_library(CudaCUBIN OBJECT kernelA.cu kernelB.cu kernelC.cu)
+set_property(TARGET CudaCUBIN PROPERTY CUDA_CUBIN_COMPILATION ON)
+set_property(TARGET CudaCUBIN PROPERTY CUDA_ARCHITECTURES native)
+
+add_executable(CudaOnlyCUBIN main.cu)
+target_compile_features(CudaOnlyCUBIN PRIVATE cuda_std_11)
+target_compile_definitions(CudaOnlyCUBIN PRIVATE "CUBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaCUBIN>,~_~>\"")
+
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(CudaOnlyCUBIN PRIVATE CUDA::cuda_driver)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyCUBIN PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/CUBIN/kernelA.cu b/Tests/CudaOnly/CUBIN/kernelA.cu
new file mode 100644
index 0000000000..fbe0d26d70
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelA.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
diff --git a/Tests/CudaOnly/CUBIN/kernelB.cu b/Tests/CudaOnly/CUBIN/kernelB.cu
new file mode 100644
index 0000000000..74782532b9
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelB.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
diff --git a/Tests/CudaOnly/CUBIN/kernelC.cu b/Tests/CudaOnly/CUBIN/kernelC.cu
new file mode 100644
index 0000000000..5f8a0ce93d
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelC.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelC(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
diff --git a/Tests/CudaOnly/CUBIN/main.cu b/Tests/CudaOnly/CUBIN/main.cu
new file mode 100644
index 0000000000..da5249c8c5
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/main.cu
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <cuda.h>
+
+#define GENERATED_HEADER(x) GENERATED_HEADER1(x)
+#define GENERATED_HEADER1(x) <x>
+
+static std::string input_paths = { CUBIN_FILE_PATHS };
+
+int main()
+{
+ const std::string delimiter = "~_~";
+ input_paths += delimiter;
+
+ size_t end = 0;
+ size_t previous_end = 0;
+ std::vector<std::string> actual_paths;
+ while ((end = input_paths.find(delimiter, previous_end)) !=
+ std::string::npos) {
+ actual_paths.emplace_back(
+ input_paths.substr(previous_end, end - previous_end));
+ previous_end = end + 3;
+ }
+
+ cuInit(0);
+ int count = 0;
+ cuDeviceGetCount(&count);
+ if (count == 0) {
+ std::cerr << "No CUDA devices found\n";
+ return 1;
+ }
+
+ CUdevice device;
+ cuDeviceGet(&device, 0);
+
+ CUcontext context;
+ cuCtxCreate(&context, 0, device);
+
+ CUmodule module;
+ for (auto p : actual_paths) {
+ if (p.find(".cubin") == std::string::npos) {
+ std::cout << p << " Doesn't have the .cubin suffix" << p << std::endl;
+ return 1;
+ }
+ std::cout << "trying to load cubin: " << p << std::endl;
+ CUresult result = cuModuleLoad(&module, p.c_str());
+ std::cout << "module pointer: " << module << '\n';
+ if (result != CUDA_SUCCESS || module == nullptr) {
+ std::cerr << "Failed to load the embedded cubin with error: "
+ << static_cast<unsigned int>(result) << '\n';
+ return 1;
+ }
+ }
+}
diff --git a/Tests/CudaOnly/Fatbin/CMakeLists.txt b/Tests/CudaOnly/Fatbin/CMakeLists.txt
new file mode 100644
index 0000000000..db0dc227e0
--- /dev/null
+++ b/Tests/CudaOnly/Fatbin/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 3.18)
+project(CudaFATBIN LANGUAGES CUDA)
+
+
+set(CMAKE_CUDA_ARCHITECTURES all-major)
+
+add_library(CudaFATBIN OBJECT
+${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu
+${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu
+${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu)
+
+set_property(TARGET CudaFATBIN PROPERTY CUDA_FATBIN_COMPILATION ON)
+
+# Will use `cuModuleLoadFatBinary` to load the fatbinaries
+add_executable(CudaOnlyFatbin main.cu)
+target_compile_features(CudaOnlyFatbin PRIVATE cuda_std_11)
+target_compile_definitions(CudaOnlyFatbin PRIVATE "FATBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaFATBIN>,~_~>\"")
+
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(CudaOnlyFatbin PRIVATE CUDA::cuda_driver)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyFatbin PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/Fatbin/main.cu b/Tests/CudaOnly/Fatbin/main.cu
new file mode 100644
index 0000000000..903feee6be
--- /dev/null
+++ b/Tests/CudaOnly/Fatbin/main.cu
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <cuda.h>
+
+#define GENERATED_HEADER(x) GENERATED_HEADER1(x)
+#define GENERATED_HEADER1(x) <x>
+
+static std::string input_paths = { FATBIN_FILE_PATHS };
+
+int main()
+{
+ const std::string delimiter = "~_~";
+ input_paths += delimiter;
+
+ size_t end = 0;
+ size_t previous_end = 0;
+ std::vector<std::string> actual_paths;
+ while ((end = input_paths.find(delimiter, previous_end)) !=
+ std::string::npos) {
+ actual_paths.emplace_back(
+ input_paths.substr(previous_end, end - previous_end));
+ previous_end = end + 3;
+ }
+
+ cuInit(0);
+ int count = 0;
+ cuDeviceGetCount(&count);
+ if (count == 0) {
+ std::cerr << "No CUDA devices found\n";
+ return 1;
+ }
+
+ CUdevice device;
+ cuDeviceGet(&device, 0);
+
+ CUcontext context;
+ cuCtxCreate(&context, 0, device);
+
+ CUmodule module;
+ for (auto p : actual_paths) {
+ if (p.find(".fatbin") == std::string::npos) {
+ std::cout << p << " Doesn't have the .fatbin suffix" << p << std::endl;
+ return 1;
+ }
+ std::cout << "trying to load fatbin: " << p << std::endl;
+ CUresult result = cuModuleLoad(&module, p.c_str());
+ std::cout << "module pointer: " << module << '\n';
+ if (result != CUDA_SUCCESS || module == nullptr) {
+ std::cerr << "Failed to load the embedded fatbin with error: "
+ << static_cast<unsigned int>(result) << '\n';
+ return 1;
+ }
+ }
+}
diff --git a/Tests/CudaOnly/OptixIR/CMakeLists.txt b/Tests/CudaOnly/OptixIR/CMakeLists.txt
new file mode 100644
index 0000000000..afeabdade7
--- /dev/null
+++ b/Tests/CudaOnly/OptixIR/CMakeLists.txt
@@ -0,0 +1,33 @@
+cmake_minimum_required(VERSION 3.18)
+project(CudaOptix LANGUAGES CUDA)
+
+
+set(CMAKE_CUDA_ARCHITECTURES all-major)
+
+add_library(CudaOptix OBJECT
+ ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu)
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0")
+ set_property(TARGET CudaOptix PROPERTY CUDA_OPTIX_COMPILATION ON)
+endif()
+
+set_property(TARGET CudaOptix PROPERTY CUDA_ARCHITECTURES native)
+
+add_executable(CudaOnlyOptixIR main.cu)
+target_compile_features(CudaOnlyOptixIR PRIVATE cuda_std_11)
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0")
+ target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaOptix>,~_~>\"")
+else()
+ target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"NO_OPTIX_SUPPORT\"")
+endif()
+
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(CudaOnlyOptixIR PRIVATE CUDA::cuda_driver)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyOptixIR PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/OptixIR/main.cu b/Tests/CudaOnly/OptixIR/main.cu
new file mode 100644
index 0000000000..c79829b297
--- /dev/null
+++ b/Tests/CudaOnly/OptixIR/main.cu
@@ -0,0 +1,53 @@
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <cuda.h>
+
+#define GENERATED_HEADER(x) GENERATED_HEADER1(x)
+#define GENERATED_HEADER1(x) <x>
+
+static std::string input_paths = { OPTIX_FILE_PATHS };
+
+int main()
+{
+ if (input_paths == "NO_OPTIX_SUPPORT") {
+ return 0;
+ }
+
+ const std::string delimiter = "~_~";
+ input_paths += delimiter;
+
+ size_t end = 0;
+ size_t previous_end = 0;
+ std::vector<std::string> actual_paths;
+ while ((end = input_paths.find(delimiter, previous_end)) !=
+ std::string::npos) {
+ actual_paths.emplace_back(
+ input_paths.substr(previous_end, end - previous_end));
+ previous_end = end + 3;
+ }
+
+ if (actual_paths.empty()) {
+ std::cerr << "Failed to parse OPTIX_FILE_PATHS" << std::endl;
+ return 1;
+ }
+
+ const std::uint32_t optix_magic_value = 0x7f4e43ed;
+ for (auto p : actual_paths) {
+ if (p.find(".optixir") == std::string::npos) {
+ std::cout << p << " Doesn't have the .optixir suffix" << p << std::endl;
+ return 1;
+ }
+ std::ifstream input(p, std::ios::binary);
+ std::uint32_t value;
+ input.read(reinterpret_cast<char*>(&value), sizeof(value));
+ if (value != optix_magic_value) {
+ std::cerr << p << " Doesn't look like an optix-ir file" << std::endl;
+ return 1;
+ }
+ }
+
+ return 0;
+}