-rw-r--r--  openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp                          | 219
-rw-r--r--  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp  |  12
-rw-r--r--  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h    |  74
-rw-r--r--  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp                            | 301
-rw-r--r--  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp               |   4
5 files changed, 464 insertions(+), 146 deletions(-)
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 0d2d8fae149e..da8e318cf4c7 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -189,8 +189,11 @@ struct AMDGPUMemoryPoolTy {
/// Getter of the HSA memory pool.
hsa_amd_memory_pool_t get() const { return MemoryPool; }
- /// Indicate if it belongs to the global segment.
+ /// Indicate the segment to which the memory pool belongs.
bool isGlobal() const { return (Segment == HSA_AMD_SEGMENT_GLOBAL); }
+ bool isReadOnly() const { return (Segment == HSA_AMD_SEGMENT_READONLY); }
+ bool isPrivate() const { return (Segment == HSA_AMD_SEGMENT_PRIVATE); }
+ bool isGroup() const { return (Segment == HSA_AMD_SEGMENT_GROUP); }
/// Indicate if it is fine-grained memory. Valid only for global.
bool isFineGrained() const {
@@ -246,7 +249,6 @@ struct AMDGPUMemoryPoolTy {
return Plugin::check(Status, "Error in hsa_amd_agents_allow_access: %s");
}
-private:
/// Get attribute from the memory pool.
template <typename Ty>
Error getAttr(hsa_amd_memory_pool_info_t Kind, Ty &Value) const {
@@ -255,6 +257,11 @@ private:
return Plugin::check(Status, "Error in hsa_amd_memory_pool_get_info: %s");
}
+ template <typename Ty>
+ hsa_status_t getAttrRaw(hsa_amd_memory_pool_info_t Kind, Ty &Value) const {
+ return hsa_amd_memory_pool_get_info(MemoryPool, Kind, &Value);
+ }
+
/// Get attribute from the memory pool relating to an agent.
template <typename Ty>
Error getAttr(hsa_agent_t Agent, hsa_amd_agent_memory_pool_info_t Kind,
@@ -266,6 +273,7 @@ private:
"Error in hsa_amd_agent_memory_pool_get_info: %s");
}
+private:
/// The HSA memory pool.
hsa_amd_memory_pool_t MemoryPool;
@@ -2100,8 +2108,206 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Print information about the device.
- Error printInfoImpl() override {
- // TODO: Implement the basic info.
+ Error obtainInfoImpl(InfoQueueTy &Info) override {
+ char TmpChar[1000];
+ const char *TmpCharPtr;
+ uint16_t Major, Minor;
+ uint32_t TmpUInt, TmpUInt2;
+ uint32_t CacheSize[4];
+ size_t TmpSt;
+ bool TmpBool;
+ uint16_t WorkgrpMaxDim[3];
+ hsa_dim3_t GridMaxDim;
+ hsa_status_t Status, Status2;
+
+ Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
+ Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
+ if (Status == HSA_STATUS_SUCCESS && Status2 == HSA_STATUS_SUCCESS)
+ Info.add("HSA Runtime Version",
+ std::to_string(Major) + "." + std::to_string(Minor));
+
+ Info.add("HSA OpenMP Device Number", DeviceId);
+
+ Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_PRODUCT_NAME, TmpChar);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Product Name", TmpChar);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_NAME, TmpChar);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Device Name", TmpChar);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_VENDOR_NAME, TmpChar);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Vendor Name", TmpChar);
+
+ hsa_device_type_t DevType;
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
+ if (Status == HSA_STATUS_SUCCESS) {
+ switch (DevType) {
+ case HSA_DEVICE_TYPE_CPU:
+ TmpCharPtr = "CPU";
+ break;
+ case HSA_DEVICE_TYPE_GPU:
+ TmpCharPtr = "GPU";
+ break;
+ case HSA_DEVICE_TYPE_DSP:
+ TmpCharPtr = "DSP";
+ break;
+ default:
+ TmpCharPtr = "Unknown";
+ }
+ Info.add("Device Type", TmpCharPtr);
+ }
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_QUEUES_MAX, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Max Queues", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_QUEUE_MIN_SIZE, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Queue Min Size", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_QUEUE_MAX_SIZE, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Queue Max Size", TmpUInt);
+
+ // FIXME: This is deprecated according to HSA documentation. But using
+ // hsa_agent_iterate_caches and hsa_cache_get_info breaks execution during
+ // runtime.
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
+ if (Status == HSA_STATUS_SUCCESS) {
+ Info.add("Cache");
+
+ for (int I = 0; I < 4; I++)
+ if (CacheSize[I])
+ Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
+ }
+
+ Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Cacheline Size", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Max Clock Freq", TmpUInt, "MHz");
+
+ Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Compute Units", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("SIMD per CU", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_FAST_F16_OPERATION, TmpBool);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Fast F16 Operation", TmpBool);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_WAVEFRONT_SIZE, TmpUInt2);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Wavefront Size", TmpUInt2);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Workgroup Max Size", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
+ if (Status == HSA_STATUS_SUCCESS) {
+ Info.add("Workgroup Max Size per Dimension");
+ Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
+ Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
+ Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
+ }
+
+ Status = getDeviceAttrRaw(
+ (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS) {
+ Info.add("Max Waves Per CU", TmpUInt);
+ Info.add("Max Work-item Per CU", TmpUInt * TmpUInt2);
+ }
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_SIZE, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Grid Max Size", TmpUInt);
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
+ if (Status == HSA_STATUS_SUCCESS) {
+ Info.add("Grid Max Size per Dimension");
+ Info.add<InfoLevel2>("x", GridMaxDim.x);
+ Info.add<InfoLevel2>("y", GridMaxDim.y);
+ Info.add<InfoLevel2>("z", GridMaxDim.z);
+ }
+
+ Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add("Max fbarriers/Workgrp", TmpUInt);
+
+ Info.add("Memory Pools");
+ for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
+ std::string TmpStr, TmpStr2;
+
+ if (Pool->isGlobal())
+ TmpStr = "Global";
+ else if (Pool->isReadOnly())
+ TmpStr = "ReadOnly";
+ else if (Pool->isPrivate())
+ TmpStr = "Private";
+ else if (Pool->isGroup())
+ TmpStr = "Group";
+ else
+ TmpStr = "Unknown";
+
+ Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
+
+ if (Pool->isGlobal()) {
+ if (Pool->isFineGrained())
+ TmpStr2 += "Fine Grained ";
+ if (Pool->isCoarseGrained())
+ TmpStr2 += "Coarse Grained ";
+ if (Pool->supportsKernelArgs())
+ TmpStr2 += "Kernarg ";
+
+ Info.add<InfoLevel3>("Flags", TmpStr2);
+ }
+
+ Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add<InfoLevel3>("Size", TmpSt, "bytes");
+
+ Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
+ TmpBool);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add<InfoLevel3>("Allocatable", TmpBool);
+
+ Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
+ TmpSt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
+
+ Status = Pool->getAttrRaw(
+ HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
+
+ Status =
+ Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add<InfoLevel3>("Accessable by all", TmpBool);
+ }
+
+ Info.add("ISAs");
+ auto Err = utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
+ Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
+ if (Status == HSA_STATUS_SUCCESS)
+ Info.add<InfoLevel2>("Name", TmpChar);
+
+ return Status;
+ });
+
+ // Silently consume the error.
+ if (Err)
+ consumeError(std::move(Err));
+
return Plugin::success();
}
@@ -2126,6 +2332,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::check(Status, "Error in hsa_agent_get_info: %s");
}
+ template <typename Ty>
+ hsa_status_t getDeviceAttrRaw(uint32_t Kind, Ty &Value) {
+ return hsa_agent_get_info(Agent, (hsa_agent_info_t)Kind, &Value);
+ }
+
/// Get the device agent.
hsa_agent_t getAgent() const override { return Agent; }
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 7ebc1d1092cd..8899f457ffd9 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -995,8 +995,16 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
}
Error GenericDeviceTy::printInfo() {
- // TODO: Print generic information here
- return printInfoImpl();
+ InfoQueueTy InfoQueue;
+
+ // Get the vendor-specific info entries describing the device properties.
+ if (auto Err = obtainInfoImpl(InfoQueue))
+ return Err;
+
+ // Print all info entries.
+ InfoQueue.print();
+
+ return Plugin::success();
}
Error GenericDeviceTy::createEvent(void **EventPtrStorage) {
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index a91ea81183c2..542d5185e919 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -13,6 +13,7 @@
#include <cstddef>
#include <cstdint>
+#include <deque>
#include <list>
#include <map>
#include <shared_mutex>
@@ -33,6 +34,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
namespace llvm {
@@ -84,6 +86,76 @@ private:
__tgt_async_info *AsyncInfoPtr;
};
+/// The information level represents the level of a key-value property in the
+/// info tree print (i.e. indentation). The first level should be the default.
+enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
+
+/// Class for storing device information to be printed later. An object of
+/// this type acts as a queue of key-value properties. Each property has a
+/// key, a value, and an optional unit for the value. For printing purposes,
+/// the information can be classified into several levels. These levels are
+/// useful for defining sections and subsections. Thus, each key-value
+/// property also has an additional field indicating the level to which it
+/// belongs. Notice that we use the level to determine the indentation of the
+/// key-value property at printing time. See the enum InfoLevelKind for the
+/// list of accepted levels.
+class InfoQueueTy {
+ struct InfoQueueEntryTy {
+ std::string Key;
+ std::string Value;
+ std::string Units;
+ uint64_t Level;
+ };
+
+ std::deque<InfoQueueEntryTy> Queue;
+
+public:
+ /// Add a new info entry to the queue. The entry requires at least a key
+ /// string in \p Key. The value in \p Value is optional and can be any type
+ /// that is representable as a string. The unit in \p Units is optional and
+ /// must be a string. The info level is a template parameter that defaults to
+ /// the first level (top level).
+ template <InfoLevelKind L = InfoLevel1, typename T = std::string>
+ void add(const std::string &Key, T Value = T(),
+ const std::string &Units = std::string()) {
+ assert(!Key.empty() && "Invalid info key");
+
+ // Convert the value to a string depending on its type.
+ if constexpr (std::is_same_v<T, bool>)
+ Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
+ else if constexpr (std::is_arithmetic_v<T>)
+ Queue.push_back({Key, std::to_string(Value), Units, L});
+ else
+ Queue.push_back({Key, Value, Units, L});
+ }
+
+ /// Print all info entries added to the queue.
+ void print() const {
+ // We print four spaces for each level.
+ constexpr uint64_t IndentSize = 4;
+
+ // Find the maximum key length (level + key) to compute the individual
+ // indentation of each entry.
+ uint64_t MaxKeySize = 0;
+ for (const auto &Entry : Queue) {
+ uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
+ if (KeySize > MaxKeySize)
+ MaxKeySize = KeySize;
+ }
+
+ // Print all info entries.
+ for (const auto &Entry : Queue) {
+ // Compute the indentations for the current entry.
+ uint64_t KeyIndentSize = Entry.Level * IndentSize;
+ uint64_t ValIndentSize =
+ MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
+
+ llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
+ << std::string(ValIndentSize, ' ') << Entry.Value
+ << (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
+ }
+ }
+};
+
/// Class wrapping a __tgt_device_image and its offload entry table on a
/// specific device. This class is responsible for storing and managing
/// the offload entries for an image on a device.
@@ -645,7 +717,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Print information about the device.
Error printInfo();
- virtual Error printInfoImpl() = 0;
+ virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
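(For context only, not part of the patch: a minimal sketch of how a plugin could use the InfoQueueTy API introduced above, written as the body of obtainInfoImpl in a hypothetical GenericDeviceTy subclass. Every key and value below is made up purely for illustration.)

  Error obtainInfoImpl(InfoQueueTy &Info) override {
    // Level-1 entries (the default level): a key, an optional value, and an
    // optional unit string.
    Info.add("Device Name", "dummy-device");
    Info.add("Global Memory Size", size_t(16) * 1024 * 1024 * 1024, "bytes");

    // Boolean values are rendered as "Yes"/"No" by InfoQueueTy::add.
    Info.add("Unified Addressing", true);

    // An entry without a value acts as a section header; InfoLevel2 entries
    // added afterwards are printed indented underneath it.
    Info.add("Maximum Block Dimensions");
    Info.add<InfoLevel2>("x", 1024);
    Info.add<InfoLevel2>("y", 1024);
    Info.add<InfoLevel2>("z", 64);

    return Plugin::success();
  }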
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index 9e38d851196c..d823cbebc3d7 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -620,147 +620,170 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Print information about the device.
- Error printInfoImpl() override {
+ Error obtainInfoImpl(InfoQueueTy &Info) override {
char TmpChar[1000];
- std::string TmpStr;
+ const char *TmpCharPtr;
size_t TmpSt;
- int TmpInt, TmpInt2, TmpInt3;
-
- // TODO: All these calls should be checked, but the whole printInfo must be
- // improved, so we will refactor it in the future.
- cuDriverGetVersion(&TmpInt);
- printf(" CUDA Driver Version: \t\t%d \n", TmpInt);
- printf(" CUDA Device Number: \t\t%d \n", DeviceId);
-
- cuDeviceGetName(TmpChar, 1000, Device);
- printf(" Device Name: \t\t\t%s \n", TmpChar);
-
- cuDeviceTotalMem(&TmpSt, Device);
- printf(" Global Memory Size: \t\t%zu bytes \n", TmpSt);
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- Device);
- printf(" Number of Multiprocessors: \t\t%d \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, Device);
- printf(" Concurrent Copy and Execution: \t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
- Device);
- printf(" Total Constant Memory: \t\t%d bytes\n", TmpInt);
-
- cuDeviceGetAttribute(
- &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, Device);
- printf(" Max Shared Memory per Block: \t%d bytes \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
- Device),
- printf(" Registers per Block: \t\t%d \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_WARP_SIZE, Device);
- printf(" Warp Size: \t\t\t\t%d Threads \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
- Device);
- printf(" Maximum Threads per Block: \t\t%d \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, Device);
- cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, Device);
- cuDeviceGetAttribute(&TmpInt3, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, Device);
- printf(" Maximum Block Dimensions: \t\t%d, %d, %d \n", TmpInt, TmpInt2,
- TmpInt3);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, Device);
- cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, Device);
- cuDeviceGetAttribute(&TmpInt3, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, Device);
- printf(" Maximum Grid Dimensions: \t\t%d x %d x %d \n", TmpInt, TmpInt2,
- TmpInt3);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MAX_PITCH, Device);
- printf(" Maximum Memory Pitch: \t\t%d bytes \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
- Device);
- printf(" Texture Alignment: \t\t\t%d bytes \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, Device);
- printf(" Clock Rate: \t\t\t%d kHz\n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,
- Device);
- printf(" Execution Timeout: \t\t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_INTEGRATED, Device);
- printf(" Integrated Device: \t\t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
- Device);
- printf(" Can Map Host Memory: \t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, Device);
- if (TmpInt == CU_COMPUTEMODE_DEFAULT)
- TmpStr = "DEFAULT";
- else if (TmpInt == CU_COMPUTEMODE_PROHIBITED)
- TmpStr = "PROHIBITED";
- else if (TmpInt == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
- TmpStr = "EXCLUSIVE PROCESS";
- else
- TmpStr = "unknown";
- printf(" Compute Mode: \t\t\t%s \n", TmpStr.c_str());
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
- Device);
- printf(" Concurrent Kernels: \t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, Device);
- printf(" ECC Enabled: \t\t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
- Device);
- printf(" Memory Clock Rate: \t\t\t%d kHz\n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
- Device);
- printf(" Memory Bus Width: \t\t\t%d bits\n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, Device);
- printf(" L2 Cache Size: \t\t\t%d bytes \n", TmpInt);
-
- cuDeviceGetAttribute(
- &TmpInt, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, Device);
- printf(" Max Threads Per SMP: \t\t%d \n", TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
- Device);
- printf(" Async Engines: \t\t\t%s (%d) \n", TmpInt ? "Yes" : "No",
- TmpInt);
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
- Device);
- printf(" Unified Addressing: \t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, Device);
- printf(" Managed Memory: \t\t\t%s \n", TmpInt ? "Yes" : "No");
+ int TmpInt;
+
+ CUresult Res = cuDriverGetVersion(&TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("CUDA Driver Version", TmpInt);
+
+ Info.add("CUDA OpenMP Device Number", DeviceId);
+
+ Res = cuDeviceGetName(TmpChar, 1000, Device);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Device Name", TmpChar);
+
+ Res = cuDeviceTotalMem(&TmpSt, Device);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Global Memory Size", TmpSt, "bytes");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Number of Multiprocessors", TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Concurrent Copy and Execution", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Total Constant Memory", TmpInt, "bytes");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
+ TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Max Shared Memory per Block", TmpInt, "bytes");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Registers per Block", TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_WARP_SIZE, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Warp Size", TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Maximum Threads per Block", TmpInt);
+
+ Info.add("Maximum Block Dimensions", "");
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add<InfoLevel2>("x", TmpInt);
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add<InfoLevel2>("y", TmpInt);
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add<InfoLevel2>("z", TmpInt);
+
+ Info.add("Maximum Grid Dimensions", "");
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add<InfoLevel2>("x", TmpInt);
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add<InfoLevel2>("y", TmpInt);
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add<InfoLevel2>("z", TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_PITCH, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Maximum Memory Pitch", TmpInt, "bytes");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Texture Alignment", TmpInt, "bytes");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CLOCK_RATE, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Clock Rate", TmpInt, "kHz");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Execution Timeout", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_INTEGRATED, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Integrated Device", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Can Map Host Memory", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, TmpInt);
+ if (Res == CUDA_SUCCESS) {
+ if (TmpInt == CU_COMPUTEMODE_DEFAULT)
+ TmpCharPtr = "Default";
+ else if (TmpInt == CU_COMPUTEMODE_PROHIBITED)
+ TmpCharPtr = "Prohibited";
+ else if (TmpInt == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
+ TmpCharPtr = "Exclusive process";
+ else
+ TmpCharPtr = "Unknown";
+ Info.add("Compute Mode", TmpCharPtr);
+ }
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Concurrent Kernels", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_ECC_ENABLED, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("ECC Enabled", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Memory Clock Rate", TmpInt, "kHz");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Memory Bus Width", TmpInt, "bits");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("L2 Cache Size", TmpInt, "bytes");
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
+ TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Max Threads Per SMP", TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Async Engines", TmpInt);
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
- Device);
- printf(" Concurrent Managed Memory: \t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(
- &TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, Device);
- printf(" Preemption Supported: \t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH,
- Device);
- printf(" Cooperative Launch: \t\t%s \n", TmpInt ? "Yes" : "No");
-
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, Device);
- printf(" Multi-Device Boars: \t\t%s \n", TmpInt ? "Yes" : "No");
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Unified Addressing", (bool)TmpInt);
- cuDeviceGetAttribute(&TmpInt, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
- Device);
- cuDeviceGetAttribute(&TmpInt2, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
- Device);
- printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Managed Memory", (bool)TmpInt);
+
+ Res =
+ getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Concurrent Managed Memory", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
+ TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Preemption Supported", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Cooperative Launch", (bool)TmpInt);
+
+ Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, TmpInt);
+ if (Res == CUDA_SUCCESS)
+ Info.add("Multi-Device Boars", (bool)TmpInt);
+
+ Info.add("Compute Capabilities", ComputeCapability.str());
return Plugin::success();
}
@@ -797,6 +820,10 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
}
+ CUresult getDeviceAttrRaw(uint32_t Kind, int &Value) {
+ return cuDeviceGetAttribute(&Value, (CUdevice_attribute)Kind, Device);
+ }
+
/// See GenericDeviceTy::getComputeUnitKind().
std::string getComputeUnitKind() const override {
return ComputeCapability.str();
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
index a9b828826b1b..a47015e2fa29 100644
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -294,8 +294,8 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Print information about the device.
- Error printInfoImpl() override {
- printf(" This is a generic-elf-64bit device\n");
+ Error obtainInfoImpl(InfoQueueTy &Info) override {
+ Info.add("Device Type", "Generic-elf-64bit");
return Plugin::success();
}
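
(Also not part of the patch: given the hypothetical entries from the sketch after the PluginInterface.h hunk, InfoQueueTy::print() writes one line per entry, indenting the key by four spaces per level and aligning all values to a common column, so the output would look roughly like this:)

    Device Name                      dummy-device
    Global Memory Size               17179869184 bytes
    Unified Addressing               Yes
    Maximum Block Dimensions
        x                            1024
        y                            1024
        z                            64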