39 changes: 26 additions & 13 deletions sycl/plugins/cuda/pi_cuda.cpp
@@ -542,9 +542,28 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,

   static std::once_flag initFlag;
   static _pi_platform platformId;
-  std::call_once(initFlag,
-                 [](pi_result &err) { err = PI_CHECK_ERROR(cuInit(0)); },
-                 err);
+  std::call_once(
+      initFlag,
+      [](pi_result &err) {
+        err = PI_CHECK_ERROR(cuInit(0));
+
+        int numDevices = 0;
+        err = PI_CHECK_ERROR(cuDeviceGetCount(&numDevices));
+        platformId.devices_.reserve(numDevices);
+        try {
+          for (int i = 0; i < numDevices; ++i) {
+            CUdevice device;
+            err = PI_CHECK_ERROR(cuDeviceGet(&device, i));
+            platformId.devices_.emplace_back(
+                new _pi_device{device, &platformId});
+          }
+        } catch (...) {
+          // Clear and rethrow to allow retry
+          platformId.devices_.clear();
+          throw;
+        }
+      },
+      err);
 
   *platforms = &platformId;
 }
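Note (not part of the diff): the pattern used in the lambda above — initialize the driver once, query the device count, then fetch one handle per ordinal — can be sketched in isolation as below. This is a minimal, self-contained sketch using only the CUDA Driver API; the plugin's PI_CHECK_ERROR wrapper and the _pi_device/_pi_platform types are replaced with plain status checks and a local vector.

    #include <cuda.h>
    #include <mutex>
    #include <vector>

    // One-time enumeration of all CUDA devices, analogous to what
    // cuda_piPlatformsGet does for its static platform object.
    static const std::vector<CUdevice> &allCudaDevices() {
      static std::once_flag flag;
      static std::vector<CUdevice> devices; // lives for the whole process
      std::call_once(flag, [] {
        if (cuInit(0) != CUDA_SUCCESS) // driver must be initialized first
          return;                      // leave the list empty on failure
        int count = 0;
        if (cuDeviceGetCount(&count) != CUDA_SUCCESS)
          return;
        devices.reserve(count);
        for (int i = 0; i < count; ++i) {
          CUdevice dev;
          if (cuDeviceGet(&dev, i) == CUDA_SUCCESS) // handle for ordinal i
            devices.push_back(dev);
        }
      });
      return devices;
    }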
@@ -594,22 +613,16 @@ pi_result cuda_piDevicesGet(pi_platform platform, pi_device_type device_type,

   pi_result err = PI_SUCCESS;
   const bool askingForGPU = (device_type & PI_DEVICE_TYPE_GPU);
-  size_t numDevices = askingForGPU ? 1 : 0;
+  size_t numDevices = askingForGPU ? platform->devices_.size() : 0;
 
   try {
     if (num_devices) {
       *num_devices = numDevices;
     }
 
-    if (askingForGPU) {
-      if (devices) {
-        CUdevice device;
-        err = PI_CHECK_ERROR(cuDeviceGet(&device, 0));
-        *devices = new _pi_device{device, platform};
-      }
-    } else {
-      if (devices) {
-        *devices = nullptr;
+    if (askingForGPU && devices) {
+      for (size_t i = 0; i < std::min(size_t(num_entries), numDevices); ++i) {
+        devices[i] = platform->devices_[i].get();
       }
     }
 
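Note (not part of the diff): callers are expected to drive cuda_piDevicesGet with the usual OpenCL-style two-step query — ask for the count first, then for that many handles. The sketch below is hypothetical caller-side code, assuming the PI types and the entry-point declaration from pi_cuda.hpp; the returned pointers alias platform->devices_, so the caller does not own them.

    #include <vector>
    #include "pi_cuda.hpp"

    std::vector<pi_device> getAllGpuDevices(pi_platform platform) {
      pi_uint32 count = 0;
      // First call: devices == nullptr, so only the count is written.
      cuda_piDevicesGet(platform, PI_DEVICE_TYPE_GPU, 0, nullptr, &count);

      // Second call: fetch 'count' handles; they point into platform->devices_.
      std::vector<pi_device> gpus(count);
      cuda_piDevicesGet(platform, PI_DEVICE_TYPE_GPU, count, gpus.data(), nullptr);
      return gpus;
    }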
1 change: 1 addition & 0 deletions sycl/plugins/cuda/pi_cuda.hpp
@@ -46,6 +46,7 @@ pi_result cuda_piKernelRelease(pi_kernel);
 }
 
 struct _pi_platform {
+  std::vector<std::unique_ptr<_pi_device>> devices_;
 };
 
 struct _pi_device {
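Note (not part of the diff): with the new devices_ member, the platform owns every _pi_device for the lifetime of the process (the platform itself is a function-local static in cuda_piPlatformsGet), and cuda_piDevicesGet hands out non-owning raw pointers into that vector. A stripped-down model of that ownership, using hypothetical placeholder members where the real definitions are not shown in this diff:

    #include <cuda.h>
    #include <memory>
    #include <vector>

    struct _pi_device;

    struct _pi_platform {
      std::vector<std::unique_ptr<_pi_device>> devices_; // owning
    };

    // Placeholder for the real _pi_device; the field names here are hypothetical.
    struct _pi_device {
      CUdevice cuDevice_;      // native handle, filled in by cuDeviceGet
      _pi_platform *platform_; // non-owning back-pointer, as in the .cpp change
    };

    // Consumers receive raw pointers that alias devices_ and never delete them.
    inline _pi_device *deviceAt(_pi_platform &p, size_t i) {
      return p.devices_[i].get();
    }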