@@ -1124,12 +1124,30 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
1124
1124
}
1125
1125
1126
1126
/* Context APIs */
1127
- pi_result cuda_piContextCreate (const cl_context_properties *properties,
1128
- pi_uint32 num_devices, const pi_device *devices,
1129
- void (*pfn_notify)(const char *errinfo,
1130
- const void *private_info,
1131
- size_t cb, void *user_data),
1132
- void *user_data, pi_context *retcontext) {
1127
+
1128
+ // / Create a PI CUDA context.
1129
+ // /
1130
+ // / By default creates a scoped context and keeps the last active CUDA context
1131
+ // / on top of the CUDA context stack.
1132
+ // / With the PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE
1133
+ // / creates a primary CUDA context and activates it on the CUDA context stack.
1134
+ // /
1135
+ // / @param[in] properties Zero-terminated array of key/id-value combinations. Can
1136
+ // / be nullptr. Only accepts property key/id PI_CONTEXT_PROPERTIES_CUDA_PRIMARY
1137
+ // / with a pi_bool value.
1138
+ // / @param[in] num_devices Number of devices to create the context for.
1139
+ // / @param[in] devices Devices to create the context for.
1140
+ // / @param[in] pfn_notify Callback, currently unused.
1141
+ // / @param[in] user_data User data for callback.
1142
+ // / @param[out] retcontext Set to created context on success.
1143
+ // /
1144
+ // / @return PI_SUCCESS on success, otherwise an error return code.
1145
+ pi_result cuda_piContextCreate (const pi_context_properties *properties,
1146
+ pi_uint32 num_devices, const pi_device *devices,
1147
+ void (*pfn_notify)(const char *errinfo,
1148
+ const void *private_info,
1149
+ size_t cb, void *user_data),
1150
+ void *user_data, pi_context *retcontext) {
1133
1151
1134
1152
assert (devices != nullptr );
1135
1153
// TODO: How to implement context callback?
@@ -1141,31 +1159,51 @@ pi_result cuda_piContextCreate(const cl_context_properties *properties,
1141
1159
assert (retcontext != nullptr );
1142
1160
pi_result errcode_ret = PI_SUCCESS;
1143
1161
1162
+ // Parse properties.
1163
+ bool property_cuda_primary = false ;
1164
+ while (properties && (0 != *properties)) {
1165
+ // Consume property ID.
1166
+ pi_context_properties id = *properties;
1167
+ ++properties;
1168
+ // Consume property value.
1169
+ pi_context_properties value = *properties;
1170
+ ++properties;
1171
+ switch (id) {
1172
+ case PI_CONTEXT_PROPERTIES_CUDA_PRIMARY:
1173
+ assert (value == PI_FALSE || value == PI_TRUE);
1174
+ property_cuda_primary = static_cast <bool >(value);
1175
+ break ;
1176
+ default :
1177
+ // Unknown property.
1178
+ assert (!" Unknown piContextCreate property in property list" );
1179
+ return PI_INVALID_VALUE;
1180
+ }
1181
+ }
1182
+
1144
1183
std::unique_ptr<_pi_context> piContextPtr{nullptr };
1145
1184
try {
1146
- if (properties && *properties != PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) {
1147
- throw pi_result (CL_INVALID_VALUE);
1148
- } else if (!properties) {
1185
+ if (property_cuda_primary) {
1186
+ // Use the CUDA primary context and assume that we want to use it
1187
+ // immediately as we want to forgo context switches.
1188
+ CUcontext Ctxt;
1189
+ errcode_ret = PI_CHECK_ERROR (
1190
+ cuDevicePrimaryCtxRetain (&Ctxt, devices[0 ]->cuDevice_ ));
1191
+ piContextPtr = std::unique_ptr<_pi_context>(
1192
+ new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1193
+ errcode_ret = PI_CHECK_ERROR (cuCtxPushCurrent (Ctxt));
1194
+ } else {
1195
+ // Create a scoped context.
1149
1196
CUcontext newContext, current;
1150
1197
PI_CHECK_ERROR (cuCtxGetCurrent (¤t));
1151
- errcode_ret = PI_CHECK_ERROR (cuCtxCreate (&newContext, CU_CTX_MAP_HOST,
1152
- (* devices) ->cuDevice_ ));
1198
+ errcode_ret = PI_CHECK_ERROR (
1199
+ cuCtxCreate (&newContext, CU_CTX_MAP_HOST, devices[ 0 ] ->cuDevice_ ));
1153
1200
piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{
1154
1201
_pi_context::kind::user_defined, newContext, *devices});
1202
+ // For scoped contexts keep the last active CUDA one on top of the stack
1203
+ // as `cuCtxCreate` replaces it implicitly otherwise.
1155
1204
if (current != nullptr ) {
1156
- // If there was an existing context on the thread we recover it
1157
1205
PI_CHECK_ERROR (cuCtxSetCurrent (current));
1158
1206
}
1159
- } else if (properties
1160
- && *properties == PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) {
1161
- CUcontext Ctxt;
1162
- errcode_ret = PI_CHECK_ERROR (cuDevicePrimaryCtxRetain (
1163
- &Ctxt, (*devices)->cuDevice_ ));
1164
- piContextPtr = std::unique_ptr<_pi_context>(
1165
- new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1166
- errcode_ret = PI_CHECK_ERROR (cuCtxPushCurrent (Ctxt));
1167
- } else {
1168
- throw pi_result (CL_INVALID_VALUE);
1169
1207
}
1170
1208
1171
1209
*retcontext = piContextPtr.release ();
0 commit comments