Skip to content

Commit ebea5a0

Browse files
Moved the event cleanup logic to the point where zeEventPoolCreate fails with an out-of-resources error, making the cleanup deterministic
Signed-off-by: Zhang, Winston <[email protected]>
1 parent c6b6ddd commit ebea5a0

File tree

5 files changed

+37
-35
lines changed

5 files changed

+37
-35
lines changed

unified-runtime/source/adapters/level_zero/context.cpp

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
406406
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
407407
bool ProfilingEnabled, ur_device_handle_t Device,
408408
bool CounterBasedEventEnabled, bool UsingImmCmdList,
409-
bool InterruptBasedEventEnabled) {
409+
bool InterruptBasedEventEnabled, ur_queue_handle_t Queue) {
410410

411411
ze_device_handle_t ZeDevice = nullptr;
412412
if (Device) {
@@ -461,8 +461,21 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
461461
});
462462
}
463463

464-
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
465-
ZeDevices.size(), &ZeDevices[0], &Pool));
464+
ze_result_t Result = ZE_CALL_NOCHECK(
465+
zeEventPoolCreate,
466+
(ZeContext, &ZeEventPoolDesc, ZeDevices.size(), &ZeDevices[0], &Pool));
467+
if (ze2urResult(Result) == UR_RESULT_ERROR_OUT_OF_RESOURCES &&
468+
!Queue->isInOrderQueue()) {
469+
if (Queue->UsingImmCmdLists) {
470+
UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/,
471+
false /*QueueSynced*/,
472+
nullptr /*CompletedEvent*/));
473+
} else {
474+
UR_CALL(resetCommandLists(Queue));
475+
}
476+
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
477+
ZeDevices.size(), &ZeDevices[0], &Pool));
478+
}
466479
Index = 0;
467480
NumEventsAvailableInEventPool[Pool] = MaxNumEventsPerPool - 1;
468481
NumEventsUnreleasedInEventPool[Pool] = 1;
@@ -546,8 +559,21 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
546559
});
547560
}
548561

549-
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
550-
ZeDevices.size(), &ZeDevices[0], ZePool));
562+
ze_result_t Result = ZE_CALL_NOCHECK(
563+
zeEventPoolCreate,
564+
(ZeContext, &ZeEventPoolDesc, ZeDevices.size(), &ZeDevices[0], &Pool));
565+
if (ze2urResult(Result) == UR_RESULT_ERROR_OUT_OF_RESOURCES &&
566+
!Queue->isInOrderQueue()) {
567+
if (Queue->UsingImmCmdLists) {
568+
UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/,
569+
false /*QueueSynced*/,
570+
nullptr /*CompletedEvent*/));
571+
} else {
572+
UR_CALL(resetCommandLists(Queue));
573+
}
574+
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
575+
ZeDevices.size(), &ZeDevices[0], &Pool));
576+
}
551577
NumEventsAvailableInEventPool[*ZePool] = MaxNumEventsPerPool - 1;
552578
NumEventsUnreleasedInEventPool[*ZePool] = 1;
553579
} else {

unified-runtime/source/adapters/level_zero/context.hpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,11 @@ struct ur_context_handle_t_ : ur_object {
202202
// pool then create new one. The HostVisible parameter tells if we need a
203203
// slot for a host-visible event. The ProfilingEnabled tells if we need a
204204
// slot for an event with profiling capabilities.
205-
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
206-
bool HostVisible,
207-
bool ProfilingEnabled,
208-
ur_device_handle_t Device,
209-
bool CounterBasedEventEnabled,
210-
bool UsingImmCmdList,
211-
bool InterruptBasedEventEnabled);
205+
ur_result_t getFreeSlotInExistingOrNewPool(
206+
ze_event_pool_handle_t &, size_t &, bool HostVisible,
207+
bool ProfilingEnabled, ur_device_handle_t Device,
208+
bool CounterBasedEventEnabled, bool UsingImmCmdList,
209+
bool InterruptBasedEventEnabled, ur_queue_handle_t Queue);
212210

213211
// Get ur_event_handle_t from cache.
214212
ur_event_handle_t getEventFromContextCache(bool HostVisible,

unified-runtime/source/adapters/level_zero/event.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1380,7 +1380,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
13801380
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
13811381
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
13821382
CounterBasedEventEnabled, UsingImmediateCommandlists,
1383-
InterruptBasedEventEnabled))
1383+
InterruptBasedEventEnabled, Queue))
13841384
return Res;
13851385

13861386
ZeStruct<ze_event_desc_t> ZeEventDesc;

unified-runtime/source/adapters/level_zero/kernel.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "kernel.hpp"
12-
#include "common.hpp"
1312
#include "logger/ur_logger.hpp"
14-
#include "queue.hpp"
1513
#include "ur_api.h"
1614
#include "ur_interface_loader.hpp"
1715

@@ -223,25 +221,6 @@ ur_result_t urEnqueueKernelLaunch(
223221
UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/,
224222
true /*OKToBatchCommand*/));
225223

226-
// For internal events only, occasionally trigger cleanup to prevent event
227-
// pool exhaustion, but avoid doing this for in-order queues which are
228-
// commonly used in multi-threaded scenarios and may have stricter
229-
// synchronization requirements
230-
if (IsInternal && !Queue->isInOrderQueue()) {
231-
// Use a probabilistic approach - only cleanup 1 in 128 internal events
232-
// to minimize performance impact and reduce chances of race conditions
233-
static thread_local uint32_t CleanupCounter = 0;
234-
if ((++CleanupCounter & 127) == 0) {
235-
if (Queue->UsingImmCmdLists) {
236-
UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/,
237-
false /*QueueSynced*/,
238-
nullptr /*CompletedEvent*/));
239-
} else {
240-
UR_CALL(resetCommandLists(Queue));
241-
}
242-
}
243-
}
244-
245224
return UR_RESULT_SUCCESS;
246225
}
247226

unified-runtime/source/adapters/level_zero/kernel.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "common.hpp"
1515
#include "common/ur_ref_count.hpp"
1616
#include "memory.hpp"
17-
#include "queue.hpp"
1817

1918
struct ur_kernel_handle_t_ : ur_object {
2019
ur_kernel_handle_t_(bool OwnZeHandle, ur_program_handle_t Program)

0 commit comments

Comments
 (0)