From 330e196d401afd7b291e50e9c61e54d004f93027 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 18 Aug 2025 16:45:56 +0200 Subject: [PATCH 01/25] [SYCL][E2E] Drop CUDA requirement from bindless image tests Those tests already check for specific aspects --- sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp | 1 - sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp | 1 - sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp | 1 - .../test-e2e/bindless_images/copies/device_to_device_pitched.cpp | 1 - sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp | 1 - sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp | 1 - 6 files changed, 6 deletions(-) diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp index ac93ae5db9e67..ff9430ee31fc0 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp index 6d61a2a2a259a..c3325fbef33b0 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp index 9777e2a591837..2b2028c5a7e06 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp index c8a644296eba2..d47ecbe693f22 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp @@ -1,6 +1,5 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm -// REQUIRES: cuda // // UNSUPPORTED: cuda // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17231 diff --git a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp index 37e58a3d75bef..df8ce7e12e1b7 100644 --- a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp @@ -1,6 +1,5 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm -// REQUIRES: cuda // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp index bca3d2c1c0ddd..94c3ff24e3aab 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm // RUN: %{build} -o %t.out From cee3455e33f5bf050035cbbf3708d545da6d38bb Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 20 Aug 2025 17:12:45 +0200 Subject: [PATCH 02/25] XFAIL some known failures --- .../bindless_images/copies/device_to_device_pitched.cpp | 2 ++ sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp | 2 ++ sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp | 2 ++ 3 files changed, 6 insertions(+) diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp index d47ecbe693f22..1842cd30735b0 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp @@ -1,5 +1,7 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm +// XFAIL: level_zero +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17663 // // UNSUPPORTED: cuda // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17231 diff --git a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp index df8ce7e12e1b7..41405b891b484 100644 --- a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp @@ -1,5 +1,7 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm +// XFAIL: level_zero +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17663 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp index 94c3ff24e3aab..4e369dfef27b8 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp @@ -1,4 +1,6 @@ // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm +// XFAIL: level_zero +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17663 // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out From d8ba8d298eafcb48c94a6d0de9937189cb92b0d2 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 20 Aug 2025 17:14:48 +0200 Subject: [PATCH 03/25] Set correct flags for image-to-usm and usm-to-image copies --- sycl/source/handler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f9d1769e573e7..d5e24f74e5f9c 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1769,7 +1769,7 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, 0, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, 0, DestRowPitch); setType(detail::CGType::CopyImage); @@ -1790,7 +1790,7 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, 0, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, 0, DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); @@ -1810,7 +1810,7 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcRowPitch, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcRowPitch, 0); setType(detail::CGType::CopyImage); @@ -1831,7 +1831,7 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcRowPitch, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcRowPitch, 0, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); From 9f5c1a7a374c18f0376f39405c1d6986e20fee68 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 21 Aug 2025 08:05:23 -0700 Subject: [PATCH 04/25] Fixed crashes, but results are incorrect --- sycl/source/handler.cpp | 40 +++++-------------- .../adapters/level_zero/image_common.cpp | 24 +++++++++++ 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d5e24f74e5f9c..3726701a4c7d4 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1851,21 +1851,10 @@ void handler::ext_oneapi_copy( MSrcPtr = const_cast(Src); MDstPtr = Dest; - ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( - get_pointer_type(Src, - createSyclObjFromImpl(impl->get_context())), - get_pointer_type(Dest, - createSyclObjFromImpl(impl->get_context()))); - - if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || - ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { - detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, - SrcRowPitch, DestRowPitch); - } else { - throw sycl::exception(make_error_code(errc::invalid), - "Copy Error: This copy function only performs device " - "to device or host to host copies!"); - } + ur_exp_image_copy_flags_t ImageCopyFlags = + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; + detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, + SrcRowPitch, DestRowPitch); setType(detail::CGType::CopyImage); } @@ -1880,22 +1869,11 @@ void handler::ext_oneapi_copy( MSrcPtr = const_cast(Src); MDstPtr = Dest; - ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( - get_pointer_type(Src, - createSyclObjFromImpl(impl->get_context())), - get_pointer_type(Dest, - createSyclObjFromImpl(impl->get_context()))); - - if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || - ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { - detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, - SrcRowPitch, DestRowPitch, SrcOffset, {0, 0, 0}, - DestOffset, {0, 0, 0}, CopyExtent); - } else { - throw sycl::exception(make_error_code(errc::invalid), - "Copy Error: This copy function only performs device " - "to device or host to host copies!"); - } + ur_exp_image_copy_flags_t ImageCopyFlags = + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; + detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, + SrcRowPitch, DestRowPitch, SrcOffset, {0, 0, 0}, + DestOffset, {0, 0, 0}, CopyExtent); setType(detail::CGType::CopyImage); } diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 9b76788b6a65e..d4601beee7f90 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -882,6 +882,30 @@ ur_result_t bindlessImagesHandleCopyFlags( return UR_RESULT_SUCCESS; }; + case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST: { + // Copy between pitched USM regions + uint32_t DstRowPitch = pDstImageDesc->rowPitch; + uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; + ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + DstRowPitch, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + SrcRowPitch, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; + uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, + pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, + numWaitEvents, phWaitEvents)); + return UR_RESULT_SUCCESS; + }; default: UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " "unexpected imageCopyFlags"); From 2a43585d32d591c0e8fc34a29c88f459d0773f70 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 21 Aug 2025 11:16:16 -0700 Subject: [PATCH 05/25] This fixes more crashes (for 1D copies), but results are still incorrect --- sycl/source/handler.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 3726701a4c7d4..4a033eab16681 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1661,10 +1661,12 @@ void handler::ext_oneapi_copy( Desc.width * Desc.num_channels * detail::get_channel_size(Desc); if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, HostRowPitch, + detail::fill_copy_args(get_impl(), Desc, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, HostRowPitch, DeviceRowPitch); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, DeviceRowPitch, + detail::fill_copy_args(get_impl(), Desc, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, DeviceRowPitch, HostRowPitch); } else { throw sycl::exception(make_error_code(errc::invalid), @@ -1701,13 +1703,15 @@ void handler::ext_oneapi_copy( // Fill the host extent based on the type of copy. if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, - HostRowPitch, DeviceRowPitch, SrcOffset, HostExtent, - DestOffset, {0, 0, 0}, CopyExtent); + detail::fill_copy_args(get_impl(), DeviceImgDesc, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, HostRowPitch, + DeviceRowPitch, SrcOffset, HostExtent, DestOffset, + {0, 0, 0}, CopyExtent); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, - DeviceRowPitch, HostRowPitch, SrcOffset, {0, 0, 0}, - DestOffset, HostExtent, CopyExtent); + detail::fill_copy_args(get_impl(), DeviceImgDesc, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, DeviceRowPitch, + HostRowPitch, SrcOffset, {0, 0, 0}, DestOffset, + HostExtent, CopyExtent); } else { throw sycl::exception(make_error_code(errc::invalid), "Copy Error: This copy function only performs host " From aefb64569f873ed5ee2469e531f0311f44a11a79 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 25 Aug 2025 09:28:37 -0700 Subject: [PATCH 06/25] Fix incorrect usage of L0 API leading to memory corruptions --- .../adapters/level_zero/image_common.cpp | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index d4601beee7f90..662cee78c81b8 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -883,28 +883,30 @@ ur_result_t bindlessImagesHandleCopyFlags( return UR_RESULT_SUCCESS; }; case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST: { - // Copy between pitched USM regions - uint32_t DstRowPitch = pDstImageDesc->rowPitch; - uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - DstRowPitch, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - SrcRowPitch, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; - uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, - pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, - numWaitEvents, phWaitEvents)); - return UR_RESULT_SUCCESS; + // Copy between (possibly) pitched USM regions + uint32_t DstRowPitch = + std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); + uint32_t SrcRowPitch = + std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); + ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; + uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, + pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, + numWaitEvents, phWaitEvents)); + return UR_RESULT_SUCCESS; }; default: UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " From 6b33f17968384cfdc42c003746956e1c04af6b39 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 27 Aug 2025 05:35:47 -0700 Subject: [PATCH 07/25] More fixes. copy_subregion_2D should pass now --- .../adapters/level_zero/image_common.cpp | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 662cee78c81b8..6f12f3d971bc5 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -884,22 +884,26 @@ ur_result_t bindlessImagesHandleCopyFlags( }; case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST: { // Copy between (possibly) pitched USM regions - uint32_t DstRowPitch = - std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); - uint32_t SrcRowPitch = - std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; + uint32_t DstRowPitch = pDstImageDesc->rowPitch; + uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; + ze_copy_region_t ZeDstRegion = { + (uint32_t)(pCopyRegion->dstOffset.x * + getPixelSizeBytes(pDstImageFormat)), + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + (uint32_t)(pCopyRegion->copyExtent.width * + getPixelSizeBytes(pDstImageFormat)), + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ze_copy_region_t ZeSrcRegion = { + (uint32_t)(pCopyRegion->dstOffset.x * + getPixelSizeBytes(pSrcImageFormat)), + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + (uint32_t)(pCopyRegion->copyExtent.width * + getPixelSizeBytes(pSrcImageFormat)), + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, From c980982d0a294ca43e0c486064a2d33cafd1a76a Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 27 Aug 2025 06:52:32 -0700 Subject: [PATCH 08/25] Use the right units for offsets and region sizes --- sycl/source/handler.cpp | 18 +++- .../adapters/level_zero/image_common.cpp | 82 +++++++++++++------ 2 files changed, 69 insertions(+), 31 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 4a033eab16681..ffba0367d89c7 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -258,8 +258,17 @@ fill_copy_args(detail::handler_impl *impl, auto ZCopyExtentComponent = detail::fill_image_type(SrcImgDesc, UrSrcDesc); detail::fill_image_type(DestImgDesc, UrDestDesc); - impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + // ur_rect_offset_t and ur_rect_offset_t which represent image offsets and + // copy extents expect that X-axis offset and region width are specified in + // bytes rather then in elements. + auto SrcPixelSize = + SrcImgDesc.num_channels * detail::get_channel_size(SrcImgDesc); + auto DestPixelSize = + DestImgDesc.num_channels * detail::get_channel_size(DestImgDesc); + + impl->MSrcOffset = {SrcOffset[0] * SrcPixelSize, SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0] * DestPixelSize, DestOffset[1], + DestOffset[2]}; impl->MSrcImageDesc = UrSrcDesc; impl->MDstImageDesc = UrDestDesc; impl->MSrcImageFormat = UrSrcFormat; @@ -267,9 +276,10 @@ fill_copy_args(detail::handler_impl *impl, impl->MImageCopyFlags = ImageCopyFlags; if (CopyExtent.size() != 0) { - impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MCopyExtent = {CopyExtent[0] * SrcPixelSize, CopyExtent[1], + CopyExtent[2]}; } else { - impl->MCopyExtent = {SrcImgDesc.width, SrcImgDesc.height, + impl->MCopyExtent = {SrcImgDesc.width * SrcPixelSize, SrcImgDesc.height, ZCopyExtentComponent}; } diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 6f12f3d971bc5..385f3599970d8 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -787,13 +787,21 @@ ur_result_t bindlessImagesHandleCopyFlags( ze_image_region_t DstRegion; UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, &pCopyRegion->copyExtent, DstRegion)); + // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset + // and width are specified as bytes, whilst Y/Z-axis offsets, height and + // depth are specified as pixels (or rows and slices). ze_image_region_t, + // however, accepts everything as pixels, so we need to do a conversion + // here. + auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + DstRegion.originX /= PixelSizeInBytes; + DstRegion.width /= PixelSizeInBytes; + auto *urDstImg = static_cast(pDst); - const char *SrcPtr = - static_cast(pSrc) + - pCopyRegion->srcOffset.z * SrcSlicePitch + - pCopyRegion->srcOffset.y * SrcRowPitch + - pCopyRegion->srcOffset.x * getPixelSizeBytes(pSrcImageFormat); + const char *SrcPtr = static_cast(pSrc) + + pCopyRegion->srcOffset.z * SrcSlicePitch + + pCopyRegion->srcOffset.y * SrcRowPitch + + pCopyRegion->srcOffset.x; ZE2UR_CALL(zeCommandListAppendImageCopyFromMemoryExt, (ZeCommandList, urDstImg->getZeImage(), SrcPtr, &DstRegion, @@ -830,13 +838,20 @@ ur_result_t bindlessImagesHandleCopyFlags( ze_image_region_t SrcRegion; UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, &pCopyRegion->copyExtent, SrcRegion)); + // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset + // and width are specified as bytes, whilst Y/Z-axis offsets, height and + // depth are specified as pixels (or rows and slices). ze_image_region_t, + // however, accepts everything as pixels, so we need to do a conversion + // here. + auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + SrcRegion.originX /= PixelSizeInBytes; + SrcRegion.width /= PixelSizeInBytes; auto *urSrcImg = reinterpret_cast(pSrc); char *DstPtr = static_cast(pDst) + pCopyRegion->dstOffset.z * DstSlicePitch + - pCopyRegion->dstOffset.y * DstRowPitch + - pCopyRegion->dstOffset.x * getPixelSizeBytes(pDstImageFormat); + pCopyRegion->dstOffset.y * DstRowPitch + pCopyRegion->dstOffset.x; ZE2UR_CALL(zeCommandListAppendImageCopyToMemoryExt, (ZeCommandList, DstPtr, urSrcImg->getZeImage(), &SrcRegion, DstRowPitch, DstSlicePitch, zeSignalEvent, numWaitEvents, @@ -866,11 +881,30 @@ ur_result_t bindlessImagesHandleCopyFlags( }; case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: { ze_image_region_t DstRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, + UR_CALL(getImageRegionHelper(zeSrcImageDesc, + &pCopyRegion->dstOffset, &pCopyRegion->copyExtent, DstRegion)); + // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset + // and width are specified as bytes, whilst Y/Z-axis offsets, height and + // depth are specified as pixels (or rows and slices). ze_image_region_t, + // however, accepts everything as pixels, so we need to do a conversion + // here. + auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + DstRegion.originX /= PixelSizeInBytes; + DstRegion.width /= PixelSizeInBytes; + ze_image_region_t SrcRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, + UR_CALL(getImageRegionHelper(zeSrcImageDesc, + &pCopyRegion->srcOffset, &pCopyRegion->copyExtent, SrcRegion)); + // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset + // and width are specified as bytes, whilst Y/Z-axis offsets, height and + // depth are specified as pixels (or rows and slices). ze_image_region_t, + // however, accepts everything as pixels, so we need to do a conversion + // here. + PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + SrcRegion.originX /= PixelSizeInBytes; + SrcRegion.width /= PixelSizeInBytes; auto *urImgSrc = reinterpret_cast(pSrc); auto *urImgDst = reinterpret_cast(pDst); @@ -884,26 +918,20 @@ ur_result_t bindlessImagesHandleCopyFlags( }; case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST: { // Copy between (possibly) pitched USM regions + ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; uint32_t DstRowPitch = pDstImageDesc->rowPitch; uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; - ze_copy_region_t ZeDstRegion = { - (uint32_t)(pCopyRegion->dstOffset.x * - getPixelSizeBytes(pDstImageFormat)), - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - (uint32_t)(pCopyRegion->copyExtent.width * - getPixelSizeBytes(pDstImageFormat)), - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - ze_copy_region_t ZeSrcRegion = { - (uint32_t)(pCopyRegion->dstOffset.x * - getPixelSizeBytes(pSrcImageFormat)), - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - (uint32_t)(pCopyRegion->copyExtent.width * - getPixelSizeBytes(pSrcImageFormat)), - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, From 72a94d478d5d87a2789892ec39104596724fd5ba Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 27 Aug 2025 07:02:47 -0700 Subject: [PATCH 09/25] Fix CUDA & HIP (untested locally) --- .../source/adapters/cuda/image.cpp | 98 ++++++++--------- unified-runtime/source/adapters/hip/image.cpp | 100 ++++++++---------- 2 files changed, 94 insertions(+), 104 deletions(-) diff --git a/unified-runtime/source/adapters/cuda/image.cpp b/unified-runtime/source/adapters/cuda/image.cpp index 4f2a83d0878c8..5c5ec3367442a 100644 --- a/unified-runtime/source/adapters/cuda/image.cpp +++ b/unified-runtime/source/adapters/cuda/image.cpp @@ -645,7 +645,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( }; unsigned int NumChannels = 0; - size_t PixelSizeBytes = 0; + [[maybe_unused]] size_t PixelSizeBytes = 0; UR_CALL(urCalculateNumChannels(pSrcImageFormat->channelOrder, &NumChannels)); @@ -673,19 +673,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)pDst) != CUDA_SUCCESS; - size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; - const char *SrcWithOffset = static_cast(pSrc) + - (pCopyRegion->srcOffset.x * PixelSizeBytes); + size_t CopyExtentBytes = pCopyRegion->copyExtent.width; + const char *SrcWithOffset = + static_cast(pSrc) + pCopyRegion->srcOffset.x; if (isCudaArray) { - UR_CHECK_ERROR(cuMemcpyHtoAAsync( - (CUarray)pDst, pCopyRegion->dstOffset.x * PixelSizeBytes, - static_cast(SrcWithOffset), CopyExtentBytes, - Stream)); + UR_CHECK_ERROR( + cuMemcpyHtoAAsync((CUarray)pDst, pCopyRegion->dstOffset.x, + static_cast(SrcWithOffset), + CopyExtentBytes, Stream)); } else if (memType == CU_MEMORYTYPE_DEVICE) { - void *DstWithOffset = - static_cast(static_cast(pDst) + - (PixelSizeBytes * pCopyRegion->dstOffset.x)); + void *DstWithOffset = static_cast(static_cast(pDst) + + pCopyRegion->dstOffset.x); UR_CHECK_ERROR( cuMemcpyHtoDAsync((CUdeviceptr)DstWithOffset, static_cast(SrcWithOffset), @@ -698,11 +697,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( CUDA_MEMCPY2D cpy_desc = {}; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.srcHost = pSrc; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.srcPitch = pSrcImageDesc->rowPitch; if (pDstImageDesc->rowPitch == 0) { @@ -717,10 +716,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); } else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) { CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; @@ -729,7 +728,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcHeight = pSrcImageDesc->height; cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); @@ -737,10 +736,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pDstImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || pDstImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; @@ -749,7 +748,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); @@ -764,20 +763,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)pSrc) != CUDA_SUCCESS; - size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; - void *DstWithOffset = - static_cast(static_cast(pDst) + - (PixelSizeBytes * pCopyRegion->dstOffset.x)); + size_t CopyExtentBytes = CopyRegion->copyExtent.width; + void *DstWithOffset = static_cast(static_cast(pDst) + + pCopyRegion->dstOffset.x); if (isCudaArray) { - UR_CHECK_ERROR( - cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc), - PixelSizeBytes * pCopyRegion->srcOffset.x, - CopyExtentBytes, Stream)); + UR_CHECK_ERROR(cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc), + pCopyRegion->srcOffset.x, + CopyExtentBytes, Stream)); } else if (memType == CU_MEMORYTYPE_DEVICE) { const char *SrcWithOffset = - static_cast(pSrc) + - (pCopyRegion->srcOffset.x * PixelSizeBytes); + static_cast(pSrc) + pCopyRegion->srcOffset.x; UR_CHECK_ERROR(cuMemcpyDtoHAsync(DstWithOffset, (CUdeviceptr)SrcWithOffset, CopyExtentBytes, Stream)); @@ -787,11 +783,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( } } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) { CUDA_MEMCPY2D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; @@ -808,10 +804,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) { CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; @@ -820,7 +816,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = pDstImageDesc->height; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); @@ -828,10 +824,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; @@ -840,7 +836,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); @@ -874,11 +870,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( (CUdeviceptr)pDst) != CUDA_SUCCESS; CUDA_MEMCPY2D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = 0; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = 0; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = 1; if (isSrcCudaArray) { cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; @@ -897,11 +893,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) { CUDA_MEMCPY2D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; if (pSrcImageDesc->rowPitch == 0) { cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; @@ -924,17 +920,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) { CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); @@ -942,17 +938,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); diff --git a/unified-runtime/source/adapters/hip/image.cpp b/unified-runtime/source/adapters/hip/image.cpp index 0874df4c8ce85..05f04f1ff9eea 100644 --- a/unified-runtime/source/adapters/hip/image.cpp +++ b/unified-runtime/source/adapters/hip/image.cpp @@ -635,7 +635,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( UR_RESULT_ERROR_INVALID_ARGUMENT); unsigned int NumChannels = 0; - size_t PixelSizeBytes = 0; + [[maybe_unused]] size_t PixelSizeBytes = 0; UR_CALL(urCalculateNumChannels(pSrcImageFormat->channelOrder, &NumChannels)); @@ -665,29 +665,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( memType == hipMemoryTypeArray, UR_RESULT_ERROR_INVALID_VALUE); - size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; - const char *SrcWithOffset = static_cast(pSrc) + - (pCopyRegion->srcOffset.x * PixelSizeBytes); + size_t CopyExtentBytes = pCopyRegion->copyExtent.width; + const char *SrcWithOffset = + static_cast(pSrc) + pCopyRegion->srcOffset.x; if (memType == hipMemoryTypeArray) { // HIP doesn not provide async copies between host and image arrays // memory in versions earlier than 6.2. #if HIP_VERSION >= 60200000 - UR_CHECK_ERROR( - hipMemcpyHtoAAsync(static_cast(pDst), - pCopyRegion->dstOffset.x * PixelSizeBytes, - static_cast(SrcWithOffset), - CopyExtentBytes, Stream)); + UR_CHECK_ERROR(hipMemcpyHtoAAsync( + static_cast(pDst), pCopyRegion->dstOffset.x, + static_cast(SrcWithOffset), CopyExtentBytes, + Stream)); #else UR_CHECK_ERROR(hipMemcpyHtoA( - static_cast(pDst), - pCopyRegion->dstOffset.x * PixelSizeBytes, + static_cast(pDst), pCopyRegion->dstOffset.x, static_cast(SrcWithOffset), CopyExtentBytes)); #endif } else if (memType == hipMemoryTypeDevice) { - void *DstWithOffset = - static_cast(static_cast(pDst) + - (PixelSizeBytes * pCopyRegion->dstOffset.x)); + void *DstWithOffset = static_cast(static_cast(pDst) + + pCopyRegion->dstOffset.x); UR_CHECK_ERROR(hipMemcpyHtoDAsync( static_cast(DstWithOffset), const_cast(static_cast(SrcWithOffset)), @@ -700,9 +697,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( hip_Memcpy2D cpy_desc = {}; cpy_desc.srcMemoryType = hipMemoryTypeHost; cpy_desc.srcHost = pSrc; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.srcPitch = pSrcImageDesc->rowPitch; if (pDstImageDesc->rowPitch == 0) { @@ -714,15 +711,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstDevice = static_cast(pDst); cpy_desc.dstPitch = pDstImageDesc->rowPitch; } - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; UR_CHECK_ERROR(hipMemcpyParam2DAsync(&cpy_desc, Stream)); } else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) { HIP_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeHost; @@ -731,7 +728,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcHeight = pSrcImageDesc->height; cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.Depth = pCopyRegion->copyExtent.depth; // 'hipMemcpy3DAsync' requires us to correctly create 'hipMemcpy3DParms' @@ -741,10 +738,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pDstImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || pDstImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { HIP_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeHost; @@ -753,7 +750,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcHeight = std::max(MinCopyHeight, pSrcImageDesc->height); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(MinCopyHeight, pCopyRegion->copyExtent.height); cpy_desc.Depth = pCopyRegion->copyExtent.depth; @@ -774,10 +771,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( memType == hipMemoryTypeArray, UR_RESULT_ERROR_INVALID_VALUE); - size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; - void *DstWithOffset = - static_cast(static_cast(pDst) + - (PixelSizeBytes * pCopyRegion->dstOffset.x)); + size_t CopyExtentBytes = pCopyRegion->copyExtent.width; + void *DstWithOffset = static_cast(static_cast(pDst) + + pCopyRegion->dstOffset.x); if (memType == hipMemoryTypeArray) { // HIP doesn not provide async copies between image arrays and host @@ -785,17 +781,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( #if HIP_VERSION >= 60200000 UR_CHECK_ERROR(hipMemcpyAtoHAsync( DstWithOffset, static_cast(const_cast(pSrc)), - PixelSizeBytes * pCopyRegion->srcOffset.x, CopyExtentBytes, - Stream)); + pCopyRegion->srcOffset.x, CopyExtentBytes, Stream)); #else UR_CHECK_ERROR(hipMemcpyAtoH( DstWithOffset, static_cast(const_cast(pSrc)), - PixelSizeBytes * pCopyRegion->srcOffset.x, CopyExtentBytes)); + pCopyRegion->srcOffset.x, CopyExtentBytes)); #endif } else if (memType == hipMemoryTypeDevice) { const char *SrcWithOffset = - static_cast(pSrc) + - (pCopyRegion->srcOffset.x * PixelSizeBytes); + static_cast(pSrc) + pCopyRegion->srcOffset.x; UR_CHECK_ERROR(hipMemcpyDtoHAsync( DstWithOffset, static_cast(const_cast(SrcWithOffset)), @@ -806,9 +800,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( } } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) { hip_Memcpy2D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; @@ -825,15 +819,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->rowPitch; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; UR_CHECK_ERROR(hipMemcpyParam2DAsync(&cpy_desc, Stream)); } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) { HIP_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeArray; @@ -842,7 +836,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = pDstImageDesc->height; - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.Depth = pCopyRegion->copyExtent.depth; // 'hipMemcpy3DAsync' requires us to correctly create @@ -853,10 +847,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { HIP_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeArray; @@ -865,7 +859,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = std::max(MinCopyHeight, pDstImageDesc->height); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(MinCopyHeight, pCopyRegion->copyExtent.height); cpy_desc.Depth = pCopyRegion->copyExtent.depth; @@ -889,43 +883,43 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( // the end if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D) { hip_Memcpy2D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = 0; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = 0; cpy_desc.srcMemoryType = hipMemoryTypeArray; cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = 1; UR_CHECK_ERROR(hipMemcpyParam2DAsync(&cpy_desc, Stream)); } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) { hip_Memcpy2D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.srcMemoryType = hipMemoryTypeArray; cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; UR_CHECK_ERROR(hipMemcpyParam2DAsync(&cpy_desc, Stream)); } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) { HIP_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeArray; cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; cpy_desc.Depth = pCopyRegion->copyExtent.depth; // 'hipMemcpy3DAsync' requires us to correctly create @@ -936,17 +930,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { HIP_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes; + cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x; cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.srcZ = pCopyRegion->srcOffset.z; - cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; + cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeArray; cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); - cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; + cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(MinCopyHeight, pCopyRegion->copyExtent.height); cpy_desc.Depth = pCopyRegion->copyExtent.depth; From 20a0e26f1f83d8a76cb9e83c718123b5436b1db0 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 27 Aug 2025 08:10:31 -0700 Subject: [PATCH 10/25] Fix 1D copies --- .../source/adapters/level_zero/image_common.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 385f3599970d8..4883ca266a8f4 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -924,14 +924,17 @@ ur_result_t bindlessImagesHandleCopyFlags( (uint32_t)pCopyRegion->copyExtent.width, (uint32_t)pCopyRegion->copyExtent.height, (uint32_t)pCopyRegion->copyExtent.depth}; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->dstOffset.x, + ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, (uint32_t)pCopyRegion->srcOffset.y, (uint32_t)pCopyRegion->srcOffset.z, (uint32_t)pCopyRegion->copyExtent.width, (uint32_t)pCopyRegion->copyExtent.height, (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t DstRowPitch = pDstImageDesc->rowPitch; - uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; + uint32_t DstRowPitch = + std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); + uint32_t SrcRowPitch = + std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); + ; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, From 1b4fe4edb77b58005516ea85dfe563b9b542f1de Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 28 Aug 2025 03:30:51 -0700 Subject: [PATCH 11/25] Fix CUDA adapter build --- unified-runtime/source/adapters/cuda/image.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unified-runtime/source/adapters/cuda/image.cpp b/unified-runtime/source/adapters/cuda/image.cpp index 5c5ec3367442a..4f1f1892cddfd 100644 --- a/unified-runtime/source/adapters/cuda/image.cpp +++ b/unified-runtime/source/adapters/cuda/image.cpp @@ -763,7 +763,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)pSrc) != CUDA_SUCCESS; - size_t CopyExtentBytes = CopyRegion->copyExtent.width; + size_t CopyExtentBytes = pCopyRegion->copyExtent.width; void *DstWithOffset = static_cast(static_cast(pDst) + pCopyRegion->dstOffset.x); From 3b2f80e8141b30688c797c01690145d44f3b77ad Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 28 Aug 2025 06:57:55 -0700 Subject: [PATCH 12/25] Fix unused argument in UR L0 adapter --- unified-runtime/source/adapters/level_zero/image_common.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 4883ca266a8f4..7ed47629fbdf7 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -889,7 +889,7 @@ ur_result_t bindlessImagesHandleCopyFlags( // depth are specified as pixels (or rows and slices). ze_image_region_t, // however, accepts everything as pixels, so we need to do a conversion // here. - auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + auto PixelSizeInBytes = getPixelSizeBytes(pDstImageFormat); DstRegion.originX /= PixelSizeInBytes; DstRegion.width /= PixelSizeInBytes; @@ -934,7 +934,6 @@ ur_result_t bindlessImagesHandleCopyFlags( std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); uint32_t SrcRowPitch = std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); - ; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, From 3f88450d56a45325e01c13c8f25ffc18679f85b0 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 29 Aug 2025 07:34:32 -0700 Subject: [PATCH 13/25] Working towards an alternative approach --- sycl/source/handler.cpp | 20 +++++++--------- .../source/adapters/level_zero/common.cpp | 4 +++- .../source/adapters/level_zero/image.cpp | 1 + .../adapters/level_zero/image_common.cpp | 23 ++++++++++++++----- .../adapters/level_zero/image_common.hpp | 1 + .../level_zero/v2/command_list_manager.cpp | 1 + 6 files changed, 31 insertions(+), 19 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ffba0367d89c7..667ad6c3eed7a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1671,12 +1671,10 @@ void handler::ext_oneapi_copy( Desc.width * Desc.num_channels * detail::get_channel_size(Desc); if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - detail::fill_copy_args(get_impl(), Desc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, HostRowPitch, + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, HostRowPitch, DeviceRowPitch); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), Desc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, DeviceRowPitch, + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, DeviceRowPitch, HostRowPitch); } else { throw sycl::exception(make_error_code(errc::invalid), @@ -1713,15 +1711,13 @@ void handler::ext_oneapi_copy( // Fill the host extent based on the type of copy. if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - detail::fill_copy_args(get_impl(), DeviceImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, HostRowPitch, - DeviceRowPitch, SrcOffset, HostExtent, DestOffset, - {0, 0, 0}, CopyExtent); + detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, + HostRowPitch, DeviceRowPitch, SrcOffset, HostExtent, + DestOffset, {0, 0, 0}, CopyExtent); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), DeviceImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST, DeviceRowPitch, - HostRowPitch, SrcOffset, {0, 0, 0}, DestOffset, - HostExtent, CopyExtent); + detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, + DeviceRowPitch, HostRowPitch, SrcOffset, {0, 0, 0}, + DestOffset, HostExtent, CopyExtent); } else { throw sycl::exception(make_error_code(errc::invalid), "Copy Error: This copy function only performs host " diff --git a/unified-runtime/source/adapters/level_zero/common.cpp b/unified-runtime/source/adapters/level_zero/common.cpp index 0433a2d52d149..2c6a0fb64791e 100644 --- a/unified-runtime/source/adapters/level_zero/common.cpp +++ b/unified-runtime/source/adapters/level_zero/common.cpp @@ -133,8 +133,10 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString) { } // switch } -ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *, const char *, +ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *API, const char *ARGS, bool) { + if (PrintTrace) + UR_LOG(QUIET, "ZE2UR {}({}) -> {}", API, ARGS, ZeResult); return ZeResult; } diff --git a/unified-runtime/source/adapters/level_zero/image.cpp b/unified-runtime/source/adapters/level_zero/image.cpp index 414b9bba3d6d9..4af84851179d1 100644 --- a/unified-runtime/source/adapters/level_zero/image.cpp +++ b/unified-runtime/source/adapters/level_zero/image.cpp @@ -83,6 +83,7 @@ ur_result_t urBindlessImagesImageCopyExp( const auto &WaitList = (*Event)->WaitList; auto res = bindlessImagesHandleCopyFlags( + hQueue->Context->getZeHandle(), pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, ZeCommandList, ZeEvent, WaitList.Length, WaitList.ZeEventList); diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 7ed47629fbdf7..d1ebc512f7640 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -765,6 +765,7 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc, } ur_result_t bindlessImagesHandleCopyFlags( + ze_context_handle_t hContext, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, @@ -781,7 +782,12 @@ ur_result_t bindlessImagesHandleCopyFlags( case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: { uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; - if (pDstImageDesc->rowPitch == 0) { + ze_memory_allocation_properties_t props { + .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, + .pNext = nullptr + }; + ZE2UR_CALL(zeMemGetAllocProperties, (hContext, pDst, &props, nullptr)); + if (props.type == ZE_MEMORY_TYPE_UNKNOWN) { // Copy to Non-USM memory ze_image_region_t DstRegion; @@ -813,14 +819,14 @@ ur_result_t bindlessImagesHandleCopyFlags( ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, (uint32_t)pCopyRegion->dstOffset.y, (uint32_t)pCopyRegion->dstOffset.z, - DstRowPitch, + (uint32_t)pCopyRegion->copyExtent.width, (uint32_t)pCopyRegion->copyExtent.height, (uint32_t)pCopyRegion->copyExtent.depth}; uint32_t DstSlicePitch = 0; ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, (uint32_t)pCopyRegion->srcOffset.y, (uint32_t)pCopyRegion->srcOffset.z, - SrcRowPitch, + (uint32_t)pCopyRegion->copyExtent.width, (uint32_t)pCopyRegion->copyExtent.height, (uint32_t)pCopyRegion->copyExtent.depth}; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, @@ -833,7 +839,12 @@ ur_result_t bindlessImagesHandleCopyFlags( case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST: { uint32_t DstRowPitch = pDstImageDesc->rowPitch; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; - if (pSrcImageDesc->rowPitch == 0) { + ze_memory_allocation_properties_t props { + .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, + .pNext = nullptr + }; + ZE2UR_CALL(zeMemGetAllocProperties, (hContext, pSrc, &props, nullptr)); + if (props.type == ZE_MEMORY_TYPE_UNKNOWN) { // Copy from Non-USM memory to host ze_image_region_t SrcRegion; UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, @@ -861,14 +872,14 @@ ur_result_t bindlessImagesHandleCopyFlags( ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, (uint32_t)pCopyRegion->dstOffset.y, (uint32_t)pCopyRegion->dstOffset.z, - DstRowPitch, + (uint32_t)pCopyRegion->copyExtent.width, (uint32_t)pCopyRegion->copyExtent.height, (uint32_t)pCopyRegion->copyExtent.depth}; uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, (uint32_t)pCopyRegion->srcOffset.y, (uint32_t)pCopyRegion->srcOffset.z, - SrcRowPitch, + (uint32_t)pCopyRegion->copyExtent.width, (uint32_t)pCopyRegion->copyExtent.height, (uint32_t)pCopyRegion->copyExtent.depth}; uint32_t SrcSlicePitch = 0; diff --git a/unified-runtime/source/adapters/level_zero/image_common.hpp b/unified-runtime/source/adapters/level_zero/image_common.hpp index 8df10e528b06b..846f269d00b38 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.hpp +++ b/unified-runtime/source/adapters/level_zero/image_common.hpp @@ -54,6 +54,7 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc, ze_image_region_t &ZeRegion); ur_result_t bindlessImagesHandleCopyFlags( + ze_context_handle_t hContext, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index 9f5fa7e31a07a..57754e408b3f1 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -901,6 +901,7 @@ ur_result_t ur_command_list_manager::bindlessImagesImageCopyExp( auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); return bindlessImagesHandleCopyFlags( + hContext->getZeHandle(), pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, getZeCommandList(), zeSignalEvent, waitListView.num, waitListView.handles); From 393355cb65f87c83e9fbd9797d31e40dbdbb8d64 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 29 Aug 2025 08:11:46 -0700 Subject: [PATCH 14/25] Nope, it doesn't work either --- .../adapters/level_zero/image_common.cpp | 258 ++++++++---------- 1 file changed, 112 insertions(+), 146 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index d1ebc512f7640..34f74db42a3fa 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -765,132 +765,67 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc, } ur_result_t bindlessImagesHandleCopyFlags( - ze_context_handle_t hContext, - const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, - const ur_image_desc_t *pDstImageDesc, + ze_context_handle_t hContext, const void *pSrc, void *pDst, + const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, - ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_region_t *pCopyRegion, ur_exp_image_copy_flags_t, ze_command_list_handle_t ZeCommandList, ze_event_handle_t zeSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZeStruct zeSrcImageDesc; ur2zeImageDesc(pSrcImageFormat, pSrcImageDesc, zeSrcImageDesc); - switch (imageCopyFlags) { - case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: { - uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; - uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; - ze_memory_allocation_properties_t props { - .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, - .pNext = nullptr - }; - ZE2UR_CALL(zeMemGetAllocProperties, (hContext, pDst, &props, nullptr)); - if (props.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Copy to Non-USM memory - - ze_image_region_t DstRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, - &pCopyRegion->copyExtent, DstRegion)); - // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset - // and width are specified as bytes, whilst Y/Z-axis offsets, height and - // depth are specified as pixels (or rows and slices). ze_image_region_t, - // however, accepts everything as pixels, so we need to do a conversion - // here. - auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); - DstRegion.originX /= PixelSizeInBytes; - DstRegion.width /= PixelSizeInBytes; - - auto *urDstImg = static_cast(pDst); - - const char *SrcPtr = static_cast(pSrc) + - pCopyRegion->srcOffset.z * SrcSlicePitch + - pCopyRegion->srcOffset.y * SrcRowPitch + - pCopyRegion->srcOffset.x; - - ZE2UR_CALL(zeCommandListAppendImageCopyFromMemoryExt, - (ZeCommandList, urDstImg->getZeImage(), SrcPtr, &DstRegion, - SrcRowPitch, SrcSlicePitch, zeSignalEvent, numWaitEvents, - phWaitEvents)); - } else { - // Copy to pitched USM memory - uint32_t DstRowPitch = pDstImageDesc->rowPitch; - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t DstSlicePitch = 0; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, - pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, - numWaitEvents, phWaitEvents)); - } - return UR_RESULT_SUCCESS; - }; - case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST: { - uint32_t DstRowPitch = pDstImageDesc->rowPitch; + // Level Zero does not use terms device and host, but instead operates on + // terms image and memory. + // Image means ze_image_handle_t, memory means regular pointer. + // The choice of API to call depends on input types, not on the copy + // direction. + ze_memory_allocation_properties_t DstMemAllocProps{ + /*.stype = */ ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, + /*.pNext = */ nullptr}; + ZE2UR_CALL(zeMemGetAllocProperties, + (hContext, pDst, &DstMemAllocProps, nullptr)); + ze_memory_allocation_properties_t SrcMemAllocProps{ + /*.stype = */ ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, + /*.pNext = */ nullptr}; + ZE2UR_CALL(zeMemGetAllocProperties, + (hContext, pDst, &SrcMemAllocProps, nullptr)); + + const bool SrcIsMemory = SrcMemAllocProps.type != ZE_MEMORY_TYPE_UNKNOWN; + const bool DstIsMemory = DstMemAllocProps.type != ZE_MEMORY_TYPE_UNKNOWN; + + if (SrcIsMemory && DstIsMemory) { + // Copy between (possibly) pitched USM regions + ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + // Strictly speaking, zeCommandListAppendMemoryCopyRegion is only for 2D and + // 3D copies and as such, row pitch arguments are non-optional. + // Since urBindlessImagesImageCopy can also be called for 1D images for + // which row pitch is zero, we calculate it ourselves. + uint32_t DstRowPitch = + std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); + uint32_t SrcRowPitch = + std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; - ze_memory_allocation_properties_t props { - .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, - .pNext = nullptr - }; - ZE2UR_CALL(zeMemGetAllocProperties, (hContext, pSrc, &props, nullptr)); - if (props.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Copy from Non-USM memory to host - ze_image_region_t SrcRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, - &pCopyRegion->copyExtent, SrcRegion)); - // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset - // and width are specified as bytes, whilst Y/Z-axis offsets, height and - // depth are specified as pixels (or rows and slices). ze_image_region_t, - // however, accepts everything as pixels, so we need to do a conversion - // here. - auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); - SrcRegion.originX /= PixelSizeInBytes; - SrcRegion.width /= PixelSizeInBytes; - - auto *urSrcImg = reinterpret_cast(pSrc); - - char *DstPtr = - static_cast(pDst) + pCopyRegion->dstOffset.z * DstSlicePitch + - pCopyRegion->dstOffset.y * DstRowPitch + pCopyRegion->dstOffset.x; - ZE2UR_CALL(zeCommandListAppendImageCopyToMemoryExt, - (ZeCommandList, DstPtr, urSrcImg->getZeImage(), &SrcRegion, - DstRowPitch, DstSlicePitch, zeSignalEvent, numWaitEvents, - phWaitEvents)); - } else { - // Copy from pitched USM memory to host - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t SrcSlicePitch = 0; - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, - pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, - numWaitEvents, phWaitEvents)); - } + uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, + pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, + numWaitEvents, phWaitEvents)); return UR_RESULT_SUCCESS; - }; - case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: { + } else if (!SrcIsMemory && !DstIsMemory) { + // Copy between two ze_image_handle_t's ze_image_region_t DstRegion; UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, @@ -926,38 +861,69 @@ ur_result_t bindlessImagesHandleCopyFlags( phWaitEvents)); return UR_RESULT_SUCCESS; - }; - case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST: { - // Copy between (possibly) pitched USM regions - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - (uint32_t)pCopyRegion->copyExtent.width, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t DstRowPitch = - std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); - uint32_t SrcRowPitch = - std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); - uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; - uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, - pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, - numWaitEvents, phWaitEvents)); + } else if (SrcIsMemory) { + assert(!DstIsMemory && + "Memory to memory copy should have been handled above"); + // Copy from USM to ze_image_handle_t + ze_image_region_t DstRegion; + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, + &pCopyRegion->copyExtent, DstRegion)); + // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset + // and width are specified as bytes, whilst Y/Z-axis offsets, height and + // depth are specified as pixels (or rows and slices). ze_image_region_t, + // however, accepts everything as pixels, so we need to do a conversion + // here. + const auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + DstRegion.originX /= PixelSizeInBytes; + DstRegion.width /= PixelSizeInBytes; + + auto *urDstImg = static_cast(pDst); + + const uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; + const uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; + const char *SrcPtr = static_cast(pSrc) + + pCopyRegion->srcOffset.z * SrcSlicePitch + + pCopyRegion->srcOffset.y * SrcRowPitch + + pCopyRegion->srcOffset.x; + + ZE2UR_CALL(zeCommandListAppendImageCopyFromMemoryExt, + (ZeCommandList, urDstImg->getZeImage(), SrcPtr, &DstRegion, + SrcRowPitch, SrcSlicePitch, zeSignalEvent, numWaitEvents, + phWaitEvents)); + return UR_RESULT_SUCCESS; + } else { + assert(DstIsMemory && !SrcIsMemory && + "Memory to image copy should have been handled above"); + // Copy from ze_image_handle_t to USM + ze_image_region_t SrcRegion; + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, + &pCopyRegion->copyExtent, SrcRegion)); + // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset + // and width are specified as bytes, whilst Y/Z-axis offsets, height and + // depth are specified as pixels (or rows and slices). ze_image_region_t, + // however, accepts everything as pixels, so we need to do a conversion + // here. + const auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + SrcRegion.originX /= PixelSizeInBytes; + SrcRegion.width /= PixelSizeInBytes; + + auto *urSrcImg = reinterpret_cast(pSrc); + + const uint32_t DstRowPitch = pDstImageDesc->rowPitch; + const uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; + char *DstPtr = + static_cast(pDst) + pCopyRegion->dstOffset.z * DstSlicePitch + + pCopyRegion->dstOffset.y * DstRowPitch + pCopyRegion->dstOffset.x; + ZE2UR_CALL(zeCommandListAppendImageCopyToMemoryExt, + (ZeCommandList, DstPtr, urSrcImg->getZeImage(), &SrcRegion, + DstRowPitch, DstSlicePitch, zeSignalEvent, numWaitEvents, + phWaitEvents)); return UR_RESULT_SUCCESS; - }; - default: - UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " - "unexpected imageCopyFlags"); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + + UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " + "unexpected inputs"); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool verifyStandardImageSupport( From e7e9272169feb483361835dcb89356ea8bae140c Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 15:04:19 +0200 Subject: [PATCH 15/25] Add new argument to the urBindlessImagesImageCopyExp --- unified-runtime/include/ur_api.h | 23 +++++++++++++ unified-runtime/include/ur_api_funcs.def | 2 +- unified-runtime/include/ur_ddi.h | 4 +-- unified-runtime/include/ur_print.h | 10 ++++++ unified-runtime/include/ur_print.hpp | 32 +++++++++++++++++++ .../scripts/core/exp-bindless-images.yml | 17 ++++++++++ unified-runtime/source/ur_api.cpp | 5 +++ 7 files changed, 90 insertions(+), 3 deletions(-) diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index e6f814e178e8b..ac9242d1beaa1 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -9798,6 +9798,23 @@ typedef enum ur_exp_image_copy_flag_t { /// @brief Bit Mask for validating ur_exp_image_copy_flags_t #define UR_EXP_IMAGE_COPY_FLAGS_MASK 0xfffffff0 +/////////////////////////////////////////////////////////////////////////////// +/// @brief Dictates the types of memory copy input and output. +typedef enum ur_exp_image_copy_input_types_t { + /// Memory to image handle + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE = 0, + /// Image handle to memory + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM = 1, + /// Memory to Memory + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM = 2, + /// Image handle to image handle + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE = 3, + /// @cond + UR_EXP_IMAGE_COPY_INPUT_TYPES_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_image_copy_input_types_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Sampler cubemap seamless filtering mode. typedef enum ur_exp_sampler_cubemap_filter_mode_t { @@ -10285,6 +10302,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( /// + `NULL == pCopyRegion` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags` +/// + `::UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < +/// imageCopyInputTypes` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR @@ -10314,6 +10333,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -14871,6 +14893,7 @@ typedef struct ur_bindless_images_image_copy_exp_params_t { const ur_image_format_t **ppDstImageFormat; ur_exp_image_copy_region_t **ppCopyRegion; ur_exp_image_copy_flags_t *pimageCopyFlags; + ur_exp_image_copy_input_types_t *pimageCopyInputTypes; uint32_t *pnumEventsInWaitList; const ur_event_handle_t **pphEventWaitList; ur_event_handle_t **pphEvent; diff --git a/unified-runtime/include/ur_api_funcs.def b/unified-runtime/include/ur_api_funcs.def index f0c92445b9238..1ad83fc2f04a4 100644 --- a/unified-runtime/include/ur_api_funcs.def +++ b/unified-runtime/include/ur_api_funcs.def @@ -15,7 +15,7 @@ * */ -// Auto-generated file, do not edit. + // Auto-generated file, do not edit. _UR_API(urAdapterGet) _UR_API(urAdapterRelease) diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index 8ab686aa583cc..f59e15a9eb3cd 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -1412,8 +1412,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnBindlessImagesImageCopyExp_t)( ur_queue_handle_t, const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, ur_exp_image_copy_region_t *, - ur_exp_image_copy_flags_t, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *); + ur_exp_image_copy_flags_t, ur_exp_image_copy_input_types_t, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urBindlessImagesImageGetInfoExp diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index 8130df0c5bec4..7c4528ec9ea81 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -1203,6 +1203,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_image_copy_input_types_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyInputTypes( + enum ur_exp_image_copy_input_types_t value, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_sampler_cubemap_filter_mode_t enum /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 17a8a5267efd9..fe14764a8aa29 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -534,6 +534,8 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_buffer_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_image_copy_input_types_t value); inline std::ostream & operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value); inline std::ostream &operator<<(std::ostream &os, @@ -11356,6 +11358,31 @@ inline ur_result_t printFlag(std::ostream &os, } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_image_copy_input_types_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_image_copy_input_types_t value) { + switch (value) { + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE"; + break; + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM"; + break; + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM"; + break; + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_sampler_cubemap_filter_mode_t type /// @returns /// std::ostream & @@ -18266,6 +18293,11 @@ inline std::ostream &operator<<( ur::details::printFlag(os, *(params->pimageCopyFlags)); + os << ", "; + os << ".imageCopyInputTypes = "; + + os << *(params->pimageCopyInputTypes); + os << ", "; os << ".numEventsInWaitList = "; diff --git a/unified-runtime/scripts/core/exp-bindless-images.yml b/unified-runtime/scripts/core/exp-bindless-images.yml index 6ace4e7740153..05c6acec0191f 100644 --- a/unified-runtime/scripts/core/exp-bindless-images.yml +++ b/unified-runtime/scripts/core/exp-bindless-images.yml @@ -174,6 +174,20 @@ etors: desc: "Host to host" --- #-------------------------------------------------------------------------- type: enum +desc: "Dictates the types of memory copy input and output." +class: $xBindlessImages +name: $x_exp_image_copy_input_types_t +etors: + - name: MEM_TO_IMAGE + desc: "Memory to image handle" + - name: IMAGE_TO_MEM + desc: "Image handle to memory" + - name: MEM_TO_MEM + desc: "Memory to Memory" + - name: IMAGE_TO_IMAGE + desc: "Image handle to image handle" +--- #-------------------------------------------------------------------------- +type: enum extend: True desc: "Memory types" name: $x_mem_type_t @@ -602,6 +616,9 @@ params: - type: $x_exp_image_copy_flags_t name: imageCopyFlags desc: "[in] flags describing copy direction e.g. H2D or D2H" + - type: $x_exp_image_copy_input_types_t + name: imageCopyInputTypes + desc: "[in] flag describing types of source and destination pointers (USM vs image handle)" - type: uint32_t name: numEventsInWaitList desc: "[in] size of the event wait list" diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index 47094964055fb..36d7ece1f1076 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -6758,6 +6758,8 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( /// + `NULL == pCopyRegion` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags` +/// + `::UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < +/// imageCopyInputTypes` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR @@ -6787,6 +6789,9 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of From dccab129e61f633b320ec62b9f6e63c9ba58974d Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 07:14:51 -0700 Subject: [PATCH 16/25] Extend UR API for bindless image copies to pass knowledge about input types --- sycl/source/detail/cg.hpp | 9 +- sycl/source/detail/handler_impl.hpp | 1 + sycl/source/detail/memory_manager.cpp | 10 +- sycl/source/detail/memory_manager.hpp | 6 +- sycl/source/detail/scheduler/commands.cpp | 5 +- sycl/source/handler.cpp | 120 +++++++++++------- .../source/adapters/cuda/image.cpp | 5 +- unified-runtime/source/adapters/hip/image.cpp | 5 +- .../source/adapters/level_zero/image.cpp | 14 +- .../adapters/level_zero/image_common.cpp | 48 +++---- .../adapters/level_zero/image_common.hpp | 6 +- .../level_zero/ur_interface_loader.hpp | 6 +- .../level_zero/v2/command_list_manager.cpp | 14 +- .../level_zero/v2/command_list_manager.hpp | 6 +- .../adapters/level_zero/v2/queue_api.cpp | 10 +- .../adapters/level_zero/v2/queue_api.hpp | 5 +- .../v2/queue_immediate_in_order.hpp | 9 +- .../v2/queue_immediate_out_of_order.hpp | 9 +- .../source/adapters/mock/ur_mockddi.cpp | 4 + .../source/adapters/native_cpu/image.cpp | 1 + .../source/adapters/opencl/image.cpp | 1 + .../loader/layers/tracing/ur_trcddi.cpp | 8 +- .../loader/layers/validation/ur_valddi.cpp | 6 +- unified-runtime/source/loader/ur_ldrddi.cpp | 7 +- unified-runtime/source/loader/ur_libapi.cpp | 6 +- 25 files changed, 187 insertions(+), 134 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 87e7b088951ac..23eb543c76e5e 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -600,6 +600,7 @@ class CGCopyImage : public CG { ur_image_format_t MSrcImageFormat; ur_image_format_t MDstImageFormat; ur_exp_image_copy_flags_t MImageCopyFlags; + ur_exp_image_copy_input_types_t MImageInputTypes; ur_rect_offset_t MSrcOffset; ur_rect_offset_t MDstOffset; ur_rect_region_t MCopyExtent; @@ -609,14 +610,15 @@ class CGCopyImage : public CG { ur_image_desc_t DstImageDesc, ur_image_format_t SrcImageFormat, ur_image_format_t DstImageFormat, ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageInputTypes, ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MSrcImageDesc(SrcImageDesc), MDstImageDesc(DstImageDesc), MSrcImageFormat(SrcImageFormat), MDstImageFormat(DstImageFormat), - MImageCopyFlags(ImageCopyFlags), MSrcOffset(SrcOffset), - MDstOffset(DstOffset), MCopyExtent(CopyExtent) {} + MImageCopyFlags(ImageCopyFlags), MImageInputTypes(ImageInputTypes), + MSrcOffset(SrcOffset), MDstOffset(DstOffset), MCopyExtent(CopyExtent) {} void *getSrc() const { return MSrc; } void *getDst() const { return MDst; } @@ -625,6 +627,9 @@ class CGCopyImage : public CG { ur_image_format_t getSrcFormat() const { return MSrcImageFormat; } ur_image_format_t getDstFormat() const { return MDstImageFormat; } ur_exp_image_copy_flags_t getCopyFlags() const { return MImageCopyFlags; } + ur_exp_image_copy_input_types_t getCopyInputTypes() const { + return MImageInputTypes; + } ur_rect_offset_t getSrcOffset() const { return MSrcOffset; } ur_rect_offset_t getDstOffset() const { return MDstOffset; } ur_rect_region_t getCopyExtent() const { return MCopyExtent; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index edf32dfa80f7e..9a5dbac6784d7 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -115,6 +115,7 @@ class handler_impl { ur_image_format_t MSrcImageFormat = {}; ur_image_format_t MDstImageFormat = {}; ur_exp_image_copy_flags_t MImageCopyFlags = {}; + ur_exp_image_copy_input_types_t MImageCopyInputTypes = {}; ur_rect_offset_t MSrcOffset = {}; ur_rect_offset_t MDestOffset = {}; diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index e09969fba057c..ff0118fd50b4d 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1575,8 +1575,10 @@ void MemoryManager::copy_image_bindless( queue_impl &Queue, const void *Src, void *Dst, const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, - const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, - ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, + const ur_exp_image_copy_flags_t Flags, + const ur_exp_image_copy_input_types_t InputTypes, + ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, + ur_rect_region_t CopyExtent, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { assert((Flags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || @@ -1599,8 +1601,8 @@ void MemoryManager::copy_image_bindless( Adapter.call( Queue.getHandleRef(), Src, Dst, &SrcDesc, &DstDesc, &SrcFormat, - &DstFormat, &CopyRegion, Flags, DepEvents.size(), DepEvents.data(), - OutEvent); + &DstFormat, &CopyRegion, Flags, InputTypes, DepEvents.size(), + DepEvents.data(), OutEvent); } } // namespace detail diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 02b0c7d673433..6a4f986c2a840 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -264,8 +264,10 @@ class MemoryManager { queue_impl &Queue, const void *Src, void *Dst, const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, - const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, - ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, + const ur_exp_image_copy_flags_t Flags, + const ur_exp_image_copy_input_types_t InputTypes, + ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, + ur_rect_region_t CopyExtent, const std::vector &DepEvents, ur_event_handle_t *OutEvent); }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d16f917cb94c5..6523b7eb623ff 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -3686,8 +3686,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::copy_image_bindless, *MQueue, Copy->getSrc(), Copy->getDst(), Copy->getSrcDesc(), Copy->getDstDesc(), Copy->getSrcFormat(), Copy->getDstFormat(), Copy->getCopyFlags(), - Copy->getSrcOffset(), Copy->getDstOffset(), Copy->getCopyExtent(), - std::move(RawEvents), Event); + Copy->getCopyInputTypes(), Copy->getSrcOffset(), + Copy->getDstOffset(), Copy->getCopyExtent(), std::move(RawEvents), + Event); Result != UR_RESULT_SUCCESS) return Result; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 667ad6c3eed7a..4e94dbc50dd47 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -230,16 +230,16 @@ fill_image_desc(const ext::oneapi::experimental::image_descriptor &ImgDesc) { return UrDesc; } -static void -fill_copy_args(detail::handler_impl *impl, - const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - const ext::oneapi::experimental::image_descriptor &DestImgDesc, - ur_exp_image_copy_flags_t ImageCopyFlags, size_t SrcPitch, - size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, - sycl::range<3> SrcExtent = {0, 0, 0}, - sycl::range<3> DestOffset = {0, 0, 0}, - sycl::range<3> DestExtent = {0, 0, 0}, - sycl::range<3> CopyExtent = {0, 0, 0}) { +static void fill_copy_args( + detail::handler_impl *impl, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, size_t SrcPitch, + size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { SrcImgDesc.verify(); DestImgDesc.verify(); @@ -274,6 +274,7 @@ fill_copy_args(detail::handler_impl *impl, impl->MSrcImageFormat = UrSrcFormat; impl->MDstImageFormat = UrDestFormat; impl->MImageCopyFlags = ImageCopyFlags; + impl->MImageCopyInputTypes = ImageCopyInputTypes; if (CopyExtent.size() != 0) { impl->MCopyExtent = {CopyExtent[0] * SrcPixelSize, CopyExtent[1], @@ -303,6 +304,7 @@ static void fill_copy_args(detail::handler_impl *impl, const ext::oneapi::experimental::image_descriptor &Desc, ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, sycl::range<3> SrcOffset = {0, 0, 0}, sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, @@ -312,22 +314,24 @@ fill_copy_args(detail::handler_impl *impl, size_t SrcPitch = SrcExtent[0] * Desc.num_channels * get_channel_size(Desc); size_t DestPitch = DestExtent[0] * Desc.num_channels * get_channel_size(Desc); - fill_copy_args(impl, Desc, Desc, ImageCopyFlags, SrcPitch, DestPitch, - SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, ImageCopyInputTypes, + SrcPitch, DestPitch, SrcOffset, SrcExtent, DestOffset, + DestExtent, CopyExtent); } -static void -fill_copy_args(detail::handler_impl *impl, - const ext::oneapi::experimental::image_descriptor &Desc, - ur_exp_image_copy_flags_t ImageCopyFlags, size_t SrcPitch, - size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, - sycl::range<3> SrcExtent = {0, 0, 0}, - sycl::range<3> DestOffset = {0, 0, 0}, - sycl::range<3> DestExtent = {0, 0, 0}, - sycl::range<3> CopyExtent = {0, 0, 0}) { +static void fill_copy_args( + detail::handler_impl *impl, + const ext::oneapi::experimental::image_descriptor &Desc, + ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, size_t SrcPitch, + size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { - fill_copy_args(impl, Desc, Desc, ImageCopyFlags, SrcPitch, DestPitch, - SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, ImageCopyInputTypes, + SrcPitch, DestPitch, SrcOffset, SrcExtent, DestOffset, + DestExtent, CopyExtent); } static void @@ -335,6 +339,7 @@ fill_copy_args(detail::handler_impl *impl, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const ext::oneapi::experimental::image_descriptor &DestImgDesc, ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, sycl::range<3> SrcOffset = {0, 0, 0}, sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, @@ -346,9 +351,9 @@ fill_copy_args(detail::handler_impl *impl, size_t DestPitch = DestExtent[0] * DestImgDesc.num_channels * get_channel_size(DestImgDesc); - fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, SrcPitch, - DestPitch, SrcOffset, SrcExtent, DestOffset, DestExtent, - CopyExtent); + fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, + ImageCopyInputTypes, SrcPitch, DestPitch, SrcOffset, SrcExtent, + DestOffset, DestExtent, CopyExtent); } } // namespace detail @@ -871,8 +876,8 @@ event handler::finalize() { CommandGroup.reset(new detail::CGCopyImage( MSrcPtr, MDstPtr, impl->MSrcImageDesc, impl->MDstImageDesc, impl->MSrcImageFormat, impl->MDstImageFormat, impl->MImageCopyFlags, - impl->MSrcOffset, impl->MDestOffset, impl->MCopyExtent, - std::move(impl->CGData), MCodeLoc)); + impl->MImageCopyInputTypes, impl->MSrcOffset, impl->MDestOffset, + impl->MCopyExtent, std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::SemaphoreWait: CommandGroup.reset( @@ -1588,7 +1593,8 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE); + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE); setType(detail::CGType::CopyImage); } @@ -1606,7 +1612,8 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcOffset, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, SrcOffset, SrcExtent, DestOffset, {0, 0, 0}, CopyExtent); setType(detail::CGType::CopyImage); @@ -1623,7 +1630,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM); setType(detail::CGType::CopyImage); } @@ -1642,7 +1650,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, SrcOffset, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, SrcOffset, {0, 0, 0}, DestOffset, DestExtent, CopyExtent); setType(detail::CGType::CopyImage); @@ -1671,11 +1680,13 @@ void handler::ext_oneapi_copy( Desc.width * Desc.num_channels * detail::get_channel_size(Desc); if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, HostRowPitch, - DeviceRowPitch); + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, + HostRowPitch, DeviceRowPitch); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, DeviceRowPitch, - HostRowPitch); + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, + DeviceRowPitch, HostRowPitch); } else { throw sycl::exception(make_error_code(errc::invalid), "Copy Error: This copy function only performs host " @@ -1712,10 +1723,12 @@ void handler::ext_oneapi_copy( // Fill the host extent based on the type of copy. if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, HostRowPitch, DeviceRowPitch, SrcOffset, HostExtent, DestOffset, {0, 0, 0}, CopyExtent); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, DeviceRowPitch, HostRowPitch, SrcOffset, {0, 0, 0}, DestOffset, HostExtent, CopyExtent); } else { @@ -1740,7 +1753,8 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE); setType(detail::CGType::CopyImage); } @@ -1760,8 +1774,10 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcOffset, - {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE, + SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, + CopyExtent); setType(detail::CGType::CopyImage); } @@ -1779,7 +1795,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, 0, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, 0, DestRowPitch); setType(detail::CGType::CopyImage); @@ -1800,7 +1817,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, 0, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, 0, DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); @@ -1820,8 +1838,9 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcRowPitch, - 0); + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, + SrcRowPitch, 0); setType(detail::CGType::CopyImage); } @@ -1841,9 +1860,10 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcRowPitch, - 0, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, - CopyExtent); + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, + SrcRowPitch, 0, SrcOffset, {0, 0, 0}, DestOffset, + {0, 0, 0}, CopyExtent); setType(detail::CGType::CopyImage); } @@ -1864,7 +1884,8 @@ void handler::ext_oneapi_copy( ur_exp_image_copy_flags_t ImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, - SrcRowPitch, DestRowPitch); + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, SrcRowPitch, + DestRowPitch); setType(detail::CGType::CopyImage); } @@ -1882,8 +1903,9 @@ void handler::ext_oneapi_copy( ur_exp_image_copy_flags_t ImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, - SrcRowPitch, DestRowPitch, SrcOffset, {0, 0, 0}, - DestOffset, {0, 0, 0}, CopyExtent); + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, SrcRowPitch, + DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, + {0, 0, 0}, CopyExtent); setType(detail::CGType::CopyImage); } diff --git a/unified-runtime/source/adapters/cuda/image.cpp b/unified-runtime/source/adapters/cuda/image.cpp index 4f1f1892cddfd..5ec1e1d9a35c3 100644 --- a/unified-runtime/source/adapters/cuda/image.cpp +++ b/unified-runtime/source/adapters/cuda/image.cpp @@ -630,8 +630,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, ur_exp_image_copy_input_types_t, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { UR_ASSERT((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || diff --git a/unified-runtime/source/adapters/hip/image.cpp b/unified-runtime/source/adapters/hip/image.cpp index 05f04f1ff9eea..f44e025242d6a 100644 --- a/unified-runtime/source/adapters/hip/image.cpp +++ b/unified-runtime/source/adapters/hip/image.cpp @@ -625,8 +625,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, ur_exp_image_copy_input_types_t, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { UR_ASSERT((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE), diff --git a/unified-runtime/source/adapters/level_zero/image.cpp b/unified-runtime/source/adapters/level_zero/image.cpp index 4af84851179d1..07407c54fa415 100644 --- a/unified-runtime/source/adapters/level_zero/image.cpp +++ b/unified-runtime/source/adapters/level_zero/image.cpp @@ -28,8 +28,10 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { std::scoped_lock Lock(hQueue->Mutex); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -83,10 +85,10 @@ ur_result_t urBindlessImagesImageCopyExp( const auto &WaitList = (*Event)->WaitList; auto res = bindlessImagesHandleCopyFlags( - hQueue->Context->getZeHandle(), - pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, ZeCommandList, ZeEvent, - WaitList.Length, WaitList.ZeEventList); + hQueue->Context->getZeHandle(), pSrc, pDst, pSrcImageDesc, pDstImageDesc, + pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, + imageCopyInputTypes, ZeCommandList, ZeEvent, WaitList.Length, + WaitList.ZeEventList); if (res == UR_RESULT_SUCCESS) UR_CALL(hQueue->executeCommandList(CommandList, Blocking, OkToBatch)); diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 34f74db42a3fa..684bdad57f875 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -769,7 +769,9 @@ ur_result_t bindlessImagesHandleCopyFlags( const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, - ur_exp_image_copy_region_t *pCopyRegion, ur_exp_image_copy_flags_t, + ur_exp_image_copy_region_t *pCopyRegion, + /* unused */ ur_exp_image_copy_flags_t, + ur_exp_image_copy_input_types_t copyImageInputTypes, ze_command_list_handle_t ZeCommandList, ze_event_handle_t zeSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { @@ -781,21 +783,9 @@ ur_result_t bindlessImagesHandleCopyFlags( // Image means ze_image_handle_t, memory means regular pointer. // The choice of API to call depends on input types, not on the copy // direction. - ze_memory_allocation_properties_t DstMemAllocProps{ - /*.stype = */ ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, - /*.pNext = */ nullptr}; - ZE2UR_CALL(zeMemGetAllocProperties, - (hContext, pDst, &DstMemAllocProps, nullptr)); - ze_memory_allocation_properties_t SrcMemAllocProps{ - /*.stype = */ ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, - /*.pNext = */ nullptr}; - ZE2UR_CALL(zeMemGetAllocProperties, - (hContext, pDst, &SrcMemAllocProps, nullptr)); - const bool SrcIsMemory = SrcMemAllocProps.type != ZE_MEMORY_TYPE_UNKNOWN; - const bool DstIsMemory = DstMemAllocProps.type != ZE_MEMORY_TYPE_UNKNOWN; - - if (SrcIsMemory && DstIsMemory) { + switch (copyImageInputTypes) { + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM: { // Copy between (possibly) pitched USM regions ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, (uint32_t)pCopyRegion->dstOffset.y, @@ -824,11 +814,11 @@ ur_result_t bindlessImagesHandleCopyFlags( pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, numWaitEvents, phWaitEvents)); return UR_RESULT_SUCCESS; - } else if (!SrcIsMemory && !DstIsMemory) { + } + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE: { // Copy between two ze_image_handle_t's ze_image_region_t DstRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, - &pCopyRegion->dstOffset, + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, &pCopyRegion->copyExtent, DstRegion)); // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset // and width are specified as bytes, whilst Y/Z-axis offsets, height and @@ -840,8 +830,7 @@ ur_result_t bindlessImagesHandleCopyFlags( DstRegion.width /= PixelSizeInBytes; ze_image_region_t SrcRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, - &pCopyRegion->srcOffset, + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, &pCopyRegion->copyExtent, SrcRegion)); // UR accepts copy regions as byte-pixel-pixel format, i.e. X-axis offset // and width are specified as bytes, whilst Y/Z-axis offsets, height and @@ -861,9 +850,8 @@ ur_result_t bindlessImagesHandleCopyFlags( phWaitEvents)); return UR_RESULT_SUCCESS; - } else if (SrcIsMemory) { - assert(!DstIsMemory && - "Memory to memory copy should have been handled above"); + } + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE: { // Copy from USM to ze_image_handle_t ze_image_region_t DstRegion; UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, @@ -891,9 +879,8 @@ ur_result_t bindlessImagesHandleCopyFlags( SrcRowPitch, SrcSlicePitch, zeSignalEvent, numWaitEvents, phWaitEvents)); return UR_RESULT_SUCCESS; - } else { - assert(DstIsMemory && !SrcIsMemory && - "Memory to image copy should have been handled above"); + } + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM: { // Copy from ze_image_handle_t to USM ze_image_region_t SrcRegion; UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, @@ -920,10 +907,11 @@ ur_result_t bindlessImagesHandleCopyFlags( phWaitEvents)); return UR_RESULT_SUCCESS; } - - UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " - "unexpected inputs"); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + default: + UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " + "unexpected inputs"); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } } bool verifyStandardImageSupport( diff --git a/unified-runtime/source/adapters/level_zero/image_common.hpp b/unified-runtime/source/adapters/level_zero/image_common.hpp index 846f269d00b38..819d04fd8a207 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.hpp +++ b/unified-runtime/source/adapters/level_zero/image_common.hpp @@ -54,13 +54,13 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc, ze_image_region_t &ZeRegion); ur_result_t bindlessImagesHandleCopyFlags( - ze_context_handle_t hContext, - const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, - const ur_image_desc_t *pDstImageDesc, + ze_context_handle_t hContext, const void *pSrc, void *pDst, + const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, ze_command_list_handle_t ZeCommandList, ze_event_handle_t zeSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index df8e93c1f768a..77bc0b7d5b737 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -562,8 +562,10 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet); diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index 57754e408b3f1..c970d02f3ad5f 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -894,17 +894,19 @@ ur_result_t ur_command_list_manager::bindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t phEvent) { auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_IMAGE_COPY); auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); return bindlessImagesHandleCopyFlags( - hContext->getZeHandle(), - pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, getZeCommandList(), - zeSignalEvent, waitListView.num, waitListView.handles); + hContext->getZeHandle(), pSrc, pDst, pSrcImageDesc, pDstImageDesc, + pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, + imageCopyInputTypes, getZeCommandList(), zeSignalEvent, waitListView.num, + waitListView.handles); } ur_result_t ur_command_list_manager::bindlessImagesWaitExternalSemaphoreExp( diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp index a7eafa8f9cecc..3c1bbd710ed47 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp @@ -182,8 +182,10 @@ struct ur_command_list_manager { const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent); + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t phEvent); ur_result_t bindlessImagesWaitExternalSemaphoreExp( ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp index d043a68dcaec7..582885ea67c46 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp @@ -394,12 +394,14 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) try { return hQueue->get().bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp index b710f9d56b50d..4bd9d8fd2141e 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp @@ -147,8 +147,9 @@ struct ur_queue_t_ { virtual ur_result_t bindlessImagesImageCopyExp( const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, - ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; + ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, + ur_exp_image_copy_input_types_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp( ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 74b37d1b40eb3..3f230861ad563 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -390,13 +390,14 @@ struct ur_queue_immediate_in_order_t : ur_object, ur_queue_t_ { const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override { return commandListManager.lock()->bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, createEventIfRequested(eventPool.get(), phEvent, this)); } diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp index 07e8743154ded..f1ad68a62a1a8 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp @@ -433,14 +433,15 @@ struct ur_queue_immediate_out_of_order_t : ur_object, ur_queue_t_ { const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override { auto commandListId = getNextCommandListId(); return commandListManagers.lock()[commandListId].bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, createEventIfRequested(eventPool.get(), phEvent, this)); } diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 7956f048db92e..c7ecf0979b8f5 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -8510,6 +8510,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -8533,6 +8536,7 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( &pDstImageFormat, &pCopyRegion, &imageCopyFlags, + &imageCopyInputTypes, &numEventsInWaitList, &phEventWaitList, &phEvent}; diff --git a/unified-runtime/source/adapters/native_cpu/image.cpp b/unified-runtime/source/adapters/native_cpu/image.cpp index 18d02de897df2..4a4fc96dbf414 100644 --- a/unified-runtime/source/adapters/native_cpu/image.cpp +++ b/unified-runtime/source/adapters/native_cpu/image.cpp @@ -83,6 +83,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( [[maybe_unused]] const ur_image_format_t *pDstImageFormat, [[maybe_unused]] ur_exp_image_copy_region_t *pCopyRegion, [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t, [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { diff --git a/unified-runtime/source/adapters/opencl/image.cpp b/unified-runtime/source/adapters/opencl/image.cpp index 3ef27c7f5f7ac..573d2fa4f9219 100644 --- a/unified-runtime/source/adapters/opencl/image.cpp +++ b/unified-runtime/source/adapters/opencl/image.cpp @@ -84,6 +84,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( [[maybe_unused]] const ur_image_format_t *pDstImageFormat, [[maybe_unused]] ur_exp_image_copy_region_t *pCopyRegion, [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t, [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index d096d3895c385..0bca15321849d 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -7121,6 +7121,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -7148,6 +7151,7 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( &pDstImageFormat, &pCopyRegion, &imageCopyFlags, + &imageCopyInputTypes, &numEventsInWaitList, &phEventWaitList, &phEvent}; @@ -7160,8 +7164,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_result_t result = pfnImageCopyExp( hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); getContext()->notify_end(UR_FUNCTION_BINDLESS_IMAGES_IMAGE_COPY_EXP, "urBindlessImagesImageCopyExp", ¶ms, &result, diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 82e898fab80a7..409d33c68b8fe 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -7892,6 +7892,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, /// [in] size of the event wait list + ur_exp_image_copy_input_types_t imageCopyInputTypes, + /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of /// events that must be complete before this command can be executed. @@ -7960,8 +7962,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_result_t result = pfnImageCopyExp( hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); return result; } diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 75ae04bc5a4a8..d943fe99afdb6 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -4079,6 +4079,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4101,8 +4104,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( // forward to device-platform return pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, - imageCopyFlags, numEventsInWaitList, phEventWaitList, - phEvent); + imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index a7559029e2743..0c129b57c535e 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -7746,6 +7746,8 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, /// [in] size of the event wait list + ur_exp_image_copy_input_types_t imageCopyInputTypes, + /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of /// events that must be complete before this command can be executed. @@ -7764,8 +7766,8 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( return pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, - imageCopyFlags, numEventsInWaitList, phEventWaitList, - phEvent); + imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); } catch (...) { return exceptionToResult(std::current_exception()); } From ed523bcdf99c591540254fecb62f9649b9e99924 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 08:32:34 -0700 Subject: [PATCH 17/25] Revert now unused additional argument --- unified-runtime/source/adapters/level_zero/image.cpp | 7 +++---- .../source/adapters/level_zero/image_common.cpp | 4 ++-- .../source/adapters/level_zero/image_common.hpp | 4 ++-- .../source/adapters/level_zero/v2/command_list_manager.cpp | 6 +++--- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image.cpp b/unified-runtime/source/adapters/level_zero/image.cpp index 07407c54fa415..3614cd5240e94 100644 --- a/unified-runtime/source/adapters/level_zero/image.cpp +++ b/unified-runtime/source/adapters/level_zero/image.cpp @@ -85,10 +85,9 @@ ur_result_t urBindlessImagesImageCopyExp( const auto &WaitList = (*Event)->WaitList; auto res = bindlessImagesHandleCopyFlags( - hQueue->Context->getZeHandle(), pSrc, pDst, pSrcImageDesc, pDstImageDesc, - pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, - imageCopyInputTypes, ZeCommandList, ZeEvent, WaitList.Length, - WaitList.ZeEventList); + pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + ZeCommandList, ZeEvent, WaitList.Length, WaitList.ZeEventList); if (res == UR_RESULT_SUCCESS) UR_CALL(hQueue->executeCommandList(CommandList, Blocking, OkToBatch)); diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 684bdad57f875..0079724f6c741 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -765,8 +765,8 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc, } ur_result_t bindlessImagesHandleCopyFlags( - ze_context_handle_t hContext, const void *pSrc, void *pDst, - const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, + const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, + const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, diff --git a/unified-runtime/source/adapters/level_zero/image_common.hpp b/unified-runtime/source/adapters/level_zero/image_common.hpp index 819d04fd8a207..dd9d944ae92aa 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.hpp +++ b/unified-runtime/source/adapters/level_zero/image_common.hpp @@ -54,8 +54,8 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc, ze_image_region_t &ZeRegion); ur_result_t bindlessImagesHandleCopyFlags( - ze_context_handle_t hContext, const void *pSrc, void *pDst, - const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, + const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, + const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index c970d02f3ad5f..ab9c35bc03311 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -903,9 +903,9 @@ ur_result_t ur_command_list_manager::bindlessImagesImageCopyExp( auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); return bindlessImagesHandleCopyFlags( - hContext->getZeHandle(), pSrc, pDst, pSrcImageDesc, pDstImageDesc, - pSrcImageFormat, pDstImageFormat, pCopyRegion, imageCopyFlags, - imageCopyInputTypes, getZeCommandList(), zeSignalEvent, waitListView.num, + pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + getZeCommandList(), zeSignalEvent, waitListView.num, waitListView.handles); } From 52cb9a149d975752ec2cd916c56197dc02e38bc8 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 08:37:03 -0700 Subject: [PATCH 18/25] Fix UR code checks --- unified-runtime/include/ur_api_funcs.def | 2 +- .../source/loader/layers/validation/ur_valddi.cpp | 6 +++++- unified-runtime/source/loader/loader.def.in | 1 + unified-runtime/source/loader/loader.map.in | 1 + unified-runtime/source/loader/ur_libapi.cpp | 5 ++++- unified-runtime/source/loader/ur_print.cpp | 9 +++++++++ 6 files changed, 21 insertions(+), 3 deletions(-) diff --git a/unified-runtime/include/ur_api_funcs.def b/unified-runtime/include/ur_api_funcs.def index 1ad83fc2f04a4..f0c92445b9238 100644 --- a/unified-runtime/include/ur_api_funcs.def +++ b/unified-runtime/include/ur_api_funcs.def @@ -15,7 +15,7 @@ * */ - // Auto-generated file, do not edit. +// Auto-generated file, do not edit. _UR_API(urAdapterGet) _UR_API(urAdapterRelease) diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 409d33c68b8fe..a643d2553ca43 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -7891,7 +7891,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, - /// [in] size of the event wait list + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, @@ -7940,6 +7941,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( if (UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags) return UR_RESULT_ERROR_INVALID_ENUMERATION; + if (UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < imageCopyInputTypes) + return UR_RESULT_ERROR_INVALID_ENUMERATION; + if (pSrcImageDesc && UR_MEM_TYPE_IMAGE_CUBEMAP_EXP < pSrcImageDesc->type) return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 3ad47149315ee..e86a6c65a7957 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -346,6 +346,7 @@ EXPORTS urPrintExpExternalSemaphoreType urPrintExpFileDescriptor urPrintExpImageCopyFlags + urPrintExpImageCopyInputTypes urPrintExpImageCopyRegion urPrintExpImageMemType urPrintExpPeerInfo diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index fde803f9aa45a..f147cf07c433d 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -346,6 +346,7 @@ urPrintExpExternalSemaphoreType; urPrintExpFileDescriptor; urPrintExpImageCopyFlags; + urPrintExpImageCopyInputTypes urPrintExpImageCopyRegion; urPrintExpImageMemType; urPrintExpPeerInfo; diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 0c129b57c535e..7e7f5d8e77822 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -7716,6 +7716,8 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( /// + `NULL == pCopyRegion` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags` +/// + `::UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < +/// imageCopyInputTypes` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR @@ -7745,7 +7747,8 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, - /// [in] size of the event wait list + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index f3d5c96e376ca..06619c8f7f625 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -965,6 +965,15 @@ ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintExpImageCopyInputTypes(enum ur_exp_image_copy_input_types_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpSamplerCubemapFilterMode( enum ur_exp_sampler_cubemap_filter_mode_t value, char *buffer, const size_t buff_size, size_t *out_size) { From 967b3772f0ae8748c84b6eb556ac089c29b4b2f2 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 08:38:17 -0700 Subject: [PATCH 19/25] Drop old debugging code --- unified-runtime/source/adapters/level_zero/common.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/common.cpp b/unified-runtime/source/adapters/level_zero/common.cpp index 2c6a0fb64791e..0433a2d52d149 100644 --- a/unified-runtime/source/adapters/level_zero/common.cpp +++ b/unified-runtime/source/adapters/level_zero/common.cpp @@ -133,10 +133,8 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString) { } // switch } -ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *API, const char *ARGS, +ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *, const char *, bool) { - if (PrintTrace) - UR_LOG(QUIET, "ZE2UR {}({}) -> {}", API, ARGS, ZeResult); return ZeResult; } From 4cec7ceb7c8216787d1daa3eaf9a6d6c7c5551a3 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 08:47:15 -0700 Subject: [PATCH 20/25] Reduce the diff with sycl branch, fix build --- sycl/source/handler.cpp | 50 +++++++++++++++------ unified-runtime/source/loader/loader.map.in | 2 +- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 4e94dbc50dd47..d1a8f00be6e07 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1795,7 +1795,7 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, 0, DestRowPitch); @@ -1838,7 +1838,7 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, SrcRowPitch, 0); @@ -1860,7 +1860,7 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, SrcRowPitch, 0, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); @@ -1881,11 +1881,22 @@ void handler::ext_oneapi_copy( MSrcPtr = const_cast(Src); MDstPtr = Dest; - ur_exp_image_copy_flags_t ImageCopyFlags = - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; - detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, - UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, SrcRowPitch, - DestRowPitch); + ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( + get_pointer_type(Src, + createSyclObjFromImpl(impl->get_context())), + get_pointer_type(Dest, + createSyclObjFromImpl(impl->get_context()))); + + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || + ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { + detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, + SrcRowPitch, DestRowPitch); + } else { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: This copy function only performs device " + "to device or host to host copies!"); + } setType(detail::CGType::CopyImage); } @@ -1900,12 +1911,23 @@ void handler::ext_oneapi_copy( MSrcPtr = const_cast(Src); MDstPtr = Dest; - ur_exp_image_copy_flags_t ImageCopyFlags = - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; - detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, - UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, SrcRowPitch, - DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, - {0, 0, 0}, CopyExtent); + ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( + get_pointer_type(Src, + createSyclObjFromImpl(impl->get_context())), + get_pointer_type(Dest, + createSyclObjFromImpl(impl->get_context()))); + + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || + ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { + detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, + SrcRowPitch, DestRowPitch, SrcOffset, {0, 0, 0}, + DestOffset, {0, 0, 0}, CopyExtent); + } else { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: This copy function only performs device " + "to device or host to host copies!"); + } setType(detail::CGType::CopyImage); } diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index f147cf07c433d..6a30c9186f674 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -346,7 +346,7 @@ urPrintExpExternalSemaphoreType; urPrintExpFileDescriptor; urPrintExpImageCopyFlags; - urPrintExpImageCopyInputTypes + urPrintExpImageCopyInputTypes; urPrintExpImageCopyRegion; urPrintExpImageMemType; urPrintExpPeerInfo; From 23521b004a06817f4d8c99088854d3b289a1ff89 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 1 Sep 2025 08:47:34 -0700 Subject: [PATCH 21/25] Drop incorrectly added XFAIL --- .../bindless_images/copies/device_to_device_pitched.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp index 1842cd30735b0..d47ecbe693f22 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp @@ -1,7 +1,5 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm -// XFAIL: level_zero -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17663 // // UNSUPPORTED: cuda // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17231 From 5db29610251b37c418c492f21e2589f20f7c0065 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Tue, 2 Sep 2025 04:48:47 -0700 Subject: [PATCH 22/25] This should (hopefully) fix HIP issues --- sycl/source/handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d1a8f00be6e07..5ec39d9691eb5 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1817,7 +1817,7 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, 0, DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); From c816ebb6897f0b95f9571807e665aab1e8fee8a7 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Tue, 2 Sep 2025 04:53:28 -0700 Subject: [PATCH 23/25] Improve tests logging on failure - I cannot reproduce those failures locally, so I need more info --- .../copies/copy_subregion_1D.cpp | 66 +++++++++++++------ .../copies/copy_subregion_2D.cpp | 63 ++++++++++++------ 2 files changed, 88 insertions(+), 41 deletions(-) diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp index ff9430ee31fc0..ba8c6b4bc5c74 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp @@ -504,10 +504,11 @@ bool check_test(const std::vector &out, } if (mismatch) { -#ifdef VERBOSE_PRINT - std::cout << "Result mismatch! Expected: " << expected[i] - << ", Actual: " << out[i] << std::endl; -#else + std::cout << "Result mismatch at index " << i + << "! Expected: " << expected[i] << ", Actual: " << out[i] + << std::endl; +#ifndef VERBOSE_PRINT + // In CI, only display the first mismatched index break; #endif } @@ -541,42 +542,65 @@ bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<1> dims) { // Perform copy checks copy_image_mem_handle_to_image_mem_handle(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_image_mem_handle_to_image_mem_handle test failed" + << std::endl; + validated = false; + } std::fill(out.begin(), out.end(), 0); copy_image_mem_handle_to_usm(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_image_mem_handle_to_usm test failed" << std::endl; + validated = false; + } std::fill(out.begin(), out.end(), 0); copy_usm_to_image_mem_handle(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_usm_to_image_mem_handle test failed" << std::endl; + validated = false; + } std::fill(out.begin(), out.end(), 0); copy_usm_to_usm(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_usm_to_usm test failed" << std::endl; + validated = false; + } // Perform out of bounds copy checks - validated = - validated && image_mem_handle_to_image_mem_handle_out_of_bounds_copy( - dataInDesc, outDesc, dataIn1, dev, q); + if (!image_mem_handle_to_image_mem_handle_out_of_bounds_copy( + dataInDesc, outDesc, dataIn1, dev, q)) { + std::cout + << "image_mem_handle_to_image_mem_handle_out_of_bounds_copy test failed" + << std::endl; + validated = false; + } - validated = validated && image_mem_handle_to_usm_out_of_bounds_copy( - dataInDesc, outDesc, dataIn1, dev, q); + if (!image_mem_handle_to_usm_out_of_bounds_copy(dataInDesc, outDesc, dataIn1, + dev, q)) { + std::cout << "image_mem_handle_to_usm_out_of_bounds_copy test failed" + << std::endl; + validated = false; + } - validated = validated && usm_to_image_mem_handle_out_of_bounds_copy( - dataInDesc, outDesc, dataIn1, dev, q); + if (!usm_to_image_mem_handle_out_of_bounds_copy(dataInDesc, outDesc, dataIn1, + dev, q)) { + std::cout << "usm_to_image_mem_handle_out_of_bounds_copy test failed" + << std::endl; + validated = false; + } - validated = validated && usm_to_usm_out_of_bounds_copy(dataInDesc, outDesc, - dataIn1, dev, q); + if (!usm_to_usm_out_of_bounds_copy(dataInDesc, outDesc, dataIn1, dev, q)) { + std::cout << "usm_to_usm_out_of_bounds_copy test failed" << std::endl; + validated = false; + } return validated; } diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp index c3325fbef33b0..3d422b717e3d8 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp @@ -445,10 +445,11 @@ bool check_test(const std::vector &out, } if (mismatch) { -#ifdef VERBOSE_PRINT - std::cout << "Result mismatch! Expected: " << expected[i] - << ", Actual: " << out[i] << std::endl; -#else + std::cout << "Result mismatch at index " << i + << "! Expected: " << expected[i] << ", Actual: " << out[i] + << std::endl; +#ifndef VERBOSE_PRINT + // In CI, only display the first mismatched index break; #endif } @@ -474,39 +475,61 @@ bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { // Perform copy checks copy_image_mem_handle_to_image_mem_handle(desc, dataIn, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_image_mem_handle_to_image_mem_handle test failed" + << std::endl; + validated = false; + } std::fill(out.begin(), out.end(), 0); copy_image_mem_handle_to_usm(desc, dataIn, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_image_mem_handle_to_usm test failed" << std::endl; + validated = false; + } std::fill(out.begin(), out.end(), 0); copy_usm_to_image_mem_handle(desc, dataIn, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_usm_to_image_mem_handle test failed" << std::endl; + validated = false; + } std::fill(out.begin(), out.end(), 0); copy_usm_to_usm(desc, dataIn, dev, q, out); - - validated = validated && check_test(out, expected); + if (!check_test(out, expected)) { + std::cout << "copy_usm_to_usm test failed" << std::endl; + validated = false; + } // Perform out of bounds copy checks - validated = - validated && image_mem_handle_to_image_mem_handle_out_of_bounds_copy( - desc, dataIn, dev, q); + if (!image_mem_handle_to_image_mem_handle_out_of_bounds_copy( + desc, dataIn, dev, q)) { + std::cout + << "image_mem_handle_to_image_mem_handle_out_of_bounds_copy test failed" + << std::endl; + validated = false; + } - validated = validated && - image_mem_handle_to_usm_out_of_bounds_copy(desc, dataIn, dev, q); + if (!image_mem_handle_to_usm_out_of_bounds_copy(desc, dataIn, dev, q)) { + std::cout << "image_mem_handle_to_usm_out_of_bounds_copy test failed" + << std::endl; + validated = false; + } - validated = validated && - usm_to_image_mem_handle_out_of_bounds_copy(desc, dataIn, dev, q); + if (!usm_to_image_mem_handle_out_of_bounds_copy(desc, dataIn, dev, q)) { + std::cout << "usm_to_image_mem_handle_out_of_bounds_copy test failed" + << std::endl; + validated = false; + } - validated = validated && usm_to_usm_out_of_bounds_copy(desc, dataIn, dev, q); + if (!usm_to_usm_out_of_bounds_copy(desc, dataIn, dev, q)) { + std::cout << "usm_to_usm_out_of_bounds_copy test failed" << std::endl; + validated = false; + } return validated; } From 0899f4e8f54b8ffce234ff93b229ee4836fd048f Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Tue, 2 Sep 2025 05:04:36 -0700 Subject: [PATCH 24/25] Apply clang-format --- sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp index 3d422b717e3d8..62ef4a4ea890e 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp @@ -506,8 +506,8 @@ bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { } // Perform out of bounds copy checks - if (!image_mem_handle_to_image_mem_handle_out_of_bounds_copy( - desc, dataIn, dev, q)) { + if (!image_mem_handle_to_image_mem_handle_out_of_bounds_copy(desc, dataIn, + dev, q)) { std::cout << "image_mem_handle_to_image_mem_handle_out_of_bounds_copy test failed" << std::endl; From 410d22a3c2f4738269db8b29fe23a47b18ccdc4f Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 3 Sep 2025 03:13:27 -0700 Subject: [PATCH 25/25] This should fix failures we see with V1 L0 adapter --- unified-runtime/source/adapters/level_zero/image.cpp | 12 +++++++++++- .../source/adapters/level_zero/image_common.cpp | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image.cpp b/unified-runtime/source/adapters/level_zero/image.cpp index 3614cd5240e94..5200978fe1ed9 100644 --- a/unified-runtime/source/adapters/level_zero/image.cpp +++ b/unified-runtime/source/adapters/level_zero/image.cpp @@ -45,7 +45,17 @@ ur_result_t urBindlessImagesImageCopyExp( UR_ASSERT(!(pSrcImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pSrcImageDesc->type), UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - bool UseCopyEngine = hQueue->useCopyEngine(/*PreferCopyEngine*/ true); + // When we do a region copy from an image handle to USM with non-zero offest + // into a USM region, then copy engine would ignore the offset and always + // write data at the beginning of the USM allocation. + // On the other hand, when performing memory to memory copies if copy engine + // is not used, then only half the lines are copied. + // This is wild and the change is only added because we continue to test + // both V1 and V2 L0 adapters for all HW, regardless of the default adapter + // there. + bool UseCopyEngine = + hQueue->useCopyEngine(/*PreferCopyEngine*/ imageCopyInputTypes == + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM); // Due to the limitation of the copy engine, disable usage of Copy Engine // Given 3 channel image if (is3ChannelOrder( diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index 0079724f6c741..015e770c0fd61 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -861,7 +861,7 @@ ur_result_t bindlessImagesHandleCopyFlags( // depth are specified as pixels (or rows and slices). ze_image_region_t, // however, accepts everything as pixels, so we need to do a conversion // here. - const auto PixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); + const auto PixelSizeInBytes = getPixelSizeBytes(pDstImageFormat); DstRegion.originX /= PixelSizeInBytes; DstRegion.width /= PixelSizeInBytes;