diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml
index 3aeabb036ad58..d61e16a6e7353 100644
--- a/.github/workflows/clang-format.yml
+++ b/.github/workflows/clang-format.yml
@@ -9,16 +9,16 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
+    - name: Get clang-format first
+      run: sudo apt-get install -yqq clang-format-9
+
     - uses: actions/checkout@v2
       with:
         fetch-depth: 2
 
-    - name: Get clang-format first
-      run: sudo apt-get install -yqq clang-format-10
-
     - name: Run clang-format for the patch
       run: |
-        git diff -U0 --no-color ${GITHUB_SHA}^1 ${GITHUB_SHA} -- | ./clang/tools/clang-format/clang-format-diff.py -p1 -binary clang-format-10 > ./clang-format.patch
+        git diff -U0 --no-color ${GITHUB_SHA}^1 ${GITHUB_SHA} -- | ./clang/tools/clang-format/clang-format-diff.py -p1 -binary clang-format-9 > ./clang-format.patch
 
     # Add patch with formatting fixes to CI job artifacts
     - uses: actions/upload-artifact@v1
diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index d540c23ea0a64..1b7ae47f7b7fb 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -14,7 +14,7 @@ include(AddSYCLExecutable)
 set(SYCL_MAJOR_VERSION 2)
 set(SYCL_MINOR_VERSION 1)
 set(SYCL_PATCH_VERSION 0)
-set(SYCL_DEV_ABI_VERSION 3)
+set(SYCL_DEV_ABI_VERSION 4)
 if (SYCL_ADD_DEV_VERSION_POSTFIX)
   set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
 endif()
@@ -326,7 +326,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS
      sycl-headers-extras
      sycl
      pi_opencl
-     pi_level0
+     pi_level_zero
      libsycldevice
 )
 if(OpenCL_INSTALL_KHRONOS_ICD_LOADER AND TARGET ocl-icd)
diff --git a/sycl/ReleaseNotes.md b/sycl/ReleaseNotes.md
index 70d30cf3cfd5b..7c6963777fb30 100644
--- a/sycl/ReleaseNotes.md
+++ b/sycl/ReleaseNotes.md
@@ -62,7 +62,7 @@ Release notes for the commit range ba404be..24726df
   - Added a cache for PI plugins, so subsequent calls for `sycl::device`
     creation should be cheaper [03dd60d]
   - A SYCL program will be aborted now if program linking is requested when
-    using L0 plugin. This is done because L0 doesn't support program linking
+    using Level Zero plugin. This is done because L0 doesn't support program linking
     [d4a5b71]
   - Added a diagnostic on attempt to use `sycl::program::set_spec_constant` when
     the program is already in compiled or linked state [e2e3d3d]
diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md
index 3d8d5cb41f741..b353135ecb9db 100644
--- a/sycl/doc/EnvironmentVariables.md
+++ b/sycl/doc/EnvironmentVariables.md
@@ -1,6 +1,6 @@
 # Environment Variables
 
-This document describes environment variables that are having effect on DPC++ 
+This document describes environment variables that are having effect on DPC++
 compiler and runtime.
 
 ## Controlling DPC++ RT
@@ -12,7 +12,7 @@ subject to change. Do not rely on these variables in production code.
 | Environment variable | Values | Description |
 | -------------------- | ------ | ----------- |
 | SYCL_PI_TRACE | Described [below](#sycl_pi_trace-options)  | Enable specified level of tracing for PI. |
-| SYCL_BE | PI_OPENCL, PI_LEVEL0, PI_CUDA | Force SYCL RT to consider only devices of the specified backend during the device selection. |
+| SYCL_BE | PI_OPENCL, PI_LEVEL_ZERO, PI_CUDA | Force SYCL RT to consider only devices of the specified backend during the device selection. |
 | SYCL_DEVICE_TYPE | One of: CPU, GPU, ACC, HOST | Force SYCL to use the specified device type. If unset, default selection rules are applied. If set to any unlisted value, this control has no effect. If the requested device type is not found, a `cl::sycl::runtime_error` exception is thrown. If a non-default device selector is used, a device must satisfy both the selector and this control to be chosen. This control only has effect on devices created with a selector. |
 | SYCL_PROGRAM_COMPILE_OPTIONS | String of valid OpenCL compile options | Override compile options for all programs. |
 | SYCL_PROGRAM_LINK_OPTIONS | String of valid OpenCL link options | Override link options for all programs. |
diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md
index 2c860b87e01c3..66312abd105b2 100644
--- a/sycl/doc/GetStartedGuide.md
+++ b/sycl/doc/GetStartedGuide.md
@@ -166,37 +166,37 @@ To run DPC++ applications on Level Zero devices, Level Zero implementation(s)
 must be present in the system. You can find the link to the Level Zero spec in
 the following section [Find More](#find-more).
 
-The Level Zero RT for `GPU`, OpenCL RT for `GPU`, OpenCL RT for `CPU` and TBB runtime
-which are needed to run DPC++ application on Intel `GPU` or Intel `CPU` devices can be
-downloaded using links in
+The Level Zero RT for `GPU`, OpenCL RT for `GPU`, OpenCL RT for `CPU`, FPGA
+emulation RT and TBB runtime which are needed to run DPC++ application
+on Intel `GPU` or Intel `CPU` devices can be downloaded using links in
 [the dependency configuration file](../../buildbot/dependency.conf)
 and installed following the instructions below. The same versions are used in
 PR testing.
 
-Intel OpenCL RT for `CPU` devices can be switched into OpenCL runtime for
-Intel FPGA Emulation. The following parameter should be set in `cl.cfg` file
-(available in directory containing CPU runtime for OpenCL) or environment
-variable with the same name. The following value should be set to switch
-OpenCL device mode:
-
-```bash
-CL_CONFIG_DEVICES = fpga-emu
-```
-
 **Linux**:
 
-1) Extract the archive. For example, for the archive
-`oclcpu_rt_<cpu_version>.tar.gz` you would run the following commands
+1) Extract the archive. For example, for the archives
+`oclcpuexp_<cpu_version>.tar.gz` and `fpgaemu_<fpga_version>.tar.gz` you would
+run the following commands
 
     ```bash
+    # Extract OpenCL FPGA emulation RT
+    mkdir -p /opt/intel/oclfpgaemu_<fpga_version>
+    cd /opt/intel/oclfpgaemu_<fpga_version>
+    tar zxvf fpgaemu_<fpga_version>.tar.gz
+    # Extract OpenCL CPU RT
     mkdir -p /opt/intel/oclcpuexp_<cpu_version>
     cd /opt/intel/oclcpuexp_<cpu_version>
     tar -zxvf oclcpu_rt_<cpu_version>.tar.gz
     ```
 
-2) Create ICD file pointing to the new runtime
+2) Create ICD file pointing to the new runtime (requires root access)
 
     ```bash
+    # OpenCL FPGA emulation RT
+    echo  /opt/intel/oclfpgaemu_<fpga_version>/x64/libintelocl_emu.so >
+      /etc/OpenCL/vendors/intel_fpgaemu.icd
+    # OpenCL CPU RT
     echo /opt/intel/oclcpuexp_<cpu_version>/x64/libintelocl.so >
       /etc/OpenCL/vendors/intel_expcpu.icd
     ```
@@ -213,6 +213,16 @@ CL_CONFIG_DEVICES = fpga-emu
 folder:
 
     ```bash
+    # OpenCL FPGA emulation RT
+    ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so
+      /opt/intel/oclfpgaemu_<fpga_version>/x64
+    ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so
+      /opt/intel/oclfpgaemu_<fpga_version>/x64
+    ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so.2
+      /opt/intel/oclfpgaemu_<fpga_version>/x64
+    ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so.2
+      /opt/intel/oclfpgaemu_<fpga_version>/x64
+    # OpenCL CPU RT
     ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so
       /opt/intel/oclcpuexp_<cpu_version>/x64
     ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so
@@ -223,10 +233,12 @@ folder:
       /opt/intel/oclcpuexp_<cpu_version>/x64
     ```
 
-5) Configure library paths
+5) Configure library paths (requires root access)
 
     ```bash
-    echo /opt/intel/oclcpuexp_<cpu_version>/x64 >
+    echo /opt/intel/oclfpgaemu_<fpga_version>/x64 >
+      /etc/ld.so.conf.d/libintelopenclexp.conf
+    echo /opt/intel/oclcpuexp_<cpu_version>/x64 >>
       /etc/ld.so.conf.d/libintelopenclexp.conf
     ldconfig -f /etc/ld.so.conf.d/libintelopenclexp.conf
     ```
@@ -239,7 +251,8 @@ OpenCL runtime for Intel `GPU` installer may re-write some important
 files or settings and make existing OpenCL runtime for Intel `CPU` runtime
 not working properly.
 
-2) Extract the archive with OpenCL runtime for Intel `CPU` using links in
+2) Extract the archive with OpenCL runtime for Intel `CPU` and/or for Intel
+`FPGA` emulation using links in
 [the dependency configuration file](../../buildbot/dependency.conf).  For
 example, to `c:\oclcpu_rt_<cpu_version>`.
 
@@ -257,6 +270,11 @@ extracted files are in `c:\oclcpu_rt_<cpu_version>\` folder, then type the
 command:
 
     ```bash
+    # Install OpenCL FPGA emulation RT
+    # Answer N to clean previous OCL_ICD_FILENAMES configuration
+    c:\oclfpga_rt_<fpga_version>\install.bat c:\tbb_<tbb_version>\tbb\bin\intel64\vc14
+    # Install OpenCL CPU RT
+    # Answer Y to setup CPU RT side-bi-side with FPGA RT
     c:\oclcpu_rt_<cpu_version>\install.bat c:\tbb_<tbb_version>\tbb\bin\intel64\vc14
     ```
 
diff --git a/sycl/doc/extensions/ParallelForSimpification/SYCL_INTEL_parallel_for_simplification.asciidoc b/sycl/doc/extensions/ParallelForSimplification/SYCL_INTEL_parallel_for_simplification.asciidoc
similarity index 100%
rename from sycl/doc/extensions/ParallelForSimpification/SYCL_INTEL_parallel_for_simplification.asciidoc
rename to sycl/doc/extensions/ParallelForSimplification/SYCL_INTEL_parallel_for_simplification.asciidoc
diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp
index 4fd3c55b0952c..2285f9ac63348 100644
--- a/sycl/include/CL/sycl.hpp
+++ b/sycl/include/CL/sycl.hpp
@@ -37,6 +37,11 @@
 #include <CL/sycl/platform.hpp>
 #include <CL/sycl/pointers.hpp>
 #include <CL/sycl/program.hpp>
+#include <CL/sycl/properties/accessor_properties.hpp>
+#include <CL/sycl/properties/buffer_properties.hpp>
+#include <CL/sycl/properties/context_properties.hpp>
+#include <CL/sycl/properties/image_properties.hpp>
+#include <CL/sycl/properties/queue_properties.hpp>
 #include <CL/sycl/queue.hpp>
 #include <CL/sycl/range.hpp>
 #include <CL/sycl/sampler.hpp>
diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp
index 18084a8a2d2b2..8631cee6ab640 100755
--- a/sycl/include/CL/sycl/accessor.hpp
+++ b/sycl/include/CL/sycl/accessor.hpp
@@ -22,6 +22,8 @@
 #include <CL/sycl/id.hpp>
 #include <CL/sycl/image.hpp>
 #include <CL/sycl/pointers.hpp>
+#include <CL/sycl/properties/accessor_properties.hpp>
+#include <CL/sycl/property_list.hpp>
 #include <CL/sycl/sampler.hpp>
 
 /// \file accessor.hpp
diff --git a/sycl/include/CL/sycl/backend/level_zero.hpp b/sycl/include/CL/sycl/backend/level_zero.hpp
index 3dd6fa2b8677b..82de4eb4c6a78 100644
--- a/sycl/include/CL/sycl/backend/level_zero.hpp
+++ b/sycl/include/CL/sycl/backend/level_zero.hpp
@@ -14,37 +14,37 @@
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 
-template <> struct interop<backend::level0, platform> {
+template <> struct interop<backend::level_zero, platform> {
   using type = ze_driver_handle_t;
 };
 
-template <> struct interop<backend::level0, device> {
+template <> struct interop<backend::level_zero, device> {
   using type = ze_device_handle_t;
 };
 
-template <> struct interop<backend::level0, queue> {
+template <> struct interop<backend::level_zero, queue> {
   using type = ze_command_queue_handle_t;
 };
 
-template <> struct interop<backend::level0, program> {
+template <> struct interop<backend::level_zero, program> {
   using type = ze_module_handle_t;
 };
 
 template <typename DataT, int Dimensions, access::mode AccessMode>
-struct interop<backend::level0, accessor<DataT, Dimensions, AccessMode,
-                                         access::target::global_buffer,
-                                         access::placeholder::false_t>> {
+struct interop<backend::level_zero, accessor<DataT, Dimensions, AccessMode,
+                                             access::target::global_buffer,
+                                             access::placeholder::false_t>> {
   using type = char *;
 };
 
 template <typename DataT, int Dimensions, access::mode AccessMode>
-struct interop<backend::level0, accessor<DataT, Dimensions, AccessMode,
-                                         access::target::constant_buffer,
-                                         access::placeholder::false_t>> {
+struct interop<backend::level_zero, accessor<DataT, Dimensions, AccessMode,
+                                             access::target::constant_buffer,
+                                             access::placeholder::false_t>> {
   using type = char *;
 };
 
-namespace level0 {
+namespace level_zero {
 
 // Implementation of various "make" functions resides in libsycl.so
 platform make_platform(pi_native_handle NativeHandle);
@@ -55,7 +55,7 @@ queue make_queue(const context &Context, pi_native_handle InteropHandle);
 // Construction of SYCL platform.
 template <typename T, typename std::enable_if<
                           std::is_same<T, platform>::value>::type * = nullptr>
-T make(typename interop<backend::level0, T>::type Interop) {
+T make(typename interop<backend::level_zero, T>::type Interop) {
   return make_platform(reinterpret_cast<pi_native_handle>(Interop));
 }
 
@@ -63,7 +63,7 @@ T make(typename interop<backend::level0, T>::type Interop) {
 template <typename T, typename std::enable_if<
                           std::is_same<T, device>::value>::type * = nullptr>
 T make(const platform &Platform,
-       typename interop<backend::level0, T>::type Interop) {
+       typename interop<backend::level_zero, T>::type Interop) {
   return make_device(Platform, reinterpret_cast<pi_native_handle>(Interop));
 }
 
@@ -71,7 +71,7 @@ T make(const platform &Platform,
 template <typename T, typename std::enable_if<
                           std::is_same<T, program>::value>::type * = nullptr>
 T make(const context &Context,
-       typename interop<backend::level0, T>::type Interop) {
+       typename interop<backend::level_zero, T>::type Interop) {
   return make_program(Context, reinterpret_cast<pi_native_handle>(Interop));
 }
 
@@ -79,10 +79,10 @@ T make(const context &Context,
 template <typename T, typename std::enable_if<
                           std::is_same<T, queue>::value>::type * = nullptr>
 T make(const context &Context,
-       typename interop<backend::level0, T>::type Interop) {
+       typename interop<backend::level_zero, T>::type Interop) {
   return make_queue(Context, reinterpret_cast<pi_native_handle>(Interop));
 }
 
-} // namespace level0
+} // namespace level_zero
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/backend_types.hpp b/sycl/include/CL/sycl/backend_types.hpp
index 362c6d9d9b5b9..655bbf89d8d39 100644
--- a/sycl/include/CL/sycl/backend_types.hpp
+++ b/sycl/include/CL/sycl/backend_types.hpp
@@ -18,7 +18,7 @@
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 
-enum class backend : char { host, opencl, level0, cuda };
+enum class backend : char { host, opencl, level_zero, cuda };
 
 template <backend name, typename SYCLObjectT> struct interop;
 
@@ -30,7 +30,7 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) {
   case backend::opencl:
     Out << std::string("opencl");
     break;
-  case backend::level0:
+  case backend::level_zero:
     Out << std::string("level-zero");
     break;
   case backend::cuda:
diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp
index 5301dd288502a..c106c5cba35bd 100644
--- a/sycl/include/CL/sycl/detail/pi.hpp
+++ b/sycl/include/CL/sycl/detail/pi.hpp
@@ -57,11 +57,11 @@ bool trace(TraceLevel level);
 
 #ifdef SYCL_RT_OS_WINDOWS
 #define OPENCL_PLUGIN_NAME "pi_opencl.dll"
-#define LEVEL0_PLUGIN_NAME "pi_level0.dll"
+#define LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll"
 #define CUDA_PLUGIN_NAME "pi_cuda.dll"
 #else
 #define OPENCL_PLUGIN_NAME "libpi_opencl.so"
-#define LEVEL0_PLUGIN_NAME "libpi_level0.so"
+#define LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so"
 #define CUDA_PLUGIN_NAME "libpi_cuda.so"
 #endif
 
diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp
new file mode 100644
index 0000000000000..3c8a82bc00117
--- /dev/null
+++ b/sycl/include/CL/sycl/detail/property_helper.hpp
@@ -0,0 +1,83 @@
+//==--------- property_helper.hpp --- SYCL property helper -----------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/detail/common.hpp>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+
+namespace detail {
+
+// All properties are split here to dataless properties and properties with
+// data. A dataless property is one which has no data stored in it. A property
+// with data is one which has data stored in it and usually provides and access
+// to it. For dataless property we just store a bool which indicates if a
+// property is set or not. For properties with data we store a pointer to the
+// base class because we do not know the size of such properties beforehand.
+
+// List of all dataless properties' IDs
+enum DataLessPropKind {
+  BufferUseHostPtr = 0,
+  ImageUseHostPtr,
+  QueueEnableProfiling,
+  InOrder,
+  NoInit,
+  BufferUsePinnedHostMemory,
+  UsePrimaryContext,
+  DataLessPropKindSize
+};
+
+// List of all properties with data IDs
+enum PropWithDataKind {
+  BufferUseMutex = 0,
+  BufferContextBound,
+  ImageUseMutex,
+  ImageContextBound,
+  PropWithDataKindSize
+};
+
+// Base class for dataless properties, needed to check that the type of an
+// object passed to the property_list is a property.
+class DataLessPropertyBase {};
+
+// Helper class for the dataless properties. Every such property is supposed
+// to inherit from it. The ID template parameter should be one from
+// DataLessPropKind.
+template <int ID> class DataLessProperty : DataLessPropertyBase {
+public:
+  static constexpr int getKind() { return ID; }
+};
+
+// Base class for properties with data, needed to check that the type of an
+// object passed to the property_list is a property and for checking if two
+// properties with data are of the same type.
+class PropertyWithDataBase {
+public:
+  PropertyWithDataBase(int ID) : MID(ID) {}
+  bool isSame(int ID) const { return ID == MID; }
+  virtual ~PropertyWithDataBase() = default;
+
+private:
+  int MID = -1;
+};
+
+// Helper class for the properties with data. Every such property is supposed
+// to inherit from it. The ID template parameter should be one from
+// PropWithDataKind.
+template <int ID> class PropertyWithData : public PropertyWithDataBase {
+public:
+  PropertyWithData() : PropertyWithDataBase(ID) {}
+  static int getKind() { return ID; }
+};
+
+} // namespace detail
+
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index d662e2afc7880..3ba8ff09b8828 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -33,6 +33,32 @@ template <> struct group_scope<::cl::sycl::intel::sub_group> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup;
 };
 
+// Generic shuffles and broadcasts may require multiple calls to SPIR-V
+// intrinsics, and should use the fewest broadcasts possible
+// - Loop over 64-bit chunks until remaining bytes < 64-bit
+// - At most one 32-bit, 16-bit and 8-bit chunk left over
+template <typename T, typename Functor>
+void GenericCall(const Functor &ApplyToBytes) {
+  if (sizeof(T) >= sizeof(uint64_t)) {
+#pragma unroll
+    for (size_t Offset = 0; Offset < sizeof(T); Offset += sizeof(uint64_t)) {
+      ApplyToBytes(Offset, sizeof(uint64_t));
+    }
+  }
+  if (sizeof(T) % sizeof(uint64_t) >= sizeof(uint32_t)) {
+    size_t Offset = sizeof(T) / sizeof(uint64_t) * sizeof(uint64_t);
+    ApplyToBytes(Offset, sizeof(uint32_t));
+  }
+  if (sizeof(T) % sizeof(uint32_t) >= sizeof(uint16_t)) {
+    size_t Offset = sizeof(T) / sizeof(uint32_t) * sizeof(uint32_t);
+    ApplyToBytes(Offset, sizeof(uint16_t));
+  }
+  if (sizeof(T) % sizeof(uint16_t) >= sizeof(uint8_t)) {
+    size_t Offset = sizeof(T) / sizeof(uint16_t) * sizeof(uint16_t);
+    ApplyToBytes(Offset, sizeof(uint8_t));
+  }
+}
+
 template <typename Group> bool GroupAll(bool pred) {
   return __spirv_GroupAll(group_scope<Group>::value, pred);
 }
@@ -41,47 +67,137 @@ template <typename Group> bool GroupAny(bool pred) {
   return __spirv_GroupAny(group_scope<Group>::value, pred);
 }
 
+// Native broadcasts map directly to a SPIR-V GroupBroadcast intrinsic
+template <typename T>
+using is_native_broadcast = bool_constant<detail::is_arithmetic<T>::value>;
+
+template <typename T, typename IdT = size_t>
+using EnableIfNativeBroadcast = detail::enable_if_t<
+    is_native_broadcast<T>::value && std::is_integral<IdT>::value, T>;
+
+// Bitcast broadcasts can be implemented using a single SPIR-V GroupBroadcast
+// intrinsic, but require type-punning via an appropriate integer type
+template <typename T>
+using is_bitcast_broadcast = bool_constant<
+    !is_native_broadcast<T>::value && std::is_trivially_copyable<T>::value &&
+    (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8)>;
+
+template <typename T, typename IdT = size_t>
+using EnableIfBitcastBroadcast = detail::enable_if_t<
+    is_bitcast_broadcast<T>::value && std::is_integral<IdT>::value, T>;
+
+template <typename T>
+using ConvertToNativeBroadcastType_t = select_cl_scalar_integral_unsigned_t<T>;
+
+// Generic broadcasts may require multiple calls to SPIR-V GroupBroadcast
+// intrinsics, and should use the fewest broadcasts possible
+// - Loop over 64-bit chunks until remaining bytes < 64-bit
+// - At most one 32-bit, 16-bit and 8-bit chunk left over
+template <typename T>
+using is_generic_broadcast =
+    bool_constant<!is_native_broadcast<T>::value &&
+                  !is_bitcast_broadcast<T>::value &&
+                  std::is_trivially_copyable<T>::value>;
+
+template <typename T, typename IdT = size_t>
+using EnableIfGenericBroadcast = detail::enable_if_t<
+    is_generic_broadcast<T>::value && std::is_integral<IdT>::value, T>;
+
 // Broadcast with scalar local index
 // Work-group supports any integral type
 // Sub-group currently supports only uint32_t
+template <typename Group> struct GroupId { using type = size_t; };
+template <> struct GroupId<::cl::sycl::intel::sub_group> {
+  using type = uint32_t;
+};
 template <typename Group, typename T, typename IdT>
-detail::enable_if_t<is_group<Group>::value && std::is_integral<IdT>::value, T>
-GroupBroadcast(T x, IdT local_id) {
+EnableIfNativeBroadcast<T, IdT> GroupBroadcast(T x, IdT local_id) {
+  using GroupIdT = typename GroupId<Group>::type;
+  GroupIdT GroupLocalId = static_cast<GroupIdT>(local_id);
   using OCLT = detail::ConvertToOpenCLType_t<T>;
-  using OCLIdT = detail::ConvertToOpenCLType_t<IdT>;
-  OCLT ocl_x = detail::convertDataToType<T, OCLT>(x);
-  OCLIdT ocl_id = detail::convertDataToType<IdT, OCLIdT>(local_id);
-  return __spirv_GroupBroadcast(group_scope<Group>::value, ocl_x, ocl_id);
+  using OCLIdT = detail::ConvertToOpenCLType_t<GroupIdT>;
+  OCLT OCLX = detail::convertDataToType<T, OCLT>(x);
+  OCLIdT OCLId = detail::convertDataToType<GroupIdT, OCLIdT>(GroupLocalId);
+  return __spirv_GroupBroadcast(group_scope<Group>::value, OCLX, OCLId);
 }
 template <typename Group, typename T, typename IdT>
-detail::enable_if_t<is_sub_group<Group>::value && std::is_integral<IdT>::value,
-                    T>
-GroupBroadcast(T x, IdT local_id) {
-  using SGIdT = uint32_t;
-  SGIdT sg_local_id = static_cast<SGIdT>(local_id);
-  using OCLT = detail::ConvertToOpenCLType_t<T>;
-  using OCLIdT = detail::ConvertToOpenCLType_t<SGIdT>;
-  OCLT ocl_x = detail::convertDataToType<T, OCLT>(x);
-  OCLIdT ocl_id = detail::convertDataToType<SGIdT, OCLIdT>(sg_local_id);
-  return __spirv_GroupBroadcast(group_scope<Group>::value, ocl_x, ocl_id);
+EnableIfBitcastBroadcast<T, IdT> GroupBroadcast(T x, IdT local_id) {
+  using GroupIdT = typename GroupId<Group>::type;
+  GroupIdT GroupLocalId = static_cast<GroupIdT>(local_id);
+  using BroadcastT = ConvertToNativeBroadcastType_t<T>;
+  using OCLIdT = detail::ConvertToOpenCLType_t<GroupIdT>;
+  auto BroadcastX = detail::bit_cast<BroadcastT>(x);
+  OCLIdT OCLId = detail::convertDataToType<GroupIdT, OCLIdT>(GroupLocalId);
+  BroadcastT Result =
+      __spirv_GroupBroadcast(group_scope<Group>::value, BroadcastX, OCLId);
+  return detail::bit_cast<T>(Result);
+}
+template <typename Group, typename T, typename IdT>
+EnableIfGenericBroadcast<T, IdT> GroupBroadcast(T x, IdT local_id) {
+  T Result;
+  char *XBytes = reinterpret_cast<char *>(&x);
+  char *ResultBytes = reinterpret_cast<char *>(&Result);
+  auto BroadcastBytes = [=](size_t Offset, size_t Size) {
+    uint64_t BroadcastX, BroadcastResult;
+    detail::memcpy(&BroadcastX, XBytes + Offset, Size);
+    BroadcastResult = GroupBroadcast<Group>(BroadcastX, local_id);
+    detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size);
+  };
+  GenericCall<T>(BroadcastBytes);
+  return Result;
 }
 
 // Broadcast with vector local index
 template <typename Group, typename T, int Dimensions>
-T GroupBroadcast(T x, id<Dimensions> local_id) {
+EnableIfNativeBroadcast<T> GroupBroadcast(T x, id<Dimensions> local_id) {
   if (Dimensions == 1) {
     return GroupBroadcast<Group>(x, local_id[0]);
   }
   using IdT = vec<size_t, Dimensions>;
   using OCLT = detail::ConvertToOpenCLType_t<T>;
   using OCLIdT = detail::ConvertToOpenCLType_t<IdT>;
-  IdT vec_id;
+  IdT VecId;
+  for (int i = 0; i < Dimensions; ++i) {
+    VecId[i] = local_id[Dimensions - i - 1];
+  }
+  OCLT OCLX = detail::convertDataToType<T, OCLT>(x);
+  OCLIdT OCLId = detail::convertDataToType<IdT, OCLIdT>(VecId);
+  return __spirv_GroupBroadcast(group_scope<Group>::value, OCLX, OCLId);
+}
+template <typename Group, typename T, int Dimensions>
+EnableIfBitcastBroadcast<T> GroupBroadcast(T x, id<Dimensions> local_id) {
+  if (Dimensions == 1) {
+    return GroupBroadcast<Group>(x, local_id[0]);
+  }
+  using IdT = vec<size_t, Dimensions>;
+  using BroadcastT = ConvertToNativeBroadcastType_t<T>;
+  using OCLIdT = detail::ConvertToOpenCLType_t<IdT>;
+  IdT VecId;
   for (int i = 0; i < Dimensions; ++i) {
-    vec_id[i] = local_id[Dimensions - i - 1];
+    VecId[i] = local_id[Dimensions - i - 1];
   }
-  OCLT ocl_x = detail::convertDataToType<T, OCLT>(x);
-  OCLIdT ocl_id = detail::convertDataToType<IdT, OCLIdT>(vec_id);
-  return __spirv_GroupBroadcast(group_scope<Group>::value, ocl_x, ocl_id);
+  auto BroadcastX = detail::bit_cast<BroadcastT>(x);
+  OCLIdT OCLId = detail::convertDataToType<IdT, OCLIdT>(VecId);
+  BroadcastT Result =
+      __spirv_GroupBroadcast(group_scope<Group>::value, BroadcastX, OCLId);
+  return detail::bit_cast<T>(Result);
+}
+template <typename Group, typename T, int Dimensions>
+EnableIfGenericBroadcast<T> GroupBroadcast(T x, id<Dimensions> local_id) {
+  if (Dimensions == 1) {
+    return GroupBroadcast<Group>(x, local_id[0]);
+  }
+  T Result;
+  char *XBytes = reinterpret_cast<char *>(&x);
+  char *ResultBytes = reinterpret_cast<char *>(&Result);
+  auto BroadcastBytes = [=](size_t Offset, size_t Size) {
+    uint64_t BroadcastX, BroadcastResult;
+    detail::memcpy(&BroadcastX, XBytes + Offset, Size);
+    BroadcastResult = GroupBroadcast<Group>(BroadcastX, local_id);
+    detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size);
+  };
+  GenericCall<T>(BroadcastBytes);
+  return Result;
 }
 
 // Single happens-before means semantics should always apply to all spaces
@@ -400,28 +516,6 @@ using EnableIfGenericShuffle =
                                sizeof(T) == 4 || sizeof(T) == 8)),
                         T>;
 
-template <typename T, typename ShuffleFunctor>
-void GenericShuffle(const ShuffleFunctor &ShuffleBytes) {
-  if (sizeof(T) >= sizeof(uint64_t)) {
-#pragma unroll
-    for (size_t Offset = 0; Offset < sizeof(T); Offset += sizeof(uint64_t)) {
-      ShuffleBytes(Offset, sizeof(uint64_t));
-    }
-  }
-  if (sizeof(T) % sizeof(uint64_t) >= sizeof(uint32_t)) {
-    size_t Offset = sizeof(T) / sizeof(uint64_t) * sizeof(uint64_t);
-    ShuffleBytes(Offset, sizeof(uint32_t));
-  }
-  if (sizeof(T) % sizeof(uint32_t) >= sizeof(uint16_t)) {
-    size_t Offset = sizeof(T) / sizeof(uint32_t) * sizeof(uint32_t);
-    ShuffleBytes(Offset, sizeof(uint16_t));
-  }
-  if (sizeof(T) % sizeof(uint16_t) >= sizeof(uint8_t)) {
-    size_t Offset = sizeof(T) / sizeof(uint16_t) * sizeof(uint16_t);
-    ShuffleBytes(Offset, sizeof(uint8_t));
-  }
-}
-
 template <typename T>
 EnableIfGenericShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
   T Result;
@@ -433,7 +527,7 @@ EnableIfGenericShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
     ShuffleResult = SubgroupShuffle(ShuffleX, local_id);
     detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
   };
-  GenericShuffle<T>(ShuffleBytes);
+  GenericCall<T>(ShuffleBytes);
   return Result;
 }
 
@@ -448,7 +542,7 @@ EnableIfGenericShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
     ShuffleResult = SubgroupShuffleXor(ShuffleX, local_id);
     detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
   };
-  GenericShuffle<T>(ShuffleBytes);
+  GenericCall<T>(ShuffleBytes);
   return Result;
 }
 
@@ -465,7 +559,7 @@ EnableIfGenericShuffle<T> SubgroupShuffleDown(T x, T y, id<1> local_id) {
     ShuffleResult = SubgroupShuffleDown(ShuffleX, ShuffleY, local_id);
     detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
   };
-  GenericShuffle<T>(ShuffleBytes);
+  GenericCall<T>(ShuffleBytes);
   return Result;
 }
 
@@ -482,7 +576,7 @@ EnableIfGenericShuffle<T> SubgroupShuffleUp(T x, T y, id<1> local_id) {
     ShuffleResult = SubgroupShuffleUp(ShuffleX, ShuffleY, local_id);
     detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
   };
-  GenericShuffle<T>(ShuffleBytes);
+  GenericCall<T>(ShuffleBytes);
   return Result;
 }
 
diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp
index 1ab0002cb6d7c..ad795d69806b5 100644
--- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp
+++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp
@@ -14,6 +14,8 @@
 #include <CL/sycl/detail/sycl_mem_obj_i.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/event.hpp>
+#include <CL/sycl/properties/buffer_properties.hpp>
+#include <CL/sycl/properties/image_properties.hpp>
 #include <CL/sycl/property_list.hpp>
 #include <CL/sycl/stl.hpp>
 
diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp
index 932a53ba07675..c8f6faa2a08a6 100644
--- a/sycl/include/CL/sycl/intel/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp
@@ -138,6 +138,12 @@ template <typename Ptr, typename T>
 using EnableIfIsPointer =
     cl::sycl::detail::enable_if_t<cl::sycl::detail::is_pointer<Ptr>::value, T>;
 
+template <typename T>
+using EnableIfIsTriviallyCopyable = cl::sycl::detail::enable_if_t<
+    std::is_trivially_copyable<T>::value &&
+        !cl::sycl::detail::is_vector_arithmetic<T>::value,
+    T>;
+
 // EnableIf shorthands for algorithms that depend on type and an operator
 template <typename T, typename BinaryOperation>
 using EnableIfIsScalarArithmeticNativeOp = cl::sycl::detail::enable_if_t<
@@ -286,8 +292,8 @@ EnableIfIsPointer<Ptr, bool> none_of(Group g, Ptr first, Ptr last,
 }
 
 template <typename Group, typename T>
-EnableIfIsScalarArithmetic<T> broadcast(Group, T x,
-                                        typename Group::id_type local_id) {
+EnableIfIsTriviallyCopyable<T> broadcast(Group, T x,
+                                         typename Group::id_type local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
                 "intel::sub_group class.");
@@ -323,7 +329,7 @@ EnableIfIsVectorArithmetic<T> broadcast(Group g, T x,
 }
 
 template <typename Group, typename T>
-EnableIfIsScalarArithmetic<T>
+EnableIfIsTriviallyCopyable<T>
 broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
@@ -363,7 +369,7 @@ broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) {
 }
 
 template <typename Group, typename T>
-EnableIfIsScalarArithmetic<T> broadcast(Group g, T x) {
+EnableIfIsTriviallyCopyable<T> broadcast(Group g, T x) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
                 "intel::sub_group class.");
diff --git a/sycl/include/CL/sycl/properties/accessor_properties.hpp b/sycl/include/CL/sycl/properties/accessor_properties.hpp
new file mode 100644
index 0000000000000..2aea424846ee6
--- /dev/null
+++ b/sycl/include/CL/sycl/properties/accessor_properties.hpp
@@ -0,0 +1,37 @@
+//==----------- accessor_properties.hpp --- SYCL accessor properties -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/detail/common.hpp>
+#include <CL/sycl/detail/property_helper.hpp>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace property {
+
+class noinit : public detail::DataLessProperty<detail::NoInit> {};
+
+} // namespace property
+
+#if __cplusplus > 201402L
+
+inline constexpr property::noinit noinit;
+
+#else
+
+namespace {
+
+constexpr const auto &noinit =
+    sycl::detail::InlineVariableHelper<property::noinit>::value;
+}
+
+#endif
+
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/properties/buffer_properties.hpp b/sycl/include/CL/sycl/properties/buffer_properties.hpp
new file mode 100644
index 0000000000000..fe83402485d21
--- /dev/null
+++ b/sycl/include/CL/sycl/properties/buffer_properties.hpp
@@ -0,0 +1,57 @@
+//==----------- buffer_properties.hpp --- SYCL buffer properties -----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/context.hpp>
+#include <CL/sycl/detail/property_helper.hpp>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+
+namespace property {
+namespace buffer {
+class use_host_ptr : public detail::DataLessProperty<detail::BufferUseHostPtr> {
+};
+
+class use_mutex : public detail::PropertyWithData<detail::BufferUseMutex> {
+public:
+  use_mutex(sycl::mutex_class &MutexRef) : MMutex(MutexRef) {}
+
+  sycl::mutex_class *get_mutex_ptr() const { return &MMutex; }
+
+private:
+  sycl::mutex_class &MMutex;
+};
+
+class context_bound
+    : public detail::PropertyWithData<detail::BufferContextBound> {
+public:
+  context_bound(sycl::context BoundContext) : MCtx(std::move(BoundContext)) {}
+
+  context get_context() const { return MCtx; }
+
+private:
+  sycl::context MCtx;
+};
+} // namespace buffer
+} // namespace property
+
+namespace ext {
+namespace oneapi {
+namespace property {
+namespace buffer {
+
+class use_pinned_host_memory
+    : public detail::DataLessProperty<detail::BufferUsePinnedHostMemory> {};
+} // namespace buffer
+} // namespace property
+} // namespace oneapi
+} // namespace ext
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/properties/context_properties.hpp b/sycl/include/CL/sycl/properties/context_properties.hpp
new file mode 100644
index 0000000000000..49ec19890c439
--- /dev/null
+++ b/sycl/include/CL/sycl/properties/context_properties.hpp
@@ -0,0 +1,25 @@
+//==----------- context_properties.hpp --- SYCL context properties ---------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/context.hpp>
+#include <CL/sycl/detail/property_helper.hpp>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace property {
+namespace context {
+namespace cuda {
+class use_primary_context
+    : public detail::DataLessProperty<detail::UsePrimaryContext> {};
+} // namespace cuda
+} // namespace context
+} // namespace property
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/properties/image_properties.hpp b/sycl/include/CL/sycl/properties/image_properties.hpp
new file mode 100644
index 0000000000000..ebb7e5d2a2972
--- /dev/null
+++ b/sycl/include/CL/sycl/properties/image_properties.hpp
@@ -0,0 +1,44 @@
+//==----------- image_properties.hpp --- SYCL image properties -------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/context.hpp>
+#include <CL/sycl/detail/property_helper.hpp>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace property {
+namespace image {
+class use_host_ptr : public detail::DataLessProperty<detail::ImageUseHostPtr> {
+};
+
+class use_mutex : public detail::PropertyWithData<detail::ImageUseMutex> {
+public:
+  use_mutex(sycl::mutex_class &MutexRef) : MMutex(MutexRef) {}
+
+  sycl::mutex_class *get_mutex_ptr() const { return &MMutex; }
+
+private:
+  sycl::mutex_class &MMutex;
+};
+
+class context_bound
+    : public detail::PropertyWithData<detail::ImageContextBound> {
+public:
+  context_bound(sycl::context BoundContext) : MCtx(std::move(BoundContext)) {}
+
+  context get_context() const { return MCtx; }
+
+private:
+  sycl::context MCtx;
+};
+} // namespace image
+} // namespace property
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/properties/queue_properties.hpp b/sycl/include/CL/sycl/properties/queue_properties.hpp
new file mode 100644
index 0000000000000..6d596fcf6a67c
--- /dev/null
+++ b/sycl/include/CL/sycl/properties/queue_properties.hpp
@@ -0,0 +1,23 @@
+//==----------- queue_properties.hpp --- SYCL queue properties -------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/detail/property_helper.hpp>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace property {
+namespace queue {
+class in_order : public detail::DataLessProperty<detail::InOrder> {};
+class enable_profiling
+    : public detail::DataLessProperty<detail::QueueEnableProfiling> {};
+} // namespace queue
+} // namespace property
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/property_list.hpp b/sycl/include/CL/sycl/property_list.hpp
index b727bf05671c2..5edaa636692f6 100644
--- a/sycl/include/CL/sycl/property_list.hpp
+++ b/sycl/include/CL/sycl/property_list.hpp
@@ -8,332 +8,123 @@
 
 #pragma once
 
-#include <CL/sycl/context.hpp>
 #include <CL/sycl/detail/common.hpp>
-#include <tuple>
+#include <CL/sycl/detail/property_helper.hpp>
+
+#include <bitset>
+#include <memory>
 #include <type_traits>
-#include <utility>
+#include <vector>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 
-// HOW TO ADD NEW PROPERTY INSTRUCTION:
-// 1. Add forward declaration of property class.
-// 2. Add new record in PropKind enum.
-// 3. Use RegisterProp macro passing new record from enum and new class.
-// 4. Add implementation of the new property class using detail::Prop class with
-//    template parameter = new record in enum as a base class.
-
-namespace property {
-
-namespace image {
-class use_host_ptr;
-class use_mutex;
-class context_bound;
-} // namespace image
-
-namespace buffer {
-class use_host_ptr;
-class use_mutex;
-class context_bound;
-} // namespace buffer
-
-namespace queue {
-class enable_profiling;
-class in_order;
-} // namespace queue
-
-class noinit;
-
-namespace detail {
-
-// Will be aliased in the sycl::ext::oneapi::property namespace
-namespace buffer_ {
-class use_pinned_host_memory;
-}
-
-// List of all properties' IDs.
-enum PropKind {
-  // Buffer properties
-  BufferUseHostPtr = 0,
-  BufferContextBound,
-  BufferUseMutex,
-
-  // Image properties
-  ImageUseHostPtr,
-  ImageContextBound,
-  ImageUseMutex,
-
-  // Queue properties
-  QueueEnableProfiling,
-  InOrder,
-
-  // Accessor
-  NoInit,
-
-  BufferUsePinnedHostMemory,
-
-  PropKindSize
-};
-
-// Base class for all properties. Needed to check that user passed only
-// SYCL's properties to property_list c'tor.
-class PropBase {};
+/// Objects of the property_list class are containers for the SYCL properties
+///
+/// \ingroup sycl_api
+class property_list {
 
-// Second base class, needed for mapping PropKind to class and vice versa.
-template <PropKind PropKindT> class Prop;
+  // The structs validate that all objects passed are SYCL properties
+  template <typename... Tail> struct AllProperties : std::true_type {};
+  template <typename T, typename... Tail>
+  struct AllProperties<T, Tail...>
+      : std::conditional<
+            std::is_base_of<detail::DataLessPropertyBase, T>::value ||
+                std::is_base_of<detail::PropertyWithDataBase, T>::value,
+            AllProperties<Tail...>, std::false_type>::type {};
 
-// This class is used in property_list to hold properties.
-template <class T> class PropertyHolder {
 public:
-  PropertyHolder() = default;
-
-  PropertyHolder(const PropertyHolder &P) {
-    if (P.isInitialized()) {
-      new (m_Mem) T(P.getProp());
-      m_Initialized = true;
-    }
-  }
-
-  ~PropertyHolder() {
-    if (m_Initialized) {
-      T *MemPtr = reinterpret_cast<T *>(m_Mem);
-      MemPtr->~T();
-    }
+  template <typename... PropsT, typename = typename std::enable_if<
+                                    AllProperties<PropsT...>::value>::type>
+  property_list(PropsT... Props) : MDataLessProps(false) {
+    ctorHelper(Props...);
   }
 
-  PropertyHolder &operator=(const PropertyHolder &Other) {
-    if (this != &Other) {
-      if (m_Initialized) {
-        T *MemPtr = reinterpret_cast<T *>(m_Mem);
-        MemPtr->~T();
-        m_Initialized = false;
-      }
-
-      if (Other.m_Initialized) {
-        new (m_Mem) T(Other.getProp());
-        m_Initialized = true;
-      }
-    }
-    return *this;
-  }
+  template <typename PropT> PropT get_property() const {
+    if (!has_property<PropT>())
+      throw sycl::invalid_object_error("The property is not found",
+                                       PI_INVALID_VALUE);
 
-  void setProp(const T &Rhs) {
-    new (m_Mem) T(Rhs);
-    m_Initialized = true;
+    return get_property_helper<PropT>();
   }
 
-  const T &getProp() const {
-    assert(true == m_Initialized && "Property was not set!");
-    const T *MemPtr = reinterpret_cast<const T *>(m_Mem);
-    return *MemPtr;
+  template <typename PropT> bool has_property() const {
+    return has_property_helper<PropT>();
   }
-  bool isInitialized() const { return m_Initialized; }
 
 private:
-  // Memory that is used for property allocation
-  alignas(T) unsigned char m_Mem[sizeof(T)];
-  // Indicate whether property initialized or not.
-  bool m_Initialized = false;
-};
+  void ctorHelper() {}
 
-// This macro adds specialization of class Prop which provides possibility to
-// convert PropKind to class and vice versa.
-#define RegisterProp(PropKindT, Type)                                          \
-  template <> class Prop<PropKindT> : public PropBase {                        \
-  public:                                                                      \
-    static constexpr PropKind getKind() { return PropKindT; }                  \
-    using FinalType = Type;                                                    \
+  template <typename... PropsT, class PropT>
+  typename std::enable_if<
+      std::is_base_of<detail::DataLessPropertyBase, PropT>::value>::type
+  ctorHelper(PropT &, PropsT... Props) {
+    const int PropKind = static_cast<int>(PropT::getKind());
+    MDataLessProps[PropKind] = true;
+    ctorHelper(Props...);
   }
 
-// Image
-RegisterProp(PropKind::ImageUseHostPtr, image::use_host_ptr);
-RegisterProp(PropKind::ImageUseMutex, image::use_mutex);
-RegisterProp(PropKind::ImageContextBound, image::context_bound);
-
-// Buffer
-RegisterProp(PropKind::BufferUseHostPtr, buffer::use_host_ptr);
-RegisterProp(PropKind::BufferUseMutex, buffer::use_mutex);
-RegisterProp(PropKind::BufferContextBound, buffer::context_bound);
-RegisterProp(PropKind::BufferUsePinnedHostMemory,
-             buffer_::use_pinned_host_memory);
-
-// Queue
-RegisterProp(PropKind::QueueEnableProfiling, queue::enable_profiling);
-RegisterProp(PropKind::InOrder, queue::in_order);
-
-// Accessor
-RegisterProp(PropKind::NoInit, noinit);
-
-// Sentinel, needed for automatic build of tuple in property_list.
-RegisterProp(PropKind::PropKindSize, PropBase);
-
-// Common class for use_mutex in buffer and image namespaces.
-template <PropKind PropKindT> class UseMutexBase : public Prop<PropKindT> {
-public:
-  UseMutexBase(mutex_class &MutexRef) : m_MutexClass(MutexRef) {}
-  mutex_class *get_mutex_ptr() const { return &m_MutexClass; }
-
-private:
-  mutex_class &m_MutexClass;
-};
-
-// Common class for context_bound in buffer and image namespaces.
-template <PropKind PropKindT> class ContextBoundBase : public Prop<PropKindT> {
-public:
-  ContextBoundBase(cl::sycl::context Context) : m_Context(Context) {}
-  context get_context() const { return m_Context; }
-
-private:
-  cl::sycl::context m_Context;
-};
-} // namespace detail
-
-namespace image {
-
-class use_host_ptr : public detail::Prop<detail::PropKind::ImageUseHostPtr> {};
-
-class use_mutex : public detail::UseMutexBase<detail::PropKind::ImageUseMutex> {
-public:
-  use_mutex(mutex_class &MutexRef) : UseMutexBase(MutexRef) {}
-};
-
-class context_bound
-    : public detail::ContextBoundBase<detail::PropKind::ImageContextBound> {
-public:
-  context_bound(cl::sycl::context Context) : ContextBoundBase(Context) {}
-};
-
-} // namespace image
-
-namespace buffer {
-
-class use_host_ptr : public detail::Prop<detail::PropKind::BufferUseHostPtr> {};
-
-class use_mutex
-    : public detail::UseMutexBase<detail::PropKind::BufferUseMutex> {
-public:
-  use_mutex(mutex_class &MutexRef) : UseMutexBase(MutexRef) {}
-};
-
-class context_bound
-    : public detail::ContextBoundBase<detail::PropKind::BufferContextBound> {
-public:
-  context_bound(cl::sycl::context Context) : ContextBoundBase(Context) {}
-};
-
-} // namespace buffer
-
-namespace detail {
-namespace buffer_ {
-class use_pinned_host_memory
-    : public detail::Prop<detail::PropKind::BufferUsePinnedHostMemory> {};
-} // namespace buffer_
-} // namespace detail
-
-namespace queue {
-class enable_profiling
-    : public detail::Prop<detail::PropKind::QueueEnableProfiling> {};
-
-class in_order : public detail::Prop<detail::PropKind::InOrder> {};
-} // namespace queue
-
-class noinit : public detail::Prop<detail::PropKind::NoInit> {};
-
-} // namespace property
-
-namespace ext {
-namespace oneapi {
-namespace property {
-namespace buffer {
-using use_pinned_host_memory =
-    sycl::property::detail::buffer_::use_pinned_host_memory;
-} // namespace buffer
-} // namespace property
-} // namespace oneapi
-} // namespace ext
-
-#if __cplusplus > 201402L
-
-inline constexpr property::noinit noinit;
-
-#else
-
-namespace {
-
-constexpr const auto &noinit =
-    sycl::detail::InlineVariableHelper<property::noinit>::value;
-
-}
-
-#endif
-
-class property_list {
-
-  // The structs validate that all objects passed are base of PropBase class.
-  template <typename... Tail> struct AllProperties : std::true_type {};
-  template <typename T, typename... Tail>
-  struct AllProperties<T, Tail...>
-      : std::conditional<std::is_base_of<property::detail::PropBase, T>::value,
-                         AllProperties<Tail...>, std::false_type>::type {};
-
-  template <class T>
-  using PropertyHolder = cl::sycl::property::detail::PropertyHolder<T>;
-  template <property::detail::PropKind PropKindT>
-  using Property = cl::sycl::property::detail::Prop<PropKindT>;
-
-  // The structs build tuple type that can hold all properties.
-  template <typename... Head> struct DefineTupleType {
-    using Type = std::tuple<Head...>;
-  };
-
-  template <int Counter, typename... Head>
-  struct BuildTupleType
-      : public std::conditional<
-            (Counter < property::detail::PropKind::PropKindSize),
-            BuildTupleType<
-                Counter + 1, Head...,
-                PropertyHolder<typename Property<(property::detail::PropKind)(
-                    Counter)>::FinalType>>,
-            DefineTupleType<Head...>>::type {};
-
-public:
-  // C'tor initialize m_PropList with properties passed by invoking ctorHelper
-  // recursively
-  template <typename... propertyTN,
-            typename = typename std::enable_if<
-                AllProperties<propertyTN...>::value>::type>
-  property_list(propertyTN... Props) {
+  template <typename... PropsT, class PropT>
+  typename std::enable_if<
+      std::is_base_of<detail::PropertyWithDataBase, PropT>::value>::type
+  ctorHelper(PropT &Prop, PropsT... Props) {
+    MPropsWithData.emplace_back(new PropT(Prop));
     ctorHelper(Props...);
   }
 
-  template <typename propertyT> propertyT get_property() const {
-    if (!has_property<propertyT>()) {
-      throw sycl::invalid_object_error();
-    }
-    const auto &PropHolder =
-        std::get<static_cast<int>(propertyT::getKind())>(m_PropsList);
-    return PropHolder.getProp();
+  template <typename PropT>
+  typename std::enable_if<
+      std::is_base_of<detail::DataLessPropertyBase, PropT>::value, bool>::type
+  has_property_helper() const {
+    const int PropKind = static_cast<int>(PropT::getKind());
+    if (PropKind >= detail::DataLessPropKind::DataLessPropKindSize)
+      return false;
+    return MDataLessProps[PropKind];
   }
 
-  template <typename propertyT> bool has_property() const {
-    if (static_cast<int>(propertyT::getKind()) >
-        property::detail::PropKind::PropKindSize)
-      return false;
-    return std::get<(int)(propertyT::getKind())>(m_PropsList).isInitialized();
+  template <typename PropT>
+  typename std::enable_if<
+      std::is_base_of<detail::PropertyWithDataBase, PropT>::value, bool>::type
+  has_property_helper() const {
+    const int PropKind = static_cast<int>(PropT::getKind());
+    for (const std::shared_ptr<detail::PropertyWithDataBase> &Prop :
+         MPropsWithData)
+      if (Prop->isSame(PropKind))
+        return true;
+    return false;
   }
 
-private:
-  void ctorHelper() {}
+  template <typename PropT>
+  typename std::enable_if<
+      std::is_base_of<detail::DataLessPropertyBase, PropT>::value, PropT>::type
+  get_property_helper() const {
+    // In case of simple property we can just construct it
+    return PropT{};
+  }
 
-  template <typename... propertyTN, class PropT>
-  void ctorHelper(PropT &Prop, propertyTN... props) {
-    std::get<static_cast<int>(PropT::getKind())>(m_PropsList).setProp(Prop);
-    ctorHelper(props...);
+  template <typename PropT>
+  typename std::enable_if<
+      std::is_base_of<detail::PropertyWithDataBase, PropT>::value, PropT>::type
+  get_property_helper() const {
+    const int PropKind = static_cast<int>(PropT::getKind());
+    if (PropKind >= detail::PropWithDataKind::PropWithDataKindSize)
+      throw sycl::invalid_object_error("The property is not found",
+                                       PI_INVALID_VALUE);
+
+    for (const std::shared_ptr<detail::PropertyWithDataBase> &Prop :
+         MPropsWithData)
+      if (Prop->isSame(PropKind))
+        return *static_cast<PropT *>(Prop.get());
+
+    throw sycl::invalid_object_error("The property is not found",
+                                     PI_INVALID_VALUE);
   }
 
-  // Tuple that able to hold all the properties.
-  BuildTupleType<0>::Type m_PropsList;
+private:
+  // Stores enable/not enabled for simple properties
+  std::bitset<detail::DataLessPropKind::DataLessPropKindSize> MDataLessProps;
+  // Stores shared_ptrs to complex properties
+  std::vector<std::shared_ptr<detail::PropertyWithDataBase>> MPropsWithData;
 };
 
 } // namespace sycl
diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt
index b7e89af870b48..a90dfc8ac841f 100755
--- a/sycl/plugins/level_zero/CMakeLists.txt
+++ b/sycl/plugins/level_zero/CMakeLists.txt
@@ -1,14 +1,14 @@
-# PI Level0 plugin library
+# PI Level Zero plugin library
 
 if(MSVC)
-  set(L0_LOADER
+  set(LEVEL_ZERO_LOADER
       "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}")
 else()
-  set(L0_LOADER
+  set(LEVEL_ZERO_LOADER
       "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}")
 endif()
 
-if (NOT DEFINED L0_LIBRARY OR NOT DEFINED L0_INCLUDE_DIR)
+if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
   message(STATUS "Download Level Zero loader and headers from github.com")
   if (CMAKE_C_COMPILER)
       list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER})
@@ -16,18 +16,18 @@ if (NOT DEFINED L0_LIBRARY OR NOT DEFINED L0_INCLUDE_DIR)
   if (CMAKE_CXX_COMPILER)
       list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER})
   endif()
-  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/l0_loader_build)
-  set(L0_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/Level0/l0_loader")
-  if (NOT DEFINED SYCL_EP_L0_LOADER_SKIP_AUTO_UPDATE)
-    set(SYCL_EP_L0_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE})
+  file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build)
+  set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero/level_zero_loader")
+  if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE)
+    set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE})
   endif()
-  ExternalProject_Add(l0-loader
+  ExternalProject_Add(level-zero-loader
     GIT_REPOSITORY    https://github.com/oneapi-src/level-zero.git
     GIT_TAG           v0.91.21
-    UPDATE_DISCONNECTED ${SYCL_EP_L0_LOADER_SKIP_AUTO_UPDATE}
-    SOURCE_DIR        ${L0_LOADER_SOURCE_DIR}
-    BINARY_DIR        "${CMAKE_CURRENT_BINARY_DIR}/l0_loader_build"
-    INSTALL_DIR       "${CMAKE_CURRENT_BINARY_DIR}/l0_loader_install"
+    UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE}
+    SOURCE_DIR        ${LEVEL_ZERO_LOADER_SOURCE_DIR}
+    BINARY_DIR        "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build"
+    INSTALL_DIR       "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install"
     CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
                -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS}
@@ -36,50 +36,50 @@ if (NOT DEFINED L0_LIBRARY OR NOT DEFINED L0_INCLUDE_DIR)
                ${AUX_CMAKE_FLAGS}
     STEP_TARGETS      configure,build,install
     DEPENDS           ocl-headers
-    BUILD_BYPRODUCTS ${L0_LOADER}
+    BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER}
   )
-  ExternalProject_Add_Step(l0-loader llvminstall
+  ExternalProject_Add_Step(level-zero-loader llvminstall
     COMMAND ${CMAKE_COMMAND} -E copy_directory <INSTALL_DIR>/ ${LLVM_BINARY_DIR}
-    COMMENT "Installing l0-loader into the LLVM binary directory"
+    COMMENT "Installing level-zero-loader into the LLVM binary directory"
     DEPENDEES install
   )
 
-  install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/l0_loader_install/"
+  install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/"
     DESTINATION "."
-    COMPONENT l0-loader
+    COMPONENT level-zero-loader
   )
 
-  list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS l0-loader)
+  list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-loader)
 else()
-  include_directories("${L0_INCLUDE_DIR}")
-  file(GLOB L0_LIBRARY_SRC "${L0_LIBRARY}*")
-  file(COPY ${L0_LIBRARY_SRC} DESTINATION ${LLVM_LIBRARY_OUTPUT_INTDIR})
-  add_custom_target(l0-loader DEPENDS ${L0_LIBRARY} COMMENT "Copying Level Zero Loader ...")
+  include_directories("${LEVEL_ZERO_INCLUDE_DIR}")
+  file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*")
+  file(COPY ${LEVEL_ZERO_LIBRARY_SRC} DESTINATION ${LLVM_LIBRARY_OUTPUT_INTDIR})
+  add_custom_target(level-zero-loader DEPENDS ${LEVEL_ZERO_LIBRARY} COMMENT "Copying Level Zero Loader ...")
 endif()
 
-add_library (L0Loader-Headers INTERFACE)
-add_library (L0Loader::Headers ALIAS L0Loader-Headers)
-target_include_directories(L0Loader-Headers
-  INTERFACE "${L0_INCLUDE_DIR}"
+add_library (LevelZeroLoader-Headers INTERFACE)
+add_library (LevelZeroLoader::Headers ALIAS LevelZeroLoader-Headers)
+target_include_directories(LevelZeroLoader-Headers
+  INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}"
 )
 
 include_directories("${sycl_inc_dir}")
 include_directories(${OPENCL_INCLUDE})
 
-add_library(pi_level0 SHARED
+add_library(pi_level_zero SHARED
   "${sycl_inc_dir}/CL/sycl/detail/pi.h"
-  "${CMAKE_CURRENT_SOURCE_DIR}/pi_level0.cpp"
-  "${CMAKE_CURRENT_SOURCE_DIR}/pi_level0.hpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.hpp"
 )
 
 if (MSVC)
   # by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport)
   # which are individually tagged for all pi* symbols in pi.h
-  target_compile_definitions(pi_level0 PRIVATE __SYCL_BUILD_SYCL_DLL)
+  target_compile_definitions(pi_level_zero PRIVATE __SYCL_BUILD_SYCL_DLL)
 else()
   # we set the visibility of all symbols 'hidden' by default.
   # In pi.h file, we set exported symbols with visibility==default individually
-  target_compile_options(pi_level0 PUBLIC -fvisibility=hidden)
+  target_compile_options(pi_level_zero PUBLIC -fvisibility=hidden)
 
   # This script file is used to allow exporting pi* symbols only.
   # All other symbols are regarded as local (hidden)
@@ -87,23 +87,23 @@ else()
 
   # Filter symbols based on the scope defined in the script file,
   # and export pi* function symbols in the library.
-  target_link_libraries( pi_level0
+  target_link_libraries( pi_level_zero
     PRIVATE "-Wl,--version-script=${linker_script}"
   )
 endif()
 
-if (TARGET l0-loader)
-  add_dependencies(pi_level0 l0-loader)
+if (TARGET level-zero-loader)
+  add_dependencies(pi_level_zero level-zero-loader)
 endif()
-  add_dependencies(sycl-toolchain pi_level0)
+  add_dependencies(sycl-toolchain pi_level_zero)
 
-  target_link_libraries(pi_level0 PRIVATE "${L0_LOADER}")
+  target_link_libraries(pi_level_zero PRIVATE "${LEVEL_ZERO_LOADER}")
 if (UNIX)
-  target_link_libraries(pi_level0 PRIVATE pthread)
+  target_link_libraries(pi_level_zero PRIVATE pthread)
 endif()
 
-add_common_options(pi_level0)
+add_common_options(pi_level_zero)
 
-install(TARGETS pi_level0
-    LIBRARY DESTINATION "lib" COMPONENT pi_level0
-    RUNTIME DESTINATION "bin" COMPONENT pi_level0)
+install(TARGETS pi_level_zero
+    LIBRARY DESTINATION "lib" COMPONENT pi_level_zero
+    RUNTIME DESTINATION "bin" COMPONENT pi_level_zero)
diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp
similarity index 96%
rename from sycl/plugins/level_zero/pi_level0.cpp
rename to sycl/plugins/level_zero/pi_level_zero.cpp
index 47042442f1c45..a77719a2220ae 100644
--- a/sycl/plugins/level_zero/pi_level0.cpp
+++ b/sycl/plugins/level_zero/pi_level_zero.cpp
@@ -1,17 +1,17 @@
-//===----------- pi_level0.cpp - Level Zero Plugin--------------------------==//
+//===-------- pi_level_zero.cpp - Level Zero Plugin --------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------------===//
 
-/// \file pi_level0.cpp
+/// \file pi_level_zero.cpp
 /// Implementation of Level Zero Plugin.
 ///
-/// \ingroup sycl_pi_level0
+/// \ingroup sycl_pi_level_zero
 
-#include "pi_level0.hpp"
+#include "pi_level_zero.hpp"
 #include <cstdarg>
 #include <cstdio>
 #include <cstring>
@@ -26,8 +26,8 @@
 
 namespace {
 
-// Controls L0 calls serialization to w/a L0 driver being not MT ready.
-// Recognized values (can be used as a bit mask):
+// Controls Level Zero calls serialization to w/a Level Zero driver being not MT
+// ready. Recognized values (can be used as a bit mask):
 enum {
   ZeSerializeNone =
       0, // no locking or blocking (except when SYCL RT requested blocking)
@@ -37,10 +37,10 @@ enum {
 };
 static pi_uint32 ZeSerialize = 0;
 
-// This class encapsulates actions taken along with a call to L0 API.
+// This class encapsulates actions taken along with a call to Level Zero API.
 class ZeCall {
 private:
-  // The global mutex that is used for total serialization of L0 calls.
+  // The global mutex that is used for total serialization of Level Zero calls.
   static std::mutex GlobalLock;
 
 public:
@@ -61,7 +61,7 @@ class ZeCall {
 };
 std::mutex ZeCall::GlobalLock;
 
-// Controls L0 calls tracing in zePrint.
+// Controls Level Zero calls tracing in zePrint.
 static bool ZeDebug = false;
 
 static void zePrint(const char *Format, ...) {
@@ -233,12 +233,12 @@ _pi_context::decrementAliveEventsInPool(ze_event_pool_handle_t ZePool) {
   return ZE_RESULT_SUCCESS;
 }
 
-// Some opencl extensions we know are supported by all Level0 devices.
+// Some opencl extensions we know are supported by all Level Zero devices.
 constexpr char ZE_SUPPORTED_EXTENSIONS[] =
     "cl_khr_il_program cl_khr_subgroups cl_intel_subgroups "
     "cl_intel_subgroups_short cl_intel_required_subgroup_size ";
 
-// Map L0 runtime error code to PI error code
+// Map Level Zero runtime error code to PI error code
 static pi_result mapError(ze_result_t ZeResult) {
   // TODO: these mapping need to be clarified and synced with the PI API return
   // values, which is TBD.
@@ -373,7 +373,7 @@ pi_result _pi_device::initialize() {
 // Crate a new command list to be used in a PI call
 pi_result
 _pi_device::createCommandList(ze_command_list_handle_t *ZeCommandList) {
-  // Create the command list, because in L0 commands are added to
+  // Create the command list, because in Level Zero commands are added to
   // the command lists, and later are then added to the command queue.
   //
   // TODO: Figure out how to lower the overhead of creating a new list
@@ -448,7 +448,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
     return PI_INVALID_VALUE;
   }
 
-  // This is a good time to initialize L0.
+  // This is a good time to initialize Level Zero.
   // TODO: We can still safely recover if something goes wrong during the init.
   // Implement handling segfault using sigaction.
   // TODO: We should not call zeInit multiples times ever, so
@@ -467,7 +467,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
     return mapError(ZeResult);
   }
 
-  // L0 does not have concept of Platforms, but L0 driver is the
+  // Level Zero does not have concept of Platforms, but Level Zero driver is the
   // closest match.
   if (Platforms && NumEntries > 0) {
     uint32_t ZeDriverCount = 0;
@@ -533,10 +533,10 @@ pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName,
 
   switch (ParamName) {
   case PI_PLATFORM_INFO_NAME:
-    // TODO: Query L0 driver when relevant info is added there.
+    // TODO: Query Level Zero driver when relevant info is added there.
     return ReturnValue("Intel(R) Level-Zero");
   case PI_PLATFORM_INFO_VENDOR:
-    // TODO: Query L0 driver when relevant info is added there.
+    // TODO: Query Level Zero driver when relevant info is added there.
     return ReturnValue("Intel(R) Corporation");
   case PI_PLATFORM_INFO_EXTENSIONS:
     // Convention adopted from OpenCL:
@@ -547,7 +547,7 @@ pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName,
     //
     // TODO: Check the common extensions supported by all connected devices and
     // return them. For now, hardcoding some extensions we know are supported by
-    // all Level0 devices.
+    // all Level Zero devices.
     return ReturnValue(ZE_SUPPORTED_EXTENSIONS);
   case PI_PLATFORM_INFO_PROFILE:
     // TODO: figure out what this means and how is this used
@@ -575,7 +575,7 @@ pi_result piextPlatformGetNativeHandle(pi_platform Platform,
   assert(NativeHandle);
 
   auto ZeDriver = pi_cast<ze_driver_handle_t *>(NativeHandle);
-  // Extract the L0 driver handle from the given PI platform
+  // Extract the Level Zero driver handle from the given PI platform
   *ZeDriver = Platform->ZeDriver;
   return PI_SUCCESS;
 }
@@ -585,7 +585,7 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
   assert(NativeHandle);
   assert(Platform);
 
-  // Create PI platform from the given L0 driver handle.
+  // Create PI platform from the given Level Zero driver handle.
   auto ZeDriver = pi_cast<ze_driver_handle_t>(NativeHandle);
   *Platform = new _pi_platform(ZeDriver);
   return PI_SUCCESS;
@@ -598,7 +598,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
   assert(Platform);
   ze_driver_handle_t ZeDriver = Platform->ZeDriver;
 
-  // Get number of devices supporting L0
+  // Get number of devices supporting Level Zero
   uint32_t ZeDeviceCount = 0;
   const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU);
   const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT);
@@ -718,7 +718,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     return ReturnValue(PI_DEVICE_TYPE_GPU);
   }
   case PI_DEVICE_INFO_PARENT_DEVICE:
-    // TODO: all L0 devices are parent ?
+    // TODO: all Level Zero devices are parent ?
     return ReturnValue(pi_device{0});
   case PI_DEVICE_INFO_PLATFORM:
     return ReturnValue(Device->Platform);
@@ -729,8 +729,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     //     "Returns a space separated list of extension names (the extension
     // names themselves do not contain any spaces) supported by the device."
     //
-    // TODO: Use proper mechanism to get this information from Level0 after
-    // it is added to Level0.
+    // TODO: Use proper mechanism to get this information from Level Zero after
+    // it is added to Level Zero.
     // Hardcoding the few we know are supported by the current hardware.
     //
     //
@@ -754,7 +754,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     // cl_khr_3d_image_writes - Extension to enable writes to 3D image memory
     //   objects.
     //
-    // Hardcoding some extensions we know are supported by all Level0 devices.
+    // Hardcoding some extensions we know are supported by all Level Zero
+    // devices.
     SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
     if (ZeDeviceKernelProperties.fp16Supported)
       SupportedExtensions += ("cl_khr_fp16 ");
@@ -784,7 +785,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     return ReturnValue(pi_uint32{MaxComputeUnits});
   }
   case PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS:
-    // L0 spec defines only three dimensions
+    // Level Zero spec defines only three dimensions
     return ReturnValue(pi_uint32{3});
   case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
     return ReturnValue(
@@ -845,7 +846,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     return ReturnValue(pi_uint32{Device->RefCount});
   case PI_DEVICE_INFO_PARTITION_PROPERTIES: {
     // It is debatable if SYCL sub-device and partitioning APIs sufficient to
-    // expose Level0 sub-devices?  We start with support of
+    // expose Level Zero sub-devices?  We start with support of
     // "partition_by_affinity_domain" and "numa" but if that doesn't seem to
     // be a good fit we could look at adding a more descriptive partitioning
     // type.
@@ -913,7 +914,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
   case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN:
     // SYCL/OpenCL spec is vague on what this means exactly, but seems to
     // be for "alignment requirement (in bits) for sub-buffer offsets."
-    // An OpenCL implementation returns 8*128, but L0 can do just 8,
+    // An OpenCL implementation returns 8*128, but Level Zero can do just 8,
     // meaning unaligned access for values of types larger than 8 bits.
     return ReturnValue(pi_uint32{8});
   case PI_DEVICE_INFO_MAX_SAMPLERS:
@@ -995,24 +996,24 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
     return ReturnValue(pi_uint64{DoubleFPValue});
   }
   case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH:
-    // Until L0 provides needed info, hardcode default minimum values required
-    // by the SYCL specification.
+    // Until Level Zero provides needed info, hardcode default minimum values
+    // required by the SYCL specification.
     return ReturnValue(size_t{8192});
   case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT:
-    // Until L0 provides needed info, hardcode default minimum values required
-    // by the SYCL specification.
+    // Until Level Zero provides needed info, hardcode default minimum values
+    // required by the SYCL specification.
     return ReturnValue(size_t{8192});
   case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH:
-    // Until L0 provides needed info, hardcode default minimum values required
-    // by the SYCL specification.
+    // Until Level Zero provides needed info, hardcode default minimum values
+    // required by the SYCL specification.
     return ReturnValue(size_t{2048});
   case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT:
-    // Until L0 provides needed info, hardcode default minimum values required
-    // by the SYCL specification.
+    // Until Level Zero provides needed info, hardcode default minimum values
+    // required by the SYCL specification.
     return ReturnValue(size_t{2048});
   case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH:
-    // Until L0 provides needed info, hardcode default minimum values required
-    // by the SYCL specification.
+    // Until Level Zero provides needed info, hardcode default minimum values
+    // required by the SYCL specification.
     return ReturnValue(size_t{2048});
   case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE:
     return ReturnValue(size_t{ZeDeviceImageProperties.maxImageBufferSize});
@@ -1106,7 +1107,7 @@ pi_result piDevicePartition(pi_device Device,
                             const pi_device_partition_property *Properties,
                             pi_uint32 NumDevices, pi_device *OutDevices,
                             pi_uint32 *OutNumDevices) {
-  // Other partitioning ways are not supported by L0
+  // Other partitioning ways are not supported by Level Zero
   if (Properties[0] != PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN ||
       Properties[1] != PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE) {
     return PI_INVALID_VALUE;
@@ -1139,7 +1140,7 @@ pi_result piDevicePartition(pi_device Device,
     auto ZeSubdevices = new ze_device_handle_t[Count];
     ZE_CALL(zeDeviceGetSubDevices(Device->ZeDevice, &Count, ZeSubdevices));
 
-    // Wrap the L0 sub-devices into PI sub-devices, and write them out.
+    // Wrap the Level Zero sub-devices into PI sub-devices, and write them out.
     for (uint32_t I = 0; I < Count; ++I) {
       OutDevices[I] = new _pi_device(ZeSubdevices[I], Device->Platform,
                                      true /* isSubDevice */);
@@ -1183,7 +1184,7 @@ pi_result piextDeviceGetNativeHandle(pi_device Device,
   assert(NativeHandle);
 
   auto ZeDevice = pi_cast<ze_device_handle_t *>(NativeHandle);
-  // Extract the L0 module handle from the given PI device
+  // Extract the Level Zero module handle from the given PI device
   *ZeDevice = Device->ZeDevice;
   return PI_SUCCESS;
 }
@@ -1195,7 +1196,7 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle,
   assert(Device);
   assert(Platform);
 
-  // Create PI device from the given L0 device handle.
+  // Create PI device from the given Level Zero device handle.
   auto ZeDevice = pi_cast<ze_device_handle_t>(NativeHandle);
   *Device = new _pi_device(ZeDevice, Platform);
   return (*Device)->initialize();
@@ -1208,7 +1209,7 @@ pi_result piContextCreate(const pi_context_properties *Properties,
                                             void *UserData),
                           void *UserData, pi_context *RetContext) {
 
-  // L0 does not have notion of contexts.
+  // Level Zero does not have notion of contexts.
   // Return the device handle (only single device is allowed) as a context
   // handle.
   if (NumDevices != 1) {
@@ -1387,7 +1388,7 @@ pi_result piextQueueGetNativeHandle(pi_queue Queue,
   assert(NativeHandle);
 
   auto ZeQueue = pi_cast<ze_command_queue_handle_t *>(NativeHandle);
-  // Extract the L0 queue handle from the given PI queue
+  // Extract the Level Zero queue handle from the given PI queue
   *ZeQueue = Queue->ZeCommandQueue;
   return PI_SUCCESS;
 }
@@ -1437,9 +1438,9 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
   auto HostPtrOrNull =
       (Flags & PI_MEM_FLAGS_HOST_PTR_USE) ? pi_cast<char *>(HostPtr) : nullptr;
   try {
-    *RetMem = new _pi_buffer(Context->Device->Platform,
-                             pi_cast<char *>(Ptr) /* L0 Memory Handle */,
-                             HostPtrOrNull);
+    *RetMem = new _pi_buffer(
+        Context->Device->Platform,
+        pi_cast<char *>(Ptr) /* Level Zero Memory Handle */, HostPtrOrNull);
   } catch (const std::bad_alloc &) {
     return PI_OUT_OF_HOST_MEMORY;
   } catch (...) {
@@ -1660,8 +1661,8 @@ pi_result piProgramCreate(pi_context Context, const void *IL, size_t Length,
   assert(Context);
   assert(Program);
 
-  // NOTE: the L0 module creation is also building the program, so we are
-  // deferring it until the program is ready to be built in piProgramBuild
+  // NOTE: the Level Zero module creation is also building the program, so we
+  // are deferring it until the program is ready to be built in piProgramBuild
   // and piProgramCompile. Also it is only then we know the build options.
   //
   ze_module_desc_t ZeModuleDesc = {};
@@ -1726,7 +1727,7 @@ pi_result piclProgramCreateWithSource(pi_context Context, pi_uint32 Count,
                                       const size_t *Lengths,
                                       pi_program *RetProgram) {
 
-  zePrint("piclProgramCreateWithSource: not supported in L0\n");
+  zePrint("piclProgramCreateWithSource: not supported in Level Zero\n");
   return PI_INVALID_OPERATION;
 }
 
@@ -1740,7 +1741,7 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName,
   case PI_PROGRAM_INFO_REFERENCE_COUNT:
     return ReturnValue(pi_uint32{Program->RefCount});
   case PI_PROGRAM_INFO_NUM_DEVICES:
-    // L0 Module is always for a single device.
+    // Level Zero Module is always for a single device.
     return ReturnValue(pi_uint32{1});
   case PI_PROGRAM_INFO_DEVICES:
     return ReturnValue(Program->Context->Device);
@@ -1764,7 +1765,7 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName,
   case PI_PROGRAM_INFO_KERNEL_NAMES:
     try {
       // There are extra allocations/copying here dictated by the difference
-      // in L0 and PI interfaces.
+      // in Level Zero and PI interfaces.
       uint32_t Count = 0;
       ZE_CALL(zeModuleGetKernelNames(Program->ZeModule, &Count, nullptr));
       char **PNames = new char *[Count];
@@ -1795,7 +1796,9 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices,
                         const pi_program *InputPrograms,
                         void (*PFnNotify)(pi_program Program, void *UserData),
                         void *UserData, pi_program *RetProgram) {
-  // TODO: L0 does not [yet] support linking so dummy implementation here.
+
+  // TODO: Level Zero does not [yet] support linking so dummy implementation
+  // here.
   assert(NumInputPrograms == 1 && InputPrograms);
   assert(RetProgram);
   *RetProgram = InputPrograms[0];
@@ -1812,7 +1815,7 @@ pi_result piProgramCompile(
   assert(NumInputHeaders == 0);
   assert(!InputHeaders);
 
-  // There is no support foe linking yet in L0 so "compile" actually
+  // There is no support for linking yet in Level Zero so "compile" actually
   // does the "build".
   return piProgramBuild(Program, NumDevices, DeviceList, Options, PFnNotify,
                         UserData);
@@ -1858,6 +1861,7 @@ pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices,
   ze_device_handle_t ZeDevice = Program->Context->Device->ZeDevice;
   ZE_CALL(zeModuleCreate(ZeDevice, &Program->ZeModuleDesc, &Program->ZeModule,
                          &Program->ZeBuildLog));
+
   return PI_SUCCESS;
 }
 
@@ -1868,14 +1872,14 @@ pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device,
 
   ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
   if (ParamName == CL_PROGRAM_BINARY_TYPE) {
-    // TODO: is this the only supported binary type in L0?
+    // TODO: is this the only supported binary type in Level Zero?
     // We should probably return CL_PROGRAM_BINARY_TYPE_NONE if asked
     // before the program was compiled.
     return ReturnValue(
         cl_program_binary_type{CL_PROGRAM_BINARY_TYPE_EXECUTABLE});
   }
   if (ParamName == CL_PROGRAM_BUILD_OPTIONS) {
-    // TODO: how to get module build options out of L0?
+    // TODO: how to get module build options out of Level Zero?
     // For the programs that we compiled we can remember the options
     // passed with piProgramCompile/piProgramBuild, but what can we
     // return for programs that were built outside and registered
@@ -1909,7 +1913,7 @@ pi_result piProgramRelease(pi_program Program) {
     delete[] Program->ZeModuleDesc.pInputModule;
     if (Program->ZeBuildLog)
       zeModuleBuildLogDestroy(Program->ZeBuildLog);
-    // TODO: call zeModuleDestroy for non-interop L0 modules
+    // TODO: call zeModuleDestroy for non-interop Level Zero modules
     delete Program;
   }
   return PI_SUCCESS;
@@ -1921,7 +1925,7 @@ pi_result piextProgramGetNativeHandle(pi_program Program,
   assert(NativeHandle);
 
   auto ZeModule = pi_cast<ze_module_handle_t *>(NativeHandle);
-  // Extract the L0 module handle from the given PI program
+  // Extract the Level Zero module handle from the given PI program
   *ZeModule = Program->ZeModule;
   return PI_SUCCESS;
 }
@@ -1935,9 +1939,9 @@ pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle,
 
   auto ZeModule = pi_cast<ze_module_handle_t>(NativeHandle);
 
-  // Create PI program from the given L0 module handle.
+  // Create PI program from the given Level Zero module handle.
   //
-  // TODO: We don't have the real L0 module descriptor with
+  // TODO: We don't have the real Level Zero module descriptor with
   // which it was created, but that's only needed for zeModuleCreate,
   // which we don't expect to be called on the interop program.
   //
@@ -2050,9 +2054,9 @@ pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName,
   case PI_KERNEL_INFO_PROGRAM:
     return ReturnValue(pi_program{Kernel->Program});
   case PI_KERNEL_INFO_FUNCTION_NAME:
-    // TODO: Replace with the line in the comment once bug in the L0 driver will
-    // be fixed. Problem is that currently L0 driver truncates name of the
-    // returned kernel if it is longer than 256 symbols.
+    // TODO: Replace with the line in the comment once bug in the Level Zero
+    // driver will be fixed. Problem is that currently Level Zero driver
+    // truncates name of the returned kernel if it is longer than 256 symbols.
     //
     // return ReturnValue(ZeKernelProperties.name);
     return ReturnValue(Kernel->KernelName.c_str());
@@ -2376,7 +2380,7 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
   }
   case PI_PROFILING_INFO_COMMAND_QUEUED:
   case PI_PROFILING_INFO_COMMAND_SUBMIT:
-    // TODO: Support these when L0 supported is added.
+    // TODO: Support these when Level Zero supported is added.
     return ReturnValue(uint64_t{0});
   default:
     zePrint("piEventGetProfilingInfo: not supported ParamName\n");
@@ -3016,8 +3020,8 @@ piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_bool BlockingMap,
   ZE_CALL(zeCommandListAppendWaitOnEvents(ZeCommandList, NumEventsInWaitList,
                                           ZeEventWaitList));
 
-  // TODO: L0 is missing the memory "mapping" capabilities, so we are left
-  // to doing new memory allocation and a copy (read).
+  // TODO: Level Zero is missing the memory "mapping" capabilities, so we are
+  // left to doing new memory allocation and a copy (read).
   //
   // TODO: check if the input buffer is already allocated in shared
   // memory and thus is accessible from the host as is. Can we get SYCL RT
@@ -3087,8 +3091,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
   ZE_CALL(zeCommandListAppendWaitOnEvents(ZeCommandList, NumEventsInWaitList,
                                           ZeEventWaitList));
 
-  // TODO: L0 is missing the memory "mapping" capabilities, so we are left
-  // to doing copy (write back to the device).
+  // TODO: Level Zero is missing the memory "mapping" capabilities, so we are
+  // left to doing copy (write back to the device).
   //
   // NOTE: Keep this in sync with the implementation of
   // piEnqueueMemBufferMap/piEnqueueMemImageMap.
@@ -3204,7 +3208,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
     const ze_image_region_t ZeSrcRegion =
         getImageRegionHelper(SrcMem, SrcOrigin, Region);
 
-    // TODO: L0 does not support row_pitch/slice_pitch for images yet.
+    // TODO: Level Zero does not support row_pitch/slice_pitch for images yet.
     // Check that SYCL RT did not want pitch larger than default.
 #ifndef NDEBUG
     assert(SrcMem->isImage());
@@ -3229,7 +3233,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue,
     const ze_image_region_t ZeDstRegion =
         getImageRegionHelper(DstMem, DstOrigin, Region);
 
-    // TODO: L0 does not support row_pitch/slice_pitch for images yet.
+    // TODO: Level Zero does not support row_pitch/slice_pitch for images yet.
     // Check that SYCL RT did not want pitch larger than default.
 #ifndef NDEBUG
     assert(DstMem->isImage());
@@ -3360,7 +3364,7 @@ pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags,
     *RetMem =
         new _pi_buffer(Buffer->Platform,
                        pi_cast<char *>(Buffer->getZeHandle()) +
-                           Region->origin /* L0 memory handle */,
+                           Region->origin /* Level Zero memory handle */,
                        nullptr /* Host pointer */, Buffer /* Parent buffer */,
                        Region->origin /* Sub-buffer origin */,
                        Region->size /*Sub-buffer size*/);
@@ -3406,7 +3410,7 @@ pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context,
 
   ze_host_mem_alloc_desc_t ZeDesc = {};
   ZeDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_DEFAULT;
-  // TODO: translate PI properties to L0 flags
+  // TODO: translate PI properties to Level Zero flags
   ZE_CALL(zeDriverAllocHostMem(Context->Device->Platform->ZeDriver, &ZeDesc,
                                Size, Alignment, ResultPtr));
 
@@ -3423,7 +3427,7 @@ pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context,
   // Check that incorrect bits are not set in the properties.
   assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS)));
 
-  // TODO: translate PI properties to L0 flags
+  // TODO: translate PI properties to Level Zero flags
   ze_device_mem_alloc_desc_t ZeDesc = {};
   ZeDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT;
   ZeDesc.ordinal = 0;
@@ -3443,7 +3447,7 @@ pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context,
   // Check that incorrect bits are not set in the properties.
   assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS)));
 
-  // TODO: translate PI properties to L0 flags
+  // TODO: translate PI properties to Level Zero flags
   ze_host_mem_alloc_desc_t ZeHostDesc = {};
   ZeHostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_DEFAULT;
   ze_device_mem_alloc_desc_t ZeDevDesc = {};
@@ -3555,7 +3559,7 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size,
   // TODO: figure out how to translate "flags"
   ZE_CALL(zeCommandListAppendMemoryPrefetch(ZeCommandList, Ptr, Size));
 
-  // TODO: L0 does not have a completion "event" with the prefetch API,
+  // TODO: Level Zero does not have a completion "event" with the prefetch API,
   // so manually add command to signal our event.
   ZE_CALL(zeCommandListAppendSignalEvent(ZeCommandList, ZeEvent));
 
@@ -3638,7 +3642,7 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
   ZE_CALL(zeCommandListAppendMemAdvise(
       ZeCommandList, Queue->Context->Device->ZeDevice, Ptr, Length, ZeAdvice));
 
-  // TODO: L0 does not have a completion "event" with the advise API,
+  // TODO: Level Zero does not have a completion "event" with the advise API,
   // so manually add command to signal our event.
   ZE_CALL(zeCommandListAppendSignalEvent(ZeCommandList, ZeEvent));
 
@@ -3699,7 +3703,7 @@ pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr,
   }
   case PI_MEM_ALLOC_DEVICE: {
     // TODO: this wants pi_device, but we didn't remember it, and cannot
-    // deduct from the L0 device.
+    // deduct from the Level Zero device.
     die("piextUSMGetMemAllocInfo: PI_MEM_ALLOC_DEVICE not implemented");
     break;
   }
@@ -3760,7 +3764,8 @@ pi_result piextProgramSetSpecializationConstant(pi_program Prog,
   // Pass SpecValue pointer. Spec constant value is retrieved
   // by Level-Zero when creating the modul
   //
-  // NOTE: SpecSize is unused in L0, the size is known from SPIR-V by SpecID.
+  // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by
+  // SpecID.
   Prog->ZeSpecConstants[SpecID] = reinterpret_cast<uint64_t>(SpecValue);
 
   return PI_SUCCESS;
diff --git a/sycl/plugins/level_zero/pi_level0.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp
old mode 100755
new mode 100644
similarity index 86%
rename from sycl/plugins/level_zero/pi_level0.hpp
rename to sycl/plugins/level_zero/pi_level_zero.hpp
index faf39d759c385..a3db143a55a48
--- a/sycl/plugins/level_zero/pi_level0.hpp
+++ b/sycl/plugins/level_zero/pi_level_zero.hpp
@@ -1,22 +1,22 @@
-//===---------- pi_level0.hpp - Level Zero Plugin -------------------------===//
+//===------- pi_level_zero.hpp - Level Zero Plugin -------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===----------------------------------------------------------------------===//
+//===-----------------------------------------------------------------===//
 
-/// \defgroup sycl_pi_level0 Level Zero Plugin
+/// \defgroup sycl_pi_level_zero Level Zero Plugin
 /// \ingroup sycl_pi
 
-/// \file pi_level0.hpp
+/// \file pi_level_zero.hpp
 /// Declarations for Level Zero Plugin. It is the interface between the
 /// device-agnostic SYCL runtime layer and underlying Level Zero runtime.
 ///
-/// \ingroup sycl_pi_level0
+/// \ingroup sycl_pi_level_zero
 
-#ifndef PI_LEVEL0_HPP
-#define PI_LEVEL0_HPP
+#ifndef PI_LEVEL_ZERO_HPP
+#define PI_LEVEL_ZERO_HPP
 
 #include <CL/sycl/detail/pi.h>
 #include <atomic>
@@ -51,18 +51,19 @@ template <> uint32_t pi_cast(uint64_t Value) {
 struct _pi_object {
   _pi_object() : RefCount{1} {}
 
-  // L0 doesn't do the reference counting, so we have to do.
+  // Level Zero doesn't do the reference counting, so we have to do.
   // Must be atomic to prevent data race when incrementing/decrementing.
   std::atomic<pi_uint32> RefCount;
 };
 
-// Define the types that are opaque in pi.h in a manner suitabale for L0 plugin
+// Define the types that are opaque in pi.h in a manner suitabale for Level Zero
+// plugin
 
 struct _pi_platform {
   _pi_platform(ze_driver_handle_t Driver) : ZeDriver{Driver} {}
 
-  // L0 lacks the notion of a platform, but there is a driver, which is a
-  // pretty good fit to keep here.
+  // Level Zero lacks the notion of a platform, but there is a driver, which is
+  // a pretty good fit to keep here.
   ze_driver_handle_t ZeDriver;
 
   // Cache versions info from zeDriverGetProperties.
@@ -83,14 +84,14 @@ struct _pi_device : _pi_object {
   // Initialize the entire PI device.
   pi_result initialize();
 
-  // L0 device handle.
+  // Level Zero device handle.
   ze_device_handle_t ZeDevice;
 
   // PI platform to which this device belongs.
   pi_platform Platform;
 
-  // Immediate L0 command list for this device, to be used for initializations.
-  // To be created as:
+  // Immediate Level Zero command list for this device, to be used for
+  // initializations. To be created as:
   // - Immediate command list: So any command appended to it is immediately
   //   offloaded to the device.
   // - Synchronous: So implicit synchronization is made inside the level-zero
@@ -117,7 +118,7 @@ struct _pi_context : _pi_object {
       : Device{Device}, ZeEventPool{nullptr}, NumEventsAvailableInEventPool{},
         NumEventsLiveInEventPool{} {}
 
-  // L0 does not have notion of contexts.
+  // Level Zero does not have notion of contexts.
   // Keep the device here (must be exactly one) to return it when PI context
   // is queried for devices.
   pi_device Device;
@@ -164,7 +165,7 @@ struct _pi_queue : _pi_object {
   _pi_queue(ze_command_queue_handle_t Queue, pi_context Context)
       : ZeCommandQueue{Queue}, Context{Context} {}
 
-  // L0 command queue handle.
+  // Level Zero command queue handle.
   ze_command_queue_handle_t ZeCommandQueue;
 
   // Keeps the PI context to which this queue belongs.
@@ -197,10 +198,10 @@ struct _pi_mem : _pi_object {
 
   // Interface of the _pi_mem object
 
-  // Get the L0 handle of the current memory object
+  // Get the Level Zero handle of the current memory object
   virtual void *getZeHandle() = 0;
 
-  // Get a pointer to the L0 handle of the current memory object
+  // Get a pointer to the Level Zero handle of the current memory object
   virtual void *getZeHandlePtr() = 0;
 
   // Method to get type of the derived object (image or buffer)
@@ -241,7 +242,7 @@ struct _pi_buffer final : _pi_mem {
 
   bool isSubBuffer() const { return SubBuffer.Parent != nullptr; }
 
-  // L0 memory handle is really just a naked pointer.
+  // Level Zero memory handle is really just a naked pointer.
   // It is just convenient to have it char * to simplify offset arithmetics.
   char *ZeMem;
 
@@ -268,7 +269,7 @@ struct _pi_image final : _pi_mem {
   ze_image_desc_t ZeImageDesc;
 #endif // !NDEBUG
 
-  // L0 image handle.
+  // Level Zero image handle.
   ze_image_handle_t ZeImage;
 };
 
@@ -278,14 +279,14 @@ struct _pi_event : _pi_object {
       : ZeEvent{ZeEvent}, ZeEventPool{ZeEventPool}, ZeCommandList{nullptr},
         CommandType{CommandType}, Context{Context}, CommandData{nullptr} {}
 
-  // L0 event handle.
+  // Level Zero event handle.
   ze_event_handle_t ZeEvent;
-  // L0 event pool handle.
+  // Level Zero event pool handle.
   ze_event_pool_handle_t ZeEventPool;
 
-  // L0 command list where the command signaling this event was appended to.
-  // This is currently used to remember/destroy the command list after
-  // all commands in it are completed, i.e. this event signaled.
+  // Level Zero command list where the command signaling this event was appended
+  // to. This is currently used to remember/destroy the command list after all
+  // commands in it are completed, i.e. this event signaled.
   ze_command_list_handle_t ZeCommandList;
 
   // Keeps the command-queue and command associated with the event.
@@ -300,7 +301,7 @@ struct _pi_event : _pi_object {
   // Opaque data to hold any data needed for CommandType.
   void *CommandData;
 
-  // Methods for translating PI events list into L0 events list
+  // Methods for translating PI events list into Level Zero events list
   static ze_event_handle_t *createZeEventList(pi_uint32, const pi_event *);
   static void deleteZeEventList(ze_event_handle_t *);
 };
@@ -311,16 +312,16 @@ struct _pi_program : _pi_object {
       : ZeModuleDesc(ModuleDesc), ZeModule{Module},
         ZeBuildLog{nullptr}, Context{Context} {}
 
-  // L0 module descriptor.
+  // Level Zero module descriptor.
   ze_module_desc_t ZeModuleDesc;
 
-  // L0 module handle.
+  // Level Zero module handle.
   ze_module_handle_t ZeModule;
-  // L0 module specialization constants
+  // Level Zero module specialization constants
   std::mutex ZeSpecConstantsMutex;
   std::unordered_map<uint32_t, uint64_t> ZeSpecConstants;
 
-  // L0 build log.
+  // Level Zero build log.
   ze_module_build_log_handle_t ZeBuildLog;
 
   // Keep the context of the program.
@@ -332,21 +333,21 @@ struct _pi_kernel : _pi_object {
              const char *KernelName)
       : ZeKernel{Kernel}, Program{Program}, KernelName(KernelName) {}
 
-  // L0 function handle.
+  // Level Zero function handle.
   ze_kernel_handle_t ZeKernel;
 
   // Keep the program of the kernel.
   pi_program Program;
 
-  // TODO: remove when bug in the L0 runtime will be fixed.
+  // TODO: remove when bug in the Level Zero runtime will be fixed.
   std::string KernelName;
 };
 
 struct _pi_sampler : _pi_object {
   _pi_sampler(ze_sampler_handle_t Sampler) : ZeSampler{Sampler} {}
 
-  // L0 sampler handle.
+  // Level Zero sampler handle.
   ze_sampler_handle_t ZeSampler;
 };
 
-#endif // PI_LEVEL0_HPP
+#endif // PI_LEVEL_ZERO_HPP
diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp
index 2e62223c0301e..6d7c7a347e89d 100644
--- a/sycl/source/backend/level_zero.cpp
+++ b/sycl/source/backend/level_zero.cpp
@@ -14,13 +14,13 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace level0 {
+namespace level_zero {
 using namespace detail;
 
 //----------------------------------------------------------------------------
-// Implementation of level0::make<platform>
+// Implementation of level_zero::make<platform>
 __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) {
-  const auto &Plugin = pi::getPlugin<backend::level0>();
+  const auto &Plugin = pi::getPlugin<backend::level_zero>();
   // Create PI platform first.
   pi::PiPlatform PiPlatform;
   Plugin.call<PiApiKind::piextPlatformCreateWithNativeHandle>(NativeHandle,
@@ -32,10 +32,10 @@ __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) {
 }
 
 //----------------------------------------------------------------------------
-// Implementation of level0::make<device>
+// Implementation of level_zero::make<device>
 __SYCL_EXPORT device make_device(const platform &Platform,
                                  pi_native_handle NativeHandle) {
-  const auto &Plugin = pi::getPlugin<backend::level0>();
+  const auto &Plugin = pi::getPlugin<backend::level_zero>();
   const auto &PlatformImpl = getSyclObjImpl(Platform);
   // Create PI device first.
   pi::PiDevice PiDevice;
@@ -47,7 +47,7 @@ __SYCL_EXPORT device make_device(const platform &Platform,
 }
 
 //----------------------------------------------------------------------------
-// Implementation of level0::make<program>
+// Implementation of level_zero::make<program>
 __SYCL_EXPORT program make_program(const context &Context,
                                    pi_native_handle NativeHandle) {
   // Construct the SYCL program from native program.
@@ -58,10 +58,10 @@ __SYCL_EXPORT program make_program(const context &Context,
 }
 
 //----------------------------------------------------------------------------
-// Implementation of level0::make<queue>
+// Implementation of level_zero::make<queue>
 __SYCL_EXPORT queue make_queue(const context &Context,
                                pi_native_handle NativeHandle) {
-  const auto &Plugin = pi::getPlugin<backend::level0>();
+  const auto &Plugin = pi::getPlugin<backend::level_zero>();
   const auto &ContextImpl = getSyclObjImpl(Context);
   // Create PI queue first.
   pi::PiQueue PiQueue;
@@ -72,6 +72,6 @@ __SYCL_EXPORT queue make_queue(const context &Context,
       PiQueue, ContextImpl, ContextImpl->get_async_handler()));
 }
 
-} // namespace level0
+} // namespace level_zero
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp
index 1559f40e5ad86..ac6fe8fbcbd2b 100644
--- a/sycl/source/detail/config.hpp
+++ b/sycl/source/detail/config.hpp
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include <array>
 #include <cstdlib>
+#include <string>
 #include <utility>
 
 __SYCL_INLINE_NAMESPACE(cl) {
@@ -118,9 +119,10 @@ template <> class SYCLConfig<SYCL_BE> {
       return BackendPtr;
 
     const char *ValStr = BaseT::getRawValue();
-    const std::array<std::pair<std::string, backend>, 3> SyclBeMap = {
+    const std::array<std::pair<std::string, backend>, 4> SyclBeMap = {
         {{"PI_OPENCL", backend::opencl},
-         {"PI_LEVEL0", backend::level0},
+         {"PI_LEVEL_ZERO", backend::level_zero},
+         {"PI_LEVEL0", backend::level_zero}, // for backward compatibility
          {"PI_CUDA", backend::cuda}}};
     if (ValStr) {
       auto It = std::find_if(
@@ -130,7 +132,7 @@ template <> class SYCLConfig<SYCL_BE> {
           });
       if (It == SyclBeMap.end())
         pi::die("Invalid backend. "
-                "Valid values are PI_OPENCL/PI_LEVEL0/PI_CUDA");
+                "Valid values are PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA");
       static backend Backend = It->second;
       BackendPtr = &Backend;
     }
@@ -161,6 +163,6 @@ template <> class SYCLConfig<SYCL_PI_TRACE> {
   }
 };
 
-} // __SYCL_INLINE_NAMESPACE(cl)
-} // namespace sycl
 } // namespace detail
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp
index 58df2a3f7ed9f..68b9f3cf59a27 100644
--- a/sycl/source/detail/pi.cpp
+++ b/sycl/source/detail/pi.cpp
@@ -215,7 +215,7 @@ bool findPlugins(vector_class<std::pair<std::string, backend>> &PluginNames) {
   // env only.
   //
   PluginNames.emplace_back(OPENCL_PLUGIN_NAME, backend::opencl);
-  PluginNames.emplace_back(LEVEL0_PLUGIN_NAME, backend::level0);
+  PluginNames.emplace_back(LEVEL_ZERO_PLUGIN_NAME, backend::level_zero);
   PluginNames.emplace_back(CUDA_PLUGIN_NAME, backend::cuda);
   return true;
 }
@@ -319,11 +319,11 @@ static void initializePlugins(vector_class<plugin> *Plugins) {
                PluginNames[I].first.find("cuda") != std::string::npos) {
       // Use the CUDA plugin as the GlobalPlugin
       GlobalPlugin = std::make_shared<plugin>(PluginInformation, backend::cuda);
-    } else if (InteropBE == backend::level0 &&
-               PluginNames[I].first.find("level0") != std::string::npos) {
-      // Use the LEVEL0 plugin as the GlobalPlugin
+    } else if (InteropBE == backend::level_zero &&
+               PluginNames[I].first.find("level_zero") != std::string::npos) {
+      // Use the LEVEL_ZERO plugin as the GlobalPlugin
       GlobalPlugin =
-          std::make_shared<plugin>(PluginInformation, backend::level0);
+          std::make_shared<plugin>(PluginInformation, backend::level_zero);
     }
     Plugins->emplace_back(plugin(PluginInformation, PluginNames[I].second));
     if (trace(TraceLevel::PI_TRACE_BASIC))
@@ -395,7 +395,7 @@ template <backend BE> const plugin &getPlugin() {
 }
 
 template const plugin &getPlugin<backend::opencl>();
-template const plugin &getPlugin<backend::level0>();
+template const plugin &getPlugin<backend::level_zero>();
 
 // Report error and no return (keeps compiler from printing warnings).
 // TODO: Probably change that to throw a catchable exception,
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 57a25c6c93aec..a4d7f162c1ab6 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -788,11 +788,11 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
     LinkOpts = LinkOptions.c_str();
   }
 
-  // L0 plugin doesn't support piProgramCompile/piProgramLink commands, program
-  // is built during piProgramCreate.
+  // Level-Zero plugin doesn't support piProgramCompile/piProgramLink commands,
+  // program is built during piProgramCreate.
   // TODO: remove this check as soon as piProgramCompile/piProgramLink will be
-  // implemented in L0 plugin.
-  if (Context->getPlugin().getBackend() == backend::level0) {
+  // implemented in Level-Zero plugin.
+  if (Context->getPlugin().getBackend() == backend::level_zero) {
     LinkDeviceLibs = false;
   }
 
diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index 5cdcf8c3ee1f8..f3600318cac1b 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -14,6 +14,8 @@
 #include <CL/sycl/exception.hpp>
 #include <CL/sycl/exception_list.hpp>
 #include <CL/sycl/handler.hpp>
+#include <CL/sycl/properties/context_properties.hpp>
+#include <CL/sycl/properties/queue_properties.hpp>
 #include <CL/sycl/property_list.hpp>
 #include <CL/sycl/stl.hpp>
 #include <detail/context_impl.hpp>
diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp
index 22b3a613467ec..831ae5f124bfa 100644
--- a/sycl/source/device_selector.cpp
+++ b/sycl/source/device_selector.cpp
@@ -19,13 +19,13 @@ __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 
 // Utility function to check if device is of the preferred backend.
-// Currently preference is given to the level0 backend.
+// Currently preference is given to the level_zero backend.
 static bool isDeviceOfPreferredSyclBe(const device &Device) {
   if (Device.is_host())
     return false;
 
   return detail::getSyclObjImpl(Device)->getPlugin().getBackend() ==
-         backend::level0;
+         backend::level_zero;
 }
 
 device device_selector::select_device() const {
diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt
index 4010c13d3b219..882de8263cb36 100644
--- a/sycl/test/CMakeLists.txt
+++ b/sycl/test/CMakeLists.txt
@@ -81,17 +81,17 @@ add_lit_testsuite(check-sycl-opencl "Running the SYCL regression tests for OpenC
   )
 set_target_properties(check-sycl-opencl PROPERTIES FOLDER "SYCL tests")
 
-add_lit_testsuite(check-sycl-level0 "Running the SYCL regression tests for Level Zero"
+add_lit_testsuite(check-sycl-level-zero "Running the SYCL regression tests for Level Zero"
   ${CMAKE_CURRENT_BINARY_DIR}
   ARGS ${RT_TEST_ARGS}
-  PARAMS "SYCL_BE=PI_LEVEL0"
+  PARAMS "SYCL_BE=PI_LEVEL_ZERO"
   DEPENDS ${SYCL_TEST_DEPS}
   EXCLUDE_FROM_CHECK_ALL
   )
-set_target_properties(check-sycl-level0 PROPERTIES FOLDER "SYCL tests")
+set_target_properties(check-sycl-level-zero PROPERTIES FOLDER "SYCL tests")
 
 add_custom_target(check-sycl)
-add_dependencies(check-sycl check-sycl-opencl check-sycl-level0)
+add_dependencies(check-sycl check-sycl-opencl check-sycl-level-zero)
 set_target_properties(check-sycl PROPERTIES FOLDER "SYCL tests")
 
 if(SYCL_BUILD_PI_CUDA)
diff --git a/sycl/test/abi/pi_level0_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump
similarity index 98%
rename from sycl/test/abi/pi_level0_symbol_check.dump
rename to sycl/test/abi/pi_level_zero_symbol_check.dump
index f6bb19aa16a5b..7c8c74b8cc77d 100644
--- a/sycl/test/abi/pi_level0_symbol_check.dump
+++ b/sycl/test/abi/pi_level_zero_symbol_check.dump
@@ -3,7 +3,7 @@
 # DO NOT EDIT IT MANUALLY. Refer to sycl/docs/ABIPolicyGuide.md for more info.
 ################################################################################
 
-# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_level0.so
+# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_level_zero.so
 # REQUIRES: linux
 
 piContextCreate
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 9ff2e1195cac9..0b336464462d3 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3589,6 +3589,10 @@ _ZN2cl10__host_std9u_sub_satEhh
 _ZN2cl10__host_std9u_sub_satEjj
 _ZN2cl10__host_std9u_sub_satEmm
 _ZN2cl10__host_std9u_sub_satEtt
+_ZN2cl4sycl10level_zero10make_queueERKNS0_7contextEm
+_ZN2cl4sycl10level_zero11make_deviceERKNS0_8platformEm
+_ZN2cl4sycl10level_zero12make_programERKNS0_7contextEm
+_ZN2cl4sycl10level_zero13make_platformEm
 _ZN2cl4sycl11malloc_hostEmRKNS0_5queueE
 _ZN2cl4sycl11malloc_hostEmRKNS0_7contextE
 _ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocE
@@ -3621,8 +3625,8 @@ _ZN2cl4sycl5eventC1Ev
 _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE
 _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE
 _ZN2cl4sycl5eventC2Ev
-_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl5intel6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm
+_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice
 _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE
 _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE
@@ -3728,15 +3732,15 @@ _ZN2cl4sycl6detail12sampler_implD2Ev
 _ZN2cl4sycl6detail12split_stringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEc
 _ZN2cl4sycl6detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EERS9_
 _ZN2cl4sycl6detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_
-_ZN2cl4sycl6detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE
 _ZN2cl4sycl6detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event
-_ZN2cl4sycl6detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event
-_ZN2cl4sycl6detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE
+_ZN2cl4sycl6detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event
 _ZN2cl4sycl6detail13MemoryManager18allocateHostMemoryEPNS1_11SYCLMemObjIEPvbmRKNS0_13property_listE
-_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE
 _ZN2cl4sycl6detail13MemoryManager18releaseImageBufferESt10shared_ptrINS1_12context_implEEPv
-_ZN2cl4sycl6detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event
+_ZN2cl4sycl6detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE
+_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE
+_ZN2cl4sycl6detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE
 _ZN2cl4sycl6detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event
+_ZN2cl4sycl6detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event
 _ZN2cl4sycl6detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_
 _ZN2cl4sycl6detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_
 _ZN2cl4sycl6detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_
@@ -3791,10 +3795,6 @@ _ZN2cl4sycl6kernelC1EP10_cl_kernelRKNS0_7contextE
 _ZN2cl4sycl6kernelC1ESt10shared_ptrINS0_6detail11kernel_implEE
 _ZN2cl4sycl6kernelC2EP10_cl_kernelRKNS0_7contextE
 _ZN2cl4sycl6kernelC2ESt10shared_ptrINS0_6detail11kernel_implEE
-_ZN2cl4sycl6level010make_queueERKNS0_7contextEm
-_ZN2cl4sycl6level011make_deviceERKNS0_8platformEm
-_ZN2cl4sycl6level012make_programERKNS0_7contextEm
-_ZN2cl4sycl6level013make_platformEm
 _ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocE
 _ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocE
 _ZN2cl4sycl6opencl10make_queueERKNS0_7contextEm
diff --git a/sycl/test/abi/symbol_size.cpp b/sycl/test/abi/symbol_size.cpp
index 40270190afc4f..b6ae3965eda61 100644
--- a/sycl/test/abi/symbol_size.cpp
+++ b/sycl/test/abi/symbol_size.cpp
@@ -54,12 +54,12 @@ int main() {
   check_size<gpu_selector, 8>();
 #ifdef _MSC_VER
   check_size<handler, 552>();
-  check_size<detail::buffer_impl, 288>();
-  check_size<detail::image_impl<1>, 344>();
+  check_size<detail::buffer_impl, 216>();
+  check_size<detail::image_impl<1>, 272>();
 #else
   check_size<handler, 560>();
-  check_size<detail::buffer_impl, 256>();
-  check_size<detail::image_impl<1>, 312>();
+  check_size<detail::buffer_impl, 184>();
+  check_size<detail::image_impl<1>, 240>();
 #endif
   check_size<image<1>, 16>();
   check_size<kernel, 16>();
diff --git a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp
index 79e69067682d5..2743557f6f971 100644
--- a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp
+++ b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp
@@ -6,7 +6,7 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t2.out
 // RUN: %ACC_RUN_PLACEHOLDER %t2.out
 
-// XFAIL: level0
+// XFAIL: level_zero
 
 //==------------- buffer_full_copy.cpp - SYCL buffer basic test ------------==//
 //
diff --git a/sycl/test/basic_tests/buffer/reinterpret.cpp b/sycl/test/basic_tests/buffer/reinterpret.cpp
index 7fd000f165131..7288d9bfb2c97 100644
--- a/sycl/test/basic_tests/buffer/reinterpret.cpp
+++ b/sycl/test/basic_tests/buffer/reinterpret.cpp
@@ -3,7 +3,7 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 //
-// XFAIL: level0
+// XFAIL: level_zero
 
 //==---------- reinterpret.cpp --- SYCL buffer reinterpret basic test ------==//
 //
diff --git a/sycl/test/basic_tests/event.cpp b/sycl/test/basic_tests/event.cpp
index e8e30015079f7..be6ba2f3aa43a 100644
--- a/sycl/test/basic_tests/event.cpp
+++ b/sycl/test/basic_tests/event.cpp
@@ -1,7 +1,10 @@
-// REQUIRES: opencl
+// REQUIRES: opencl || level0
 
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
 //==--------------- event.cpp - SYCL event test ----------------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
diff --git a/sycl/test/basic_tests/get_backend.cpp b/sycl/test/basic_tests/get_backend.cpp
index de0738dd88341..a312304e0741b 100644
--- a/sycl/test/basic_tests/get_backend.cpp
+++ b/sycl/test/basic_tests/get_backend.cpp
@@ -3,7 +3,6 @@
 //
 //==----------------- get_backend.cpp ------------------------==//
 // This is a test of get_backend().
-// Also prints handy info about the system.
 // Do not set SYCL_BE.  We do not want the preferred backend.
 //==----------------------------------------------------------==//
 
@@ -16,7 +15,7 @@ using namespace cl::sycl;
 bool check(backend be) {
   switch (be) {
   case backend::opencl:
-  case backend::level0:
+  case backend::level_zero:
   case backend::cuda:
   case backend::host:
     return true;
diff --git a/sycl/test/basic_tests/image_accessor_readwrite.cpp b/sycl/test/basic_tests/image_accessor_readwrite.cpp
index 13c0b175bd1fc..0451ced85dc55 100644
--- a/sycl/test/basic_tests/image_accessor_readwrite.cpp
+++ b/sycl/test/basic_tests/image_accessor_readwrite.cpp
@@ -6,7 +6,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 //
-// XFAIL: windows && level0
+// XFAIL: windows && level_zero
 
 //==--------------------image_accessor_readwrite.cpp ----------------------==//
 //==----------image_accessor read without sampler & write API test---------==//
diff --git a/sycl/test/basic_tests/image_accessor_readwrite_half.cpp b/sycl/test/basic_tests/image_accessor_readwrite_half.cpp
index 6d5c1960655e5..931780c6c36a6 100644
--- a/sycl/test/basic_tests/image_accessor_readwrite_half.cpp
+++ b/sycl/test/basic_tests/image_accessor_readwrite_half.cpp
@@ -6,7 +6,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 //
-// XFAIL: windows && level0
+// XFAIL: windows && level_zero
 
 //==--------------------image_accessor_readwrite_half.cpp -------------------==//
 //==-image_accessor read (without sampler)& write API test for half datatype-==//
diff --git a/sycl/test/basic_tests/kernel_info.cpp b/sycl/test/basic_tests/kernel_info.cpp
index 193d89a4c4bd8..68a642b476e27 100644
--- a/sycl/test/basic_tests/kernel_info.cpp
+++ b/sycl/test/basic_tests/kernel_info.cpp
@@ -3,8 +3,8 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 //
-// Fail is flaky for level0, enable when fixed.
-// UNSUPPORTED: level0
+// Fail is flaky for level_zero, enable when fixed.
+// UNSUPPORTED: level_zero
 
 //==--- kernel_info.cpp - SYCL kernel info test ----------------------------==//
 //
diff --git a/sycl/test/basic_tests/parallel_for_indexers.cpp b/sycl/test/basic_tests/parallel_for_indexers.cpp
index 3d20ec3d66903..a4ec46b42bc9a 100644
--- a/sycl/test/basic_tests/parallel_for_indexers.cpp
+++ b/sycl/test/basic_tests/parallel_for_indexers.cpp
@@ -8,7 +8,7 @@
 
 // TODO: Unexpected result
 // TODO: _indexers.cpp:37: int main(): Assertion `id == -1' failed.
-// XFAIL: level0
+// XFAIL: level_zero
 
 #include <CL/sycl.hpp>
 
diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp
index 3031d3d30f388..4ec9b23158239 100644
--- a/sycl/test/basic_tests/parallel_for_range.cpp
+++ b/sycl/test/basic_tests/parallel_for_range.cpp
@@ -1,4 +1,4 @@
-// XFAIL: cuda || level0
+// XFAIL: cuda || level_zero
 // CUDA exposes broken hierarchical parallelism.
 
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out
diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp
index df0887a40d4a0..04028fade9669 100644
--- a/sycl/test/group-algorithm/broadcast.cpp
+++ b/sycl/test/group-algorithm/broadcast.cpp
@@ -10,17 +10,19 @@
 #include <CL/sycl.hpp>
 #include <algorithm>
 #include <cassert>
+#include <complex>
 #include <numeric>
 using namespace sycl;
 using namespace sycl::intel;
 
+template <typename InputContainer, typename OutputContainer>
 class broadcast_kernel;
 
 template <typename InputContainer, typename OutputContainer>
 void test(queue q, InputContainer input, OutputContainer output) {
   typedef typename InputContainer::value_type InputT;
   typedef typename OutputContainer::value_type OutputT;
-  typedef class broadcast_kernel kernel_name;
+  typedef class broadcast_kernel<InputContainer, OutputContainer> kernel_name;
   size_t N = input.size();
   size_t G = 4;
   range<2> R(G, G);
@@ -54,12 +56,49 @@ int main() {
   }
 
   constexpr int N = 16;
-  std::array<int, N> input;
-  std::array<int, N> output;
-  std::iota(input.begin(), input.end(), 1);
-  std::fill(output.begin(), output.end(), false);
 
-  test(q, input, output);
+  // Test built-in scalar type
+  {
+    std::array<int, N> input;
+    std::array<int, 3> output;
+    std::iota(input.begin(), input.end(), 1);
+    std::fill(output.begin(), output.end(), false);
+    test(q, input, output);
+  }
+
+  // Test pointer type
+  {
+    std::array<int *, N> input;
+    std::array<int *, 3> output;
+    for (int i = 0; i < N; ++i) {
+      input[i] = static_cast<int *>(0x0) + i;
+    }
+    std::fill(output.begin(), output.end(), static_cast<int *>(0x0));
+    test(q, input, output);
+  }
 
+  // Test user-defined type
+  // - Use complex as a proxy for this
+  // - Test float and double to test 64-bit and 128-bit types
+  {
+    std::array<std::complex<float>, N> input;
+    std::array<std::complex<float>, 3> output;
+    for (int i = 0; i < N; ++i) {
+      input[i] =
+          std::complex<float>(0, 1) + (float)i * std::complex<float>(2, 2);
+    }
+    std::fill(output.begin(), output.end(), std::complex<float>(0, 0));
+    test(q, input, output);
+  }
+  {
+    std::array<std::complex<double>, N> input;
+    std::array<std::complex<double>, 3> output;
+    for (int i = 0; i < N; ++i) {
+      input[i] =
+          std::complex<double>(0, 1) + (double)i * std::complex<double>(2, 2);
+    }
+    std::fill(output.begin(), output.end(), std::complex<float>(0, 0));
+    test(q, input, output);
+  }
   std::cout << "Test passed." << std::endl;
 }
diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp
index 60a1e60883d71..2bbc059a43e44 100644
--- a/sycl/test/host-interop-task/host-task-dependency.cpp
+++ b/sycl/test/host-interop-task/host-task-dependency.cpp
@@ -4,7 +4,7 @@
 // RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER
 //
 // TODO: Behaviour is unstable for level zero on Windows. Enable when fixed.
-// UNSUPPORTED: windows && level0
+// UNSUPPORTED: windows && level_zero
 
 #include <atomic>
 #include <condition_variable>
diff --git a/sycl/test/host-interop-task/host-task-two-queues.cpp b/sycl/test/host-interop-task/host-task-two-queues.cpp
index 5157b83b60092..7644d6bcfcd53 100644
--- a/sycl/test/host-interop-task/host-task-two-queues.cpp
+++ b/sycl/test/host-interop-task/host-task-two-queues.cpp
@@ -4,7 +4,7 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 //
 // TODO: Flaky fail on Level Zero that is why mark as unsupported temporarily.
-// UNSUPPORTED: level0
+// UNSUPPORTED: level_zero
 
 #include <CL/sycl.hpp>
 #include <vector>
diff --git a/sycl/test/host-interop-task/interop-task.cpp b/sycl/test/host-interop-task/interop-task.cpp
index bcd784038228a..ea4f08eee4a83 100644
--- a/sycl/test/host-interop-task/interop-task.cpp
+++ b/sycl/test/host-interop-task/interop-task.cpp
@@ -2,7 +2,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: level0, cuda
+// UNSUPPORTED: level_zero, cuda
 // REQUIRES: opencl
 // REQUIRES: TEMPORARY_DISABLED
 
diff --git a/sycl/test/inorder_queue/in_order_buffs.cpp b/sycl/test/inorder_queue/in_order_buffs.cpp
index ef317031291ce..be96e3b849f2a 100644
--- a/sycl/test/inorder_queue/in_order_buffs.cpp
+++ b/sycl/test/inorder_queue/in_order_buffs.cpp
@@ -1,6 +1,4 @@
-// REQUIRES: opencl
-
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -18,14 +16,6 @@ using namespace cl::sycl;
 
 const int dataSize = 32;
 
-bool isQueueInOrder(cl_command_queue cq) {
-  cl_command_queue_properties reportedProps;
-  cl_int iRet = clGetCommandQueueInfo(
-      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
-  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
-  return (!(reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
-}
-
 int main() {
   int dataA[dataSize] = {0};
   int dataB[dataSize] = {0};
@@ -63,18 +53,6 @@ int main() {
       cgh.parallel_for<class ordered_reader>(myRange, myKernel);
     });
 
-    bool result = true;
-    cl_command_queue cq = Queue.get();
-    device dev = Queue.get_device();
-    bool expected_result = dev.is_host() ? true : isQueueInOrder(cq);
-
-    if (expected_result != result) {
-      std::cout << "Resulting queue order is OOO but expected order is inorder"
-                << std::endl;
-
-      return -1;
-    }
-
     auto readBufferB = bufB.get_access<access::mode::read>();
     for (size_t i = 0; i != dataSize; ++i) {
       if (readBufferB[i] != i) {
diff --git a/sycl/test/inorder_queue/in_order_buffs_ocl.cpp b/sycl/test/inorder_queue/in_order_buffs_ocl.cpp
new file mode 100644
index 0000000000000..d42bdd2ec4e02
--- /dev/null
+++ b/sycl/test/inorder_queue/in_order_buffs_ocl.cpp
@@ -0,0 +1,50 @@
+// REQUIRES: opencl
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+//==-------- ordered_buffs.cpp - SYCL buffers in ordered queues test--------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <CL/sycl.hpp>
+#include <iostream>
+
+using namespace cl::sycl;
+
+const int dataSize = 32;
+
+bool isQueueInOrder(cl_command_queue cq) {
+  cl_command_queue_properties reportedProps;
+  cl_int iRet = clGetCommandQueueInfo(
+      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
+  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
+  return (!(reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
+}
+
+int main() {
+  int dataA[dataSize] = {0};
+  int dataB[dataSize] = {0};
+
+  {
+    queue Queue{property::queue::in_order()};
+
+    bool result = true;
+    cl_command_queue cq = Queue.get();
+    device dev = Queue.get_device();
+    bool expected_result = dev.is_host() ? true : isQueueInOrder(cq);
+
+    if (expected_result != result) {
+      std::cout << "Resulting queue order is OOO but expected order is inorder"
+                << std::endl;
+
+      return -1;
+    }
+  }
+
+  return 0;
+}
diff --git a/sycl/test/inorder_queue/in_order_dmemll.cpp b/sycl/test/inorder_queue/in_order_dmemll.cpp
index defb64115d560..3eb6ecc39a34c 100644
--- a/sycl/test/inorder_queue/in_order_dmemll.cpp
+++ b/sycl/test/inorder_queue/in_order_dmemll.cpp
@@ -1,8 +1,8 @@
-// REQUIRES: opencl
-
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
+//
+// XFAIL: cuda
 //==----------- ordered_dmemll.cpp - Device Memory Linked List test --------==//
 // It uses an ordered queue where explicit waiting is not necessary between
 // kernels
@@ -19,15 +19,6 @@ using namespace cl::sycl;
 
 constexpr int numNodes = 4;
 
-bool getQueueOrder(cl_command_queue cq) {
-  cl_command_queue_properties reportedProps;
-  cl_int iRet = clGetCommandQueueInfo(
-      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
-  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
-  return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? false
-                                                                  : true;
-}
-
 struct Node {
   Node() : pNext(nullptr), Num(0xDEADBEEF) {}
 
@@ -103,15 +94,5 @@ int main() {
     d_cur = h_cur.pNext;
   }
 
-  bool result = true;
-  cl_command_queue cq = q.get(); 
-  bool expected_result = dev.is_host() ? true : getQueueOrder(cq);
-  if (expected_result != result) {
-    std::cout << "Resulting queue order is OOO but expected order is inorder"
-              << std::endl;
-
-    return -1;
-  }
-
   return 0;
 }
diff --git a/sycl/test/inorder_queue/in_order_dmemll_ocl.cpp b/sycl/test/inorder_queue/in_order_dmemll_ocl.cpp
new file mode 100644
index 0000000000000..e5fb04e2b1e96
--- /dev/null
+++ b/sycl/test/inorder_queue/in_order_dmemll_ocl.cpp
@@ -0,0 +1,46 @@
+// REQUIRES: opencl
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL
+// RUN: %CPU_RUN_PLACEHOLDER %t1.out
+// RUN: %GPU_RUN_PLACEHOLDER %t1.out
+//==----------- ordered_dmemll.cpp - Device Memory Linked List test --------==//
+// It uses an ordered queue where explicit waiting is not necessary between
+// kernels
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+
+using namespace cl::sycl;
+
+constexpr int numNodes = 4;
+
+bool getQueueOrder(cl_command_queue cq) {
+  cl_command_queue_properties reportedProps;
+  cl_int iRet = clGetCommandQueueInfo(
+      cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr);
+  assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
+  return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? false
+                                                                  : true;
+}
+
+int main() {
+  queue q{property::queue::in_order()};
+  auto dev = q.get_device();
+
+  bool result = true;
+  cl_command_queue cq = q.get();
+  bool expected_result = dev.is_host() ? true : getQueueOrder(cq);
+  if (expected_result != result) {
+    std::cout << "Resulting queue order is OOO but expected order is inorder"
+              << std::endl;
+
+    return -1;
+  }
+
+  return 0;
+}
diff --git a/sycl/test/kernel-and-program/kernel-and-program-interop.cpp b/sycl/test/kernel-and-program/kernel-and-program-interop.cpp
new file mode 100644
index 0000000000000..991a313712fae
--- /dev/null
+++ b/sycl/test/kernel-and-program/kernel-and-program-interop.cpp
@@ -0,0 +1,205 @@
+// REQUIRES: opencl
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+// RUNx: %ACC_RUN_PLACEHOLDER %t.out
+
+//==--- kernel-and-program.cpp - SYCL kernel/program test ------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <utility>
+
+int main() {
+  // Single task invocation methods
+  {
+    cl::sycl::queue q;
+    int data = 0;
+
+    // OpenCL interoperability kernel invocation
+    if (!q.is_host()) {
+      {
+        cl_int err;
+        cl::sycl::context ctx = q.get_context();
+        cl_context clCtx = ctx.get();
+        cl_command_queue clQ = q.get();
+        cl_mem clBuffer =
+            clCreateBuffer(clCtx, CL_MEM_WRITE_ONLY, sizeof(int), NULL, NULL);
+        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int),
+                                   &data, 0, NULL, NULL);
+        assert(err == CL_SUCCESS);
+        clFinish(clQ);
+        cl::sycl::program prog(ctx);
+        prog.build_with_source(
+            "kernel void SingleTask(global int* a) {*a+=1; }\n");
+        q.submit([&](cl::sycl::handler &cgh) {
+          cgh.set_args(clBuffer);
+          cgh.single_task(prog.get_kernel("SingleTask"));
+        });
+        q.wait();
+        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), &data,
+                                  0, NULL, NULL);
+        clReleaseCommandQueue(clQ);
+        clReleaseContext(clCtx);
+        assert(err == CL_SUCCESS);
+        assert(data == 1);
+      }
+      {
+        cl::sycl::queue sycl_queue;
+        cl::sycl::program prog(sycl_queue.get_context());
+        prog.build_with_source("kernel void foo(global int* a, global int* b, "
+                               "global int* c) {*a=*b+*c; }\n");
+        int a = 13, b = 14, c = 15;
+        {
+          cl::sycl::buffer<int, 1> bufa(&a, cl::sycl::range<1>(1));
+          cl::sycl::buffer<int, 1> bufb(&b, cl::sycl::range<1>(1));
+          cl::sycl::buffer<int, 1> bufc(&c, cl::sycl::range<1>(1));
+          sycl_queue.submit([&](cl::sycl::handler &cgh) {
+            auto A = bufa.get_access<cl::sycl::access::mode::write>(cgh);
+            auto B = bufb.get_access<cl::sycl::access::mode::read>(cgh);
+            auto C = bufc.get_access<cl::sycl::access::mode::read>(cgh);
+            cgh.set_args(A, B, C);
+            cgh.single_task(prog.get_kernel("foo"));
+          });
+        }
+        assert(a == b + c);
+      }
+    }
+    {
+      cl::sycl::queue Queue;
+      if (!Queue.is_host()) {
+        cl::sycl::sampler first(
+            cl::sycl::coordinate_normalization_mode::normalized,
+            cl::sycl::addressing_mode::clamp, cl::sycl::filtering_mode::linear);
+        cl::sycl::sampler second(
+            cl::sycl::coordinate_normalization_mode::unnormalized,
+            cl::sycl::addressing_mode::clamp_to_edge,
+            cl::sycl::filtering_mode::nearest);
+        cl::sycl::program prog(Queue.get_context());
+        prog.build_with_source(
+            "kernel void sampler_args(int a, sampler_t first, "
+            "int b, sampler_t second, int c) {}\n");
+        cl::sycl::kernel krn = prog.get_kernel("sampler_args");
+
+        Queue.submit([&](cl::sycl::handler &cgh) {
+          cgh.set_args(0, first, 2, second, 3);
+          cgh.single_task(krn);
+        });
+      }
+    }
+  }
+  // Parallel for with range
+  {
+    cl::sycl::queue q;
+    std::vector<int> dataVec(10);
+    std::iota(dataVec.begin(), dataVec.end(), 0);
+
+    if (!q.is_host()) {
+      cl_int err;
+      {
+        cl::sycl::context ctx = q.get_context();
+        cl_context clCtx = ctx.get();
+        cl_command_queue clQ = q.get();
+        cl_mem clBuffer = clCreateBuffer(
+            clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL);
+        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                   sizeof(int) * dataVec.size(), dataVec.data(),
+                                   0, NULL, NULL);
+        assert(err == CL_SUCCESS);
+
+        cl::sycl::program prog(ctx);
+        prog.build_with_source(
+            "kernel void ParallelFor(__global int* a, int v, __local int *l) "
+            "{ size_t index = get_global_id(0); l[index] = a[index];"
+            " l[index] += v; a[index] = l[index]; }\n");
+
+        q.submit([&](cl::sycl::handler &cgh) {
+          const int value = 1;
+          auto local_acc =
+              cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
+                                 cl::sycl::access::target::local>(
+                  cl::sycl::range<1>(10), cgh);
+          cgh.set_args(clBuffer, value, local_acc);
+          cgh.parallel_for(cl::sycl::range<1>(10),
+                           prog.get_kernel("ParallelFor"));
+        });
+
+        q.wait();
+        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                  sizeof(int) * dataVec.size(), dataVec.data(),
+                                  0, NULL, NULL);
+        clReleaseCommandQueue(clQ);
+        clReleaseContext(clCtx);
+        assert(err == CL_SUCCESS);
+        for (size_t i = 0; i < dataVec.size(); ++i) {
+          assert(dataVec[i] == i + 1);
+        }
+      }
+    }
+  }
+
+  // Parallel for with nd_range
+  {
+    cl::sycl::queue q;
+    std::vector<int> dataVec(10);
+    std::iota(dataVec.begin(), dataVec.end(), 0);
+
+    if (!q.is_host()) {
+      cl_int err;
+      {
+        cl::sycl::context ctx = q.get_context();
+        cl_context clCtx = ctx.get();
+        cl_command_queue clQ = q.get();
+        cl_mem clBuffer = clCreateBuffer(
+            clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL);
+        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                   sizeof(int) * dataVec.size(), dataVec.data(),
+                                   0, NULL, NULL);
+        assert(err == CL_SUCCESS);
+
+        cl::sycl::program prog(ctx);
+        prog.build_with_source(
+            "kernel void ParallelForND( local int* l,global int* a)"
+            "{  size_t idx = get_global_id(0);"
+            "  int pos = idx & 1;"
+            "  int opp = pos ^ 1;"
+            "  l[pos] = a[get_global_id(0)];"
+            "  barrier(CLK_LOCAL_MEM_FENCE);"
+            "  a[idx]=l[opp]; }");
+
+        // TODO is there no way to set local memory size via interoperability?
+        cl::sycl::kernel krn = prog.get_kernel("ParallelForND");
+        clSetKernelArg(krn.get(), 0, sizeof(int) * 2, NULL);
+
+        q.submit([&](cl::sycl::handler &cgh) {
+          cgh.set_arg(1, clBuffer);
+          cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(10),
+                                                 cl::sycl::range<1>(2)),
+                           krn);
+        });
+
+        q.wait();
+        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0,
+                                  sizeof(int) * dataVec.size(), dataVec.data(),
+                                  0, NULL, NULL);
+        clReleaseCommandQueue(clQ);
+        clReleaseContext(clCtx);
+        assert(err == CL_SUCCESS);
+      }
+      for (size_t i = 0; i < dataVec.size(); ++i) {
+        assert(dataVec[i] == (i ^ 1));
+      }
+    }
+  }
+}
diff --git a/sycl/test/kernel-and-program/kernel-and-program.cpp b/sycl/test/kernel-and-program/kernel-and-program.cpp
index 8d3f57eb5a671..1233ec3664ee6 100644
--- a/sycl/test/kernel-and-program/kernel-and-program.cpp
+++ b/sycl/test/kernel-and-program/kernel-and-program.cpp
@@ -1,6 +1,4 @@
-// REQUIRES: opencl
-
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUNx: %GPU_RUN_PLACEHOLDER %t.out
@@ -54,77 +52,6 @@ int main() {
     }
     assert(data == 1);
 
-    // OpenCL interoperability kernel invocation
-    if (!q.is_host()) {
-      {
-        cl_int err;
-        cl::sycl::context ctx = q.get_context();
-        cl_context clCtx = ctx.get();
-        cl_command_queue clQ = q.get();
-        cl_mem clBuffer =
-            clCreateBuffer(clCtx, CL_MEM_WRITE_ONLY, sizeof(int), NULL, NULL);
-        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int),
-                                   &data, 0, NULL, NULL);
-        assert(err == CL_SUCCESS);
-        clFinish(clQ);
-        cl::sycl::program prog(ctx);
-        prog.build_with_source(
-            "kernel void SingleTask(global int* a) {*a+=1; }\n");
-        q.submit([&](cl::sycl::handler &cgh) {
-          cgh.set_args(clBuffer);
-          cgh.single_task(prog.get_kernel("SingleTask"));
-        });
-        q.wait();
-        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), &data,
-                                  0, NULL, NULL);
-        clReleaseCommandQueue(clQ);
-        clReleaseContext(clCtx);
-        assert(err == CL_SUCCESS);
-        assert(data == 2);
-      }
-      {
-        cl::sycl::queue sycl_queue;
-        cl::sycl::program prog(sycl_queue.get_context());
-        prog.build_with_source("kernel void foo(global int* a, global int* b, "
-                               "global int* c) {*a=*b+*c; }\n");
-        int a = 13, b = 14, c = 15;
-        {
-          cl::sycl::buffer<int, 1> bufa(&a, cl::sycl::range<1>(1));
-          cl::sycl::buffer<int, 1> bufb(&b, cl::sycl::range<1>(1));
-          cl::sycl::buffer<int, 1> bufc(&c, cl::sycl::range<1>(1));
-          sycl_queue.submit([&](cl::sycl::handler &cgh) {
-            auto A = bufa.get_access<cl::sycl::access::mode::write>(cgh);
-            auto B = bufb.get_access<cl::sycl::access::mode::read>(cgh);
-            auto C = bufc.get_access<cl::sycl::access::mode::read>(cgh);
-            cgh.set_args(A, B, C);
-            cgh.single_task(prog.get_kernel("foo"));
-          });
-        }
-        assert(a == b + c);
-      }
-    }
-    {
-      cl::sycl::queue Queue;
-      if (!Queue.is_host()) {
-        cl::sycl::sampler first(
-            cl::sycl::coordinate_normalization_mode::normalized,
-            cl::sycl::addressing_mode::clamp, cl::sycl::filtering_mode::linear);
-        cl::sycl::sampler second(
-            cl::sycl::coordinate_normalization_mode::unnormalized,
-            cl::sycl::addressing_mode::clamp_to_edge,
-            cl::sycl::filtering_mode::nearest);
-        cl::sycl::program prog(Queue.get_context());
-        prog.build_with_source(
-            "kernel void sampler_args(int a, sampler_t first, "
-            "int b, sampler_t second, int c) {}\n");
-        cl::sycl::kernel krn = prog.get_kernel("sampler_args");
-
-        Queue.submit([&](cl::sycl::handler &cgh) {
-          cgh.set_args(0, first, 2, second, 3);
-          cgh.single_task(krn);
-        });
-      }
-    }
   }
   // Parallel for with range
   {
@@ -157,50 +84,6 @@ int main() {
     for (size_t i = 0; i < dataVec.size(); ++i) {
       assert(dataVec[i] == i + 1);
     }
-
-    // OpenCL interoperability kernel invocation
-    if (!q.is_host()) {
-      cl_int err;
-      {
-        cl::sycl::context ctx = q.get_context();
-        cl_context clCtx = ctx.get();
-        cl_command_queue clQ = q.get();
-        cl_mem clBuffer = clCreateBuffer(
-            clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL);
-        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0,
-                                   sizeof(int) * dataVec.size(), dataVec.data(),
-                                   0, NULL, NULL);
-        assert(err == CL_SUCCESS);
-
-        cl::sycl::program prog(ctx);
-        prog.build_with_source(
-            "kernel void ParallelFor(__global int* a, int v, __local int *l) "
-            "{ size_t index = get_global_id(0); l[index] = a[index];"
-            " l[index] += v; a[index] = l[index]; }\n");
-
-        q.submit([&](cl::sycl::handler &cgh) {
-          const int value = 1;
-          auto local_acc =
-              cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
-                                 cl::sycl::access::target::local>(
-                  cl::sycl::range<1>(10), cgh);
-          cgh.set_args(clBuffer, value, local_acc);
-          cgh.parallel_for(cl::sycl::range<1>(10),
-                           prog.get_kernel("ParallelFor"));
-        });
-
-        q.wait();
-        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0,
-                                  sizeof(int) * dataVec.size(), dataVec.data(),
-                                  0, NULL, NULL);
-        clReleaseCommandQueue(clQ);
-        clReleaseContext(clCtx);
-        assert(err == CL_SUCCESS);
-        for (size_t i = 0; i < dataVec.size(); ++i) {
-          assert(dataVec[i] == i + 2);
-        }
-      }
-    }
   }
 
   // Parallel for with nd_range
@@ -250,53 +133,5 @@ int main() {
         assert(dataVec[i] == (i ^ 1));
       }
     }
-
-    // OpenCL interoperability kernel invocation
-    if (!q.is_host()) {
-      cl_int err;
-      {
-        cl::sycl::context ctx = q.get_context();
-        cl_context clCtx = ctx.get();
-        cl_command_queue clQ = q.get();
-        cl_mem clBuffer = clCreateBuffer(
-            clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL);
-        err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0,
-                                   sizeof(int) * dataVec.size(), dataVec.data(),
-                                   0, NULL, NULL);
-        assert(err == CL_SUCCESS);
-
-        cl::sycl::program prog(ctx);
-        prog.build_with_source(
-            "kernel void ParallelForND( local int* l,global int* a)"
-            "{  size_t idx = get_global_id(0);"
-            "  int pos = idx & 1;"
-            "  int opp = pos ^ 1;"
-            "  l[pos] = a[get_global_id(0)];"
-            "  barrier(CLK_LOCAL_MEM_FENCE);"
-            "  a[idx]=l[opp]; }");
-
-        // TODO is there no way to set local memory size via interoperability?
-        cl::sycl::kernel krn = prog.get_kernel("ParallelForND");
-        clSetKernelArg(krn.get(), 0, sizeof(int) * 2, NULL);
-
-        q.submit([&](cl::sycl::handler &cgh) {
-          cgh.set_arg(1, clBuffer);
-          cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(10),
-                                                 cl::sycl::range<1>(2)),
-                           krn);
-        });
-
-        q.wait();
-        err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0,
-                                  sizeof(int) * dataVec.size(), dataVec.data(),
-                                  0, NULL, NULL);
-        clReleaseCommandQueue(clQ);
-        clReleaseContext(clCtx);
-        assert(err == CL_SUCCESS);
-      }
-      for (size_t i = 0; i < dataVec.size(); ++i) {
-        assert(dataVec[i] == i);
-      }
-    }
   }
 }
diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py
index 545ebb995f60d..f2a58749a2a93 100644
--- a/sycl/test/lit.cfg.py
+++ b/sycl/test/lit.cfg.py
@@ -81,7 +81,7 @@
 
 def getDeviceCount(device_type):
     is_cuda = False;
-    is_level0 = False;
+    is_level_zero = False;
     process = subprocess.Popen([get_device_count_by_type_path, device_type, backend],
         stdout=subprocess.PIPE)
     (output, err) = process.communicate()
@@ -106,12 +106,12 @@ def getDeviceCount(device_type):
         if re.match(r".*cuda", result[1]):
             is_cuda = True;
         if re.match(r".*level zero", result[1]):
-            is_level0 = True;
+            is_level_zero = True;
 
     if err:
         lit_config.warning("getDeviceCount {TYPE} {BACKEND} stderr:{ERR}".format(
             TYPE=device_type, BACKEND=backend, ERR=err))
-    return [value,is_cuda,is_level0]
+    return [value,is_cuda,is_level_zero]
 
 # Every SYCL implementation provides a host implementation.
 config.available_features.add('host')
@@ -149,8 +149,8 @@ def getDeviceCount(device_type):
 gpu_check_on_linux_substitute = ""
 
 cuda = False
-level0 = False
-[gpu_count, cuda, level0] = getDeviceCount("gpu")
+level_zero = False
+[gpu_count, cuda, level_zero] = getDeviceCount("gpu")
 
 if gpu_count > 0:
     found_at_least_one_device = True
@@ -160,8 +160,8 @@ def getDeviceCount(device_type):
     config.available_features.add('gpu')
     if cuda:
        config.available_features.add('cuda')
-    elif level0:
-       config.available_features.add('level0')
+    elif level_zero:
+       config.available_features.add('level_zero')
 
     if platform.system() == "Linux":
         gpu_run_on_linux_substitute = "env SYCL_DEVICE_TYPE=GPU SYCL_BE={SYCL_BE} ".format(SYCL_BE=backend)
@@ -188,7 +188,7 @@ def getDeviceCount(device_type):
 config.substitutions.append( ('%ACC_CHECK_PLACEHOLDER',  acc_check_substitute) )
 
 # LIT testing either supports OpenCL or CUDA or Level Zero.
-if not cuda and not level0 and found_at_least_one_device:
+if not cuda and not level_zero and found_at_least_one_device:
     config.available_features.add('opencl')
 
 if cuda:
diff --git a/sycl/test/plugins/sycl-ls-gpu-default.cpp b/sycl/test/plugins/sycl-ls-gpu-default.cpp
index a07933f2ccff8..a57fc2a39a027 100755
--- a/sycl/test/plugins/sycl-ls-gpu-default.cpp
+++ b/sycl/test/plugins/sycl-ls-gpu-default.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: gpu, level0
+// REQUIRES: gpu, level_zero
 
 // RUN: sycl-ls --verbose >%t.default.out
 // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.default.out
diff --git a/sycl/test/regression/fsycl-save-temps.cpp b/sycl/test/regression/fsycl-save-temps.cpp
index ce9e653af2608..7f89fa54feeca 100644
--- a/sycl/test/regression/fsycl-save-temps.cpp
+++ b/sycl/test/regression/fsycl-save-temps.cpp
@@ -22,6 +22,6 @@ int main() {
 }
 
 // TODO: Address a Windows-specific issue with integration header filenames
-// XFAIL: system-windows && !level0
+// XFAIL: system-windows && !level_zero
 // TODO: fail is flaky on Windows for Level Zero. Enable when fixed.
-// UNSUPPORTED: system-windows && level0
+// UNSUPPORTED: system-windows && level_zero
diff --git a/sycl/test/regression/image_access.cpp b/sycl/test/regression/image_access.cpp
index f32d0a33f6b91..e779fb5b4111b 100644
--- a/sycl/test/regression/image_access.cpp
+++ b/sycl/test/regression/image_access.cpp
@@ -5,9 +5,9 @@
 // TODO: For now PI checks are skipped for ACC device. To decide if it's good.
 // RUN: env %ACC_RUN_PLACEHOLDER %t.out
 //
-// UNSUPPORTED: cuda || windows && level0
+// UNSUPPORTED: cuda || windows && level_zero
 // CUDA cannot support OpenCL spec conform images.
-// TODO: test hangs on level0, enable when fixed.
+// TODO: test hangs on level_zero, enable when fixed.
 
 //==-------------- image_access.cpp - SYCL image accessors test  -----------==//
 //
diff --git a/sycl/test/regression/static-buffer-dtor.cpp b/sycl/test/regression/static-buffer-dtor.cpp
index 5899420c20454..c541c180e7d73 100644
--- a/sycl/test/regression/static-buffer-dtor.cpp
+++ b/sycl/test/regression/static-buffer-dtor.cpp
@@ -14,7 +14,7 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 //
-// XFAIL: linux && level0
+// XFAIL: linux && level_zero
 
 #include <CL/sycl.hpp>
 
diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp
index c50550c0827ad..442121353bb73 100644
--- a/sycl/test/spec_const/spec_const_hw.cpp
+++ b/sycl/test/spec_const/spec_const_hw.cpp
@@ -1,4 +1,4 @@
-// UNSUPPORTED: cuda || level0
+// UNSUPPORTED: cuda || level_zero
 //
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
diff --git a/sycl/test/spec_const/spec_const_hw_accelerator.cpp b/sycl/test/spec_const/spec_const_hw_accelerator.cpp
index d4eb754065c18..bd7df40ed378a 100644
--- a/sycl/test/spec_const/spec_const_hw_accelerator.cpp
+++ b/sycl/test/spec_const/spec_const_hw_accelerator.cpp
@@ -12,6 +12,6 @@
 // TODO: re-enable after CI drivers are updated to newer which support spec
 // constants:
 // XFAIL: linux && opencl && accelerator
-// UNSUPPORTED: cuda || level0
+// UNSUPPORTED: cuda || level_zero
 #include "spec_const_hw.cpp"
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp
index 18fb8ed5d9d0c..7312e29ab40e1 100644
--- a/sycl/test/spec_const/spec_const_neg.cpp
+++ b/sycl/test/spec_const/spec_const_neg.cpp
@@ -3,7 +3,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // Specialization constants are not supported on FPGA h/w and emulator.
-// UNSUPPORTED: cuda || level0
+// UNSUPPORTED: cuda || level_zero
 //
 //==----------- spec_const_hw.cpp ------------------------------------------==//
 //
diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp
index 36a82f9c9825e..6883ce5c9d7d6 100644
--- a/sycl/test/spec_const/spec_const_redefine.cpp
+++ b/sycl/test/spec_const/spec_const_redefine.cpp
@@ -1,4 +1,4 @@
-// UNSUPPORTED: cuda || level0
+// UNSUPPORTED: cuda || level_zero
 //
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
diff --git a/sycl/test/spec_const/spec_const_redefine_accelerator.cpp b/sycl/test/spec_const/spec_const_redefine_accelerator.cpp
index 00ef10f63fdfa..52293f7a953e7 100644
--- a/sycl/test/spec_const/spec_const_redefine_accelerator.cpp
+++ b/sycl/test/spec_const/spec_const_redefine_accelerator.cpp
@@ -14,6 +14,6 @@
 // TODO: re-enable after CI drivers are updated to newer which support spec
 // constants:
 // XFAIL: linux && opencl && accelerator
-// UNSUPPORTED: cuda || level0
+// UNSUPPORTED: cuda || level_zero
 #include "spec_const_redefine_accelerator.cpp"
 // RUN: env SYCL_PI_TRACE=2 %ACC_RUN_PLACEHOLDER %t.out 2>&1 %ACC_CHECK_PLACEHOLDER
diff --git a/sycl/test/sub_group/common_ocl.cpp b/sycl/test/sub_group/common_ocl.cpp
deleted file mode 100644
index 232e6c6c11acc..0000000000000
--- a/sycl/test/sub_group/common_ocl.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-// REQUIRES: opencl
-
-// RUN: %clang_cc1 -x cl -cl-std=CL2.0 %S/sg.cl -triple spir64-unknown-unknown -emit-llvm-bc -o %T/kernel_ocl.bc -include opencl-c.h
-// RUN: llvm-spirv %T/kernel_ocl.bc -o %T/kernel_ocl.spv
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
-// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
-// RUN: %GPU_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
-// RUN: %ACC_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
-
-//==--- common_ocl.cpp - basic SG methods in SYCL vs OpenCL  ---*- C++ -*---==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "helper.hpp"
-#include <CL/sycl.hpp>
-#include <cstring>
-#include <fstream>
-#include <iostream>
-
-using namespace cl::sycl;
-struct Data {
-  unsigned int local_id;
-  unsigned int local_range;
-  unsigned int max_local_range;
-  unsigned int group_id;
-  unsigned int group_range;
-};
-
-void check(queue &Queue, const int G, const int L, const char *SpvFile) {
-  try {
-    nd_range<1> NdRange(G, L);
-    buffer<struct Data, 1> oclbuf(G);
-    buffer<struct Data, 1> syclbuf(G);
-
-    std::ifstream File(SpvFile, std::ios::binary);
-    if (!File.is_open()) {
-      std::cerr << std::strerror(errno);
-      throw compile_program_error("Cannot open SPIRV file\n", PI_INVALID_VALUE);
-    }
-    File.seekg(0, std::ios::end);
-    vector_class<char> Spv(File.tellg());
-    File.seekg(0);
-    File.read(Spv.data(), Spv.size());
-    File.close();
-    int Err;
-    cl_program ClProgram = clCreateProgramWithIL(Queue.get_context().get(),
-                                                 Spv.data(), Spv.size(), &Err);
-    CHECK_OCL_CODE(Err);
-    CHECK_OCL_CODE(
-        clBuildProgram(ClProgram, 0, nullptr, nullptr, nullptr, nullptr));
-    program Prog(Queue.get_context(), ClProgram);
-    Queue.submit([&](handler &cgh) {
-      auto oclacc = oclbuf.get_access<access::mode::read_write>(cgh);
-      cgh.set_args(oclacc);
-      cgh.parallel_for(NdRange, Prog.get_kernel("ocl_subgr"));
-    });
-    auto oclacc = oclbuf.get_access<access::mode::read_write>();
-
-    Queue.submit([&](handler &cgh) {
-      auto syclacc = syclbuf.get_access<access::mode::read_write>(cgh);
-      cgh.parallel_for<class sycl_subgr>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
-        syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0);
-        syclacc[NdItem.get_global_id()].local_range =
-            SG.get_local_range().get(0);
-        syclacc[NdItem.get_global_id()].max_local_range =
-            SG.get_max_local_range().get(0);
-        syclacc[NdItem.get_global_id()].group_id = SG.get_group_id().get(0);
-        syclacc[NdItem.get_global_id()].group_range = SG.get_group_range().get(0);
-      });
-    });
-    auto syclacc = syclbuf.get_access<access::mode::read_write>();
-    for (int j = 0; j < G; j++) {
-      exit_if_not_equal(syclacc[j].local_id, oclacc[j].local_id, "local_id");
-      exit_if_not_equal(syclacc[j].local_range, oclacc[j].local_range,
-                        "local_range");
-      exit_if_not_equal(syclacc[j].max_local_range, oclacc[j].max_local_range,
-                        "max_local_range");
-      exit_if_not_equal(syclacc[j].group_id, oclacc[j].group_id, "group_id");
-      exit_if_not_equal(syclacc[j].group_range, oclacc[j].group_range,
-                        "group_range");
-    }
-  } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
-    exit(1);
-  }
-}
-int main(int argc, char **argv) {
-  queue Queue;
-  if (!core_sg_supported(Queue.get_device()) || argc != 2) {
-    std::cout << "Skipping test\n";
-    return 0;
-  }
-
-  check(Queue, 240, 80, argv[1]);
-  check(Queue, 8, 4, argv[1]);
-  check(Queue, 24, 12, argv[1]);
-  check(Queue, 1024, 256, argv[1]);
-  std::cout << "Test passed." << std::endl;
-  return 0;
-}
diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp
index d2d7e191dfa32..d4dbe84906537 100644
--- a/sycl/test/sub_group/generic-shuffle.cpp
+++ b/sycl/test/sub_group/generic-shuffle.cpp
@@ -18,8 +18,7 @@
 #include "helper.hpp"
 #include <CL/sycl.hpp>
 #include <complex>
-template <typename T>
-class pointer_kernel;
+template <typename T> class pointer_kernel;
 
 using namespace cl::sycl;
 
@@ -59,8 +58,9 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
         /* Save GID+SGID */
         acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid);
 
-        /* Save GID XOR SGID */
-        acc_xor[NdItem.get_global_id()] = SG.shuffle_xor(ptr, sgid);
+        /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+        acc_xor[NdItem.get_global_id()] =
+            SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]);
       });
     });
     auto acc = buf.template get_access<access::mode::read_write>();
@@ -71,30 +71,44 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
 
     size_t sg_size = sgsizeacc[0];
     int SGid = 0;
+    int SGLid = 0;
+    int SGBeginGid = 0;
     for (int j = 0; j < G; j++) {
       if (j % L % sg_size == 0) {
         SGid++;
+        SGLid = 0;
+        SGBeginGid = j;
       }
       if (j % L == 0) {
         SGid = 0;
+        SGLid = 0;
+        SGBeginGid = j;
       }
 
       /*GID of middle element in every subgroup*/
-      exit_if_not_equal(acc[j], static_cast<T *>(0x0) + (j / L * L + SGid * sg_size + sg_size / 2),
+      exit_if_not_equal(acc[j],
+                        static_cast<T *>(0x0) +
+                            (j / L * L + SGid * sg_size + sg_size / 2),
                         "shuffle");
 
       /* Value GID+SGID for all element except last SGID in SG*/
       if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
-        exit_if_not_equal(acc_down[j], static_cast<T *>(0x0) + (j + SGid), "shuffle_down");
+        exit_if_not_equal(acc_down[j], static_cast<T *>(0x0) + (j + SGid),
+                          "shuffle_down");
       }
 
       /* Value GID-SGID for all element except first SGID in SG*/
       if (j % L % sg_size >= SGid) {
-        exit_if_not_equal(acc_up[j], static_cast<T *>(0x0) + (j - SGid), "shuffle_up");
+        exit_if_not_equal(acc_up[j], static_cast<T *>(0x0) + (j - SGid),
+                          "shuffle_up");
       }
 
-      /* GID XOR SGID */
-      exit_if_not_equal(acc_xor[j], static_cast<T *>(0x0) + (j ^ SGid), "shuffle_xor");
+      /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+      exit_if_not_equal(acc_xor[j],
+                        static_cast<T *>(0x0) +
+                            (SGBeginGid + (SGLid ^ (SGid % sg_size))),
+                        "shuffle_xor");
+      SGLid++;
     }
   } catch (exception e) {
     std::cout << "SYCL exception caught: " << e.what();
@@ -145,8 +159,9 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
         /* Save GID+SGID */
         acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid);
 
-        /* Save GID XOR SGID */
-        acc_xor[NdItem.get_global_id()] = SG.shuffle_xor(val, sgid);
+        /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+        acc_xor[NdItem.get_global_id()] =
+            SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]);
       });
     });
     auto acc = buf.template get_access<access::mode::read_write>();
@@ -157,17 +172,23 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
 
     size_t sg_size = sgsizeacc[0];
     int SGid = 0;
+    int SGLid = 0;
+    int SGBeginGid = 0;
     for (int j = 0; j < G; j++) {
       if (j % L % sg_size == 0) {
         SGid++;
+        SGLid = 0;
+        SGBeginGid = j;
       }
       if (j % L == 0) {
         SGid = 0;
+        SGLid = 0;
+        SGBeginGid = j;
       }
 
       /*GID of middle element in every subgroup*/
-      exit_if_not_equal(acc[j], values[j / L * L + SGid * sg_size + sg_size / 2],
-                        "shuffle");
+      exit_if_not_equal(
+          acc[j], values[j / L * L + SGid * sg_size + sg_size / 2], "shuffle");
 
       /* Value GID+SGID for all element except last SGID in SG*/
       if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
@@ -179,8 +200,11 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
         exit_if_not_equal(acc_up[j], values[j - SGid], "shuffle_up");
       }
 
-      /* GID XOR SGID */
-      exit_if_not_equal(acc_xor[j], values[j ^ SGid], "shuffle_xor");
+      /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+      exit_if_not_equal(acc_xor[j],
+                        values[SGBeginGid + (SGLid ^ (SGid % sg_size))],
+                        "shuffle_xor");
+      SGLid++;
     }
   } catch (exception e) {
     std::cout << "SYCL exception caught: " << e.what();
diff --git a/sycl/test/sub_group/shuffle.hpp b/sycl/test/sub_group/shuffle.hpp
index 94c82ab99c2d1..7c16121febc0f 100644
--- a/sycl/test/sub_group/shuffle.hpp
+++ b/sycl/test/sub_group/shuffle.hpp
@@ -8,8 +8,7 @@
 
 #include "helper.hpp"
 #include <CL/sycl.hpp>
-template <typename T, int N>
-class sycl_subgr;
+template <typename T, int N> class sycl_subgr;
 
 using namespace cl::sycl;
 
@@ -66,8 +65,9 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
         acc_up[NdItem.get_global_id()] = SG.shuffle_up(vwggid, sgid);
         /* Save GID+SGID */
         acc_down[NdItem.get_global_id()] = SG.shuffle_down(vwggid, sgid);
-        /* Save GID XOR SGID */
-        acc_xor[NdItem.get_global_id()] = SG.shuffle_xor(vwggid, sgid);
+        /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+        acc_xor[NdItem.get_global_id()] =
+            SG.shuffle_xor(vwggid, sgid % SG.get_max_local_range()[0]);
       });
     });
     auto acc = buf.template get_access<access::mode::read_write>();
@@ -81,12 +81,18 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
 
     size_t sg_size = sgsizeacc[0];
     int SGid = 0;
+    int SGLid = 0;
+    int SGBeginGid = 0;
     for (int j = 0; j < G; j++) {
       if (j % L % sg_size == 0) {
         SGid++;
+        SGLid = 0;
+        SGBeginGid = j;
       }
       if (j % L == 0) {
         SGid = 0;
+        SGLid = 0;
+        SGBeginGid = j;
       }
       /*GID of middle element in every subgroup*/
       exit_if_not_equal_vec<T, N>(
@@ -115,8 +121,11 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
           exit_if_not_equal_vec(acc2_up[j], vec<T, N>(j - SGid + sg_size),
                                 "shuffle2_up");
       }
-      /* GID XOR SGID */
-      exit_if_not_equal_vec(acc_xor[j], vec<T, N>(j ^ SGid), "shuffle_xor");
+      /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+      exit_if_not_equal_vec(acc_xor[j],
+                            vec<T, N>(SGBeginGid + (SGLid ^ (SGid % sg_size))),
+                            "shuffle_xor");
+      SGLid++;
     }
   } catch (exception e) {
     std::cout << "SYCL exception caught: " << e.what();
@@ -124,8 +133,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
   }
 }
 
-template <typename T>
-void check(queue &Queue, size_t G = 240, size_t L = 60) {
+template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
   try {
     nd_range<1> NdRange(G, L);
     buffer<T> buf2(G);
@@ -171,8 +179,9 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
         acc_up[NdItem.get_global_id()] = SG.shuffle_up<T>(wggid, sgid);
         /* Save GID+SGID */
         acc_down[NdItem.get_global_id()] = SG.shuffle_down<T>(wggid, sgid);
-        /* Save GID XOR SGID */
-        acc_xor[NdItem.get_global_id()] = SG.shuffle_xor<T>(wggid, sgid);
+        /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+        acc_xor[NdItem.get_global_id()] =
+            SG.shuffle_xor<T>(wggid, sgid % SG.get_max_local_range()[0]);
       });
     });
     auto acc = buf.template get_access<access::mode::read_write>();
@@ -186,13 +195,20 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
 
     size_t sg_size = sgsizeacc[0];
     int SGid = 0;
+    int SGLid = 0;
+    int SGBeginGid = 0;
     for (int j = 0; j < G; j++) {
       if (j % L % sg_size == 0) {
         SGid++;
+        SGLid = 0;
+        SGBeginGid = j;
       }
       if (j % L == 0) {
         SGid = 0;
+        SGLid = 0;
+        SGBeginGid = j;
       }
+
       /*GID of middle element in every subgroup*/
       exit_if_not_equal<T>(acc[j], j / L * L + SGid * sg_size + sg_size / 2,
                            "shuffle");
@@ -215,8 +231,10 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
         if (j % L - SGid + sg_size < L) /* Do not go out  LG*/
           exit_if_not_equal<T>(acc2_up[j], j - SGid + sg_size, "shuffle2_up");
       }
-      /* GID XOR SGID */
-      exit_if_not_equal<T>(acc_xor[j], j ^ SGid, "shuffle_xor");
+      /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
+      exit_if_not_equal<T>(acc_xor[j], SGBeginGid + (SGLid ^ (SGid % sg_size)),
+                           "shuffle_xor");
+      SGLid++;
     }
   } catch (exception e) {
     std::cout << "SYCL exception caught: " << e.what();
diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt
index 1e2843e0b9385..83c02bd94481f 100644
--- a/sycl/tools/CMakeLists.txt
+++ b/sycl/tools/CMakeLists.txt
@@ -1,27 +1,23 @@
-set(CMAKE_CXX_STANDARD 11)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_EXTENSIONS OFF)
-
 add_subdirectory(sycl-ls)
 
 # TODO: move each tool in its own sub-directory
 add_executable(get_device_count_by_type get_device_count_by_type.cpp)
-add_dependencies(get_device_count_by_type ocl-headers ocl-icd l0-loader)
+add_dependencies(get_device_count_by_type ocl-headers ocl-icd level-zero-loader)
 
 if(MSVC)
-  set(L0_LIBRARY
+  set(LEVEL_ZERO_LIBRARY
     "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}")
 else()
-  set(L0_LIBRARY
+  set(LEVEL_ZERO_LIBRARY
     "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}")
 endif()
 
 target_link_libraries(get_device_count_by_type
   PRIVATE
     OpenCL::Headers
-    L0Loader::Headers
+    LevelZeroLoader::Headers
     ${OpenCL_LIBRARIES}
-    ${L0_LIBRARY}
+    ${LEVEL_ZERO_LIBRARY}
     $<$<BOOL:${SYCL_BUILD_PI_CUDA}>:cudadrv>
 )
 target_compile_definitions(get_device_count_by_type
diff --git a/sycl/tools/get_device_count_by_type.cpp b/sycl/tools/get_device_count_by_type.cpp
index 26a9a0a4ed6ad..abf70ce98c357 100644
--- a/sycl/tools/get_device_count_by_type.cpp
+++ b/sycl/tools/get_device_count_by_type.cpp
@@ -32,7 +32,7 @@ static const std::string help =
     "   Help\n"
     "   Example: ./get_device_count_by_type cpu opencl\n"
     "   Supported device types: cpu/gpu/accelerator/default/all\n"
-    "   Supported backends: PI_CUDA/PI_OPENCL/PI_LEVEL0 \n"
+    "   Supported backends: PI_CUDA/PI_OPENCL/PI_LEVEL_ZERO \n"
     "   Output format: <number_of_devices>:<additional_Information>";
 
 // Return the string with all characters translated to lower case.
@@ -259,7 +259,7 @@ int main(int argc, char *argv[]) {
 
   if (backend == "opencl" || backend == "pi_opencl") {
     querySuccess = queryOpenCL(deviceType, deviceCount, msg);
-  } else if (backend == "level0" || backend == "pi_level0") {
+  } else if (backend == "level_zero" || backend == "pi_level_zero") {
     querySuccess = queryLevelZero(deviceType, deviceCount, msg);
   } else if (backend == "cuda" || backend == "pi_cuda") {
     querySuccess = queryCUDA(deviceType, deviceCount, msg);
diff --git a/sycl/tools/install.bat b/sycl/tools/install.bat
index f770d47d3583a..6b1af03030117 100755
--- a/sycl/tools/install.bat
+++ b/sycl/tools/install.bat
@@ -1,5 +1,5 @@
 @echo off
-
+setlocal EnableDelayedExpansion
 set OCL_RT_DIR=%~dp0
 
 echo ###
@@ -7,6 +7,32 @@ echo ### 1. Save and update OpenCL.dll available in the system
 echo ###
 set TMP_FILE=%TEMP%\install.bat.tmp
 
+set OCL_RT_ENTRY_LIB=%OCL_RT_DIR%intelocl64.dll
+IF NOT EXIST %OCL_RT_ENTRY_LIB% (
+   set OCL_RT_ENTRY_LIB=%OCL_RT_DIR%intelocl64_emu.dll
+)
+
+IF "%OCL_ICD_FILENAMES%" == "" (
+  set EXTENDEXISTING=N
+) else (
+  echo OCL_ICD_FILENAMES is present and contains %OCL_ICD_FILENAMES%
+  :USERINPUT
+  set /P "EXTENDEXISTING=Should the OpenCL RT extend existing configuration (Y/N): "
+)
+IF "%EXTENDEXISTING%" == "N" (
+  echo Clean up previous configuration
+  set OCL_ICD_FILENAMES=%OCL_RT_ENTRY_LIB%
+) else (
+  IF "%EXTENDEXISTING%" == "Y" (
+
+    set OCL_ICD_FILENAMES=%OCL_ICD_FILENAMES%;%OCL_RT_ENTRY_LIB%
+    echo Extend previous configuration to %OCL_ICD_FILENAMES%;%OCL_RT_ENTRY_LIB%
+  ) else (
+    echo WARNING: Incorrect input %EXTENDEXISTING%. Only Y and N are allowed.
+    goto USERINPUT
+  )
+)
+
 
 set SYSTEM_OCL_ICD_LOADER=C:\Windows\System32\OpenCL.dll
 set NEW_OCL_ICD_LOADER=%OCL_RT_DIR%\OpenCL.dll
@@ -73,11 +99,13 @@ IF %NEED_OPENCL_UPGRADE% == True (
   echo System OpenCL.dll is already new, no need to upgrade it.
 )
 
+
+
 echo.
 echo ###
 echo ### 3. Set the environment variable OCL_ICD_FILENAMES to %OCL_ICD_FILENAMES%
 echo ###
-REG ADD "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /v OCL_ICD_FILENAMES /d "%OCL_ICD_FILENAMES%"
+REG ADD "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /f /v OCL_ICD_FILENAMES /d "%OCL_ICD_FILENAMES%"
 IF ERRORLEVEL 1 (
   echo !!! Cannot set the environment variable OCL_ICD_FILENAMES
   set INSTALL_ERRORS=1
@@ -137,7 +165,7 @@ IF %INSTALL_ERRORS% == 1 (
   echo See recommendations printed above and perform the following actions manually:
   echo   1. Save %SYSTEM_OCL_ICD_LOADER% to %SYSTEM_OCL_ICD_LOADER%.%SYSTEM_OPENCL_VER%
   echo   2. Copy %NEW_OCL_ICD_LOADER% to %SYSTEM_OCL_ICD_LOADER%
-  echo   3. Add/set the environment variable OCL_ICD_FILENAMES to %OCL_RT_DIR%intelocl64.dll
+  echo   3. Add/set the environment variable OCL_ICD_FILENAMES to %OCL_RT_ENTRY_LIB%
   echo   4. Copy TBB libraries or create symbolic links in %OCL_RT_DIR%tbb.
   echo   5. Add/set the environment variable PATH to %OCL_RT_DIR%tbb
   echo Or try running this batch file as Administrator.
@@ -147,5 +175,5 @@ IF %INSTALL_ERRORS% == 1 (
 echo.
 
 endlocal& ^
-set OCL_ICD_FILENAMES=%OCL_RT_DIR%intelocl64.dll
+set OCL_ICD_FILENAMES=%OCL_ICD_FILENAMES%
 set "PATH=%PATH%;%OCL_RT_DIR%\tbb"
diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp
index cea0eee8b8338..7f051f5ab6790 100644
--- a/sycl/unittests/pi/BackendString.hpp
+++ b/sycl/unittests/pi/BackendString.hpp
@@ -15,7 +15,7 @@ inline const char *GetBackendString(cl::sycl::backend backend) {
     PI_BACKEND_STR(cuda);
     PI_BACKEND_STR(host);
     PI_BACKEND_STR(opencl);
-    PI_BACKEND_STR(level0);
+    PI_BACKEND_STR(level_zero);
 #undef PI_BACKEND_STR
   default:
     return "Unknown Plugin";