diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index 53bec90a35bb6..48044a804eb4e 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -15,7 +15,7 @@ include(AddSYCLExecutable)
 set(SYCL_MAJOR_VERSION 5)
 set(SYCL_MINOR_VERSION 1)
 set(SYCL_PATCH_VERSION 0)
-set(SYCL_DEV_ABI_VERSION 0)
+set(SYCL_DEV_ABI_VERSION 1)
 if (SYCL_ADD_DEV_VERSION_POSTFIX)
   set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
 endif()
diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
index 3d3078ac95bc9..a73ea6ab687f7 100644
--- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
@@ -432,6 +432,29 @@ template <typename T> struct AreAllButLastReductions<T> {
   static constexpr bool value = !std::is_base_of<reduction_impl_base, T>::value;
 };
 
+/// Predicate returning true if all template type parameters except the last two
+/// are reductions.
+template <typename FirstT, typename... RestT>
+struct AreAllButLastTwoReductions {
+  static constexpr bool value =
+      std::is_base_of<reduction_impl_base, FirstT>::value &&
+      AreAllButLastTwoReductions<RestT...>::value;
+};
+
+/// Helper specialization of AreAllButLastTwoReductions for two elements.
+/// Returns true if the template parameters are not a reduction.
+template <typename T1, typename T2> struct AreAllButLastTwoReductions<T1, T2> {
+  static constexpr bool value =
+      !std::is_base_of<reduction_impl_base, T1>::value &&
+      !std::is_base_of<reduction_impl_base, T2>::value;
+};
+
+/// Helper specialization of AreAllButLastTwoReductions for one element only.
+/// Returns true if the template parameter is not a reduction.
+template <typename T> struct AreAllButLastTwoReductions<T> {
+  static constexpr bool value = !std::is_base_of<reduction_impl_base, T>::value;
+};
+
 /// This class encapsulates the reduction variable/accessor,
 /// the reduction operator and an optional operator identity.
 template <typename T, class BinaryOperation, int Dims, bool IsUSM,
diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp
index 420ec280cffeb..0fdb8014fda7e 100644
--- a/sycl/include/CL/sycl/detail/cg.hpp
+++ b/sycl/include/CL/sycl/detail/cg.hpp
@@ -121,6 +121,7 @@ class CGExecKernel : public CG {
 public:
   /// Stores ND-range description.
   NDRDescT MNDRDesc;
+  property_list MPropList;
   unique_ptr_class<HostKernelBase> MHostKernel;
   shared_ptr_class<detail::kernel_impl> MSyclKernel;
   vector_class<ArgDesc> MArgs;
@@ -128,7 +129,8 @@ class CGExecKernel : public CG {
   detail::OSModuleHandle MOSModuleHandle;
   vector_class<shared_ptr_class<detail::stream_impl>> MStreams;
 
-  CGExecKernel(NDRDescT NDRDesc, unique_ptr_class<HostKernelBase> HKernel,
+  CGExecKernel(NDRDescT NDRDesc, const property_list &PropList,
+               unique_ptr_class<HostKernelBase> HKernel,
                shared_ptr_class<detail::kernel_impl> SyclKernel,
                vector_class<vector_class<char>> ArgsStorage,
                vector_class<detail::AccessorImplPtr> AccStorage,
@@ -142,10 +144,10 @@ class CGExecKernel : public CG {
       : CG(Type, std::move(ArgsStorage), std::move(AccStorage),
            std::move(SharedPtrStorage), std::move(Requirements),
            std::move(Events), std::move(loc)),
-        MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
-        MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)),
-        MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle),
-        MStreams(std::move(Streams)) {
+        MNDRDesc(std::move(NDRDesc)), MPropList(PropList),
+        MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)),
+        MArgs(std::move(Args)), MKernelName(std::move(KernelName)),
+        MOSModuleHandle(OSModuleHandle), MStreams(std::move(Streams)) {
     assert((getType() == RUN_ON_HOST_INTEL || getType() == KERNEL) &&
            "Wrong type of exec kernel CG.");
   }
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp
index ef60964e3d332..5f1d10aa17f5e 100644
--- a/sycl/include/CL/sycl/handler.hpp
+++ b/sycl/include/CL/sycl/handler.hpp
@@ -270,6 +270,7 @@ std::tuple<std::tuple_element_t<Is, TupleT>...>
 tuple_select_elements(TupleT Tuple, std::index_sequence<Is...>);
 
 template <typename FirstT, typename... RestT> struct AreAllButLastReductions;
+template <typename FirstT, typename... RestT> struct AreAllButLastTwoReductions;
 
 } // namespace detail
 } // namespace ONEAPI
@@ -716,8 +717,8 @@ class __SYCL_EXPORT handler {
   /// \param NumWorkItems is a range defining indexing space.
   /// \param KernelFunc is a SYCL kernel function.
   template <typename KernelName, typename KernelType, int Dims>
-  void parallel_for_lambda_impl(range<Dims> NumWorkItems,
-                                KernelType KernelFunc) {
+  void parallel_for_lambda_impl(range<Dims> NumWorkItems, KernelType KernelFunc,
+                                const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
 
@@ -813,10 +814,12 @@ class __SYCL_EXPORT handler {
       range<Dims> AdjustedRange = NumWorkItems;
       AdjustedRange.set_range_dim0(NewValX);
 #ifdef __SYCL_DEVICE_ONLY__
+      (void)PropList;
       kernel_parallel_for<NameWT, TransformedArgType>(Wrapper);
 #else
       detail::checkValueRange<Dims>(AdjustedRange);
       MNDRDesc.set(std::move(AdjustedRange));
+      MPropList = PropList;
       StoreLambda<NameWT, decltype(Wrapper), Dims, TransformedArgType>(
           std::move(Wrapper));
       MCGType = detail::CG::KERNEL;
@@ -828,10 +831,12 @@ class __SYCL_EXPORT handler {
     {
 #ifdef __SYCL_DEVICE_ONLY__
       (void)NumWorkItems;
+      (void)PropList;
       kernel_parallel_for<NameT, TransformedArgType>(KernelFunc);
 #else
       detail::checkValueRange<Dims>(NumWorkItems);
       MNDRDesc.set(std::move(NumWorkItems));
+      MPropList = PropList;
       StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
           std::move(KernelFunc));
       MCGType = detail::CG::KERNEL;
@@ -847,12 +852,14 @@ class __SYCL_EXPORT handler {
   /// \param NumWorkItems is a range defining indexing space.
   /// \param Kernel is a SYCL kernel function.
   template <int Dims>
-  void parallel_for_impl(range<Dims> NumWorkItems, kernel Kernel) {
+  void parallel_for_impl(range<Dims> NumWorkItems, kernel Kernel,
+                         const property_list &PropList = {}) {
     throwIfActionIsCreated();
     verifyKernelInvoc(Kernel);
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     detail::checkValueRange<Dims>(NumWorkItems);
     MNDRDesc.set(std::move(NumWorkItems));
+    MPropList = PropList;
     MCGType = detail::CG::KERNEL;
     extractArgsAndReqs();
     MKernelName = getKernelName();
@@ -996,19 +1003,22 @@ class __SYCL_EXPORT handler {
   /// \param KernelFunc is a SYCL kernel function.
   template <typename KernelName = detail::auto_name, typename KernelType>
 #ifdef __SYCL_NONCONST_FUNCTOR__
-  void single_task(KernelType KernelFunc) {
+  void single_task(KernelType KernelFunc, const property_list &PropList = {}) {
 #else
-  void single_task(const KernelType &KernelFunc) {
+  void single_task(const KernelType &KernelFunc,
+                   const property_list &PropList = {}) {
 #endif
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
 #ifdef __SYCL_DEVICE_ONLY__
+    (void)PropList;
     kernel_single_task<NameT>(KernelFunc);
 #else
     // No need to check if range is out of INT_MAX limits as it's compile-time
     // known constant.
     MNDRDesc.set(range<1>{1});
+    MPropList = PropList;
 
     StoreLambda<NameT, KernelType, /*Dims*/ 0, void>(KernelFunc);
     MCGType = detail::CG::KERNEL;
@@ -1017,29 +1027,38 @@ class __SYCL_EXPORT handler {
 
   template <typename KernelName = detail::auto_name, typename KernelType>
 #ifdef __SYCL_NONCONST_FUNCTOR__
-  void parallel_for(range<1> NumWorkItems, KernelType KernelFunc) {
+  void parallel_for(range<1> NumWorkItems, KernelType KernelFunc,
+                    const property_list &PropList = {}) {
 #else
-  void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc) {
+  void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc,
+                    const property_list &PropList = {}) {
 #endif
-    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
+    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc),
+                                         PropList);
   }
 
   template <typename KernelName = detail::auto_name, typename KernelType>
 #ifdef __SYCL_NONCONST_FUNCTOR__
-  void parallel_for(range<2> NumWorkItems, KernelType KernelFunc) {
+  void parallel_for(range<2> NumWorkItems, KernelType KernelFunc,
+                    const property_list &PropList = {}) {
 #else
-  void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc) {
+  void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc,
+                    const property_list &PropList = {}) {
 #endif
-    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
+    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc),
+                                         PropList);
   }
 
   template <typename KernelName = detail::auto_name, typename KernelType>
 #ifdef __SYCL_NONCONST_FUNCTOR__
-  void parallel_for(range<3> NumWorkItems, KernelType KernelFunc) {
+  void parallel_for(range<3> NumWorkItems, KernelType KernelFunc,
+                    const property_list &PropList = {}) {
 #else
-  void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc) {
+  void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc,
+                    const property_list &PropList = {}) {
 #endif
-    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
+    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc),
+                                         PropList);
   }
 
   /// Defines and invokes a SYCL kernel on host device.
@@ -1110,7 +1129,8 @@ class __SYCL_EXPORT handler {
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
   void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
-                    _KERNELFUNCPARAM(KernelFunc)) {
+                    _KERNELFUNCPARAM(KernelFunc),
+                    const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1118,8 +1138,10 @@ class __SYCL_EXPORT handler {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)NumWorkItems;
     (void)WorkItemOffset;
+    (void)PropList;
     kernel_parallel_for<NameT, LambdaArgType>(KernelFunc);
 #else
+    MPropList = PropList;
     detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
     MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
     StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
@@ -1141,8 +1163,8 @@ class __SYCL_EXPORT handler {
   /// \param KernelFunc is a SYCL kernel function.
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
-  void parallel_for(nd_range<Dims> ExecutionRange,
-                    _KERNELFUNCPARAM(KernelFunc)) {
+  void parallel_for(nd_range<Dims> ExecutionRange, _KERNELFUNCPARAM(KernelFunc),
+                    const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1150,8 +1172,10 @@ class __SYCL_EXPORT handler {
         sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
 #ifdef __SYCL_DEVICE_ONLY__
     (void)ExecutionRange;
+    (void)PropList;
     kernel_parallel_for<NameT, LambdaArgType>(KernelFunc);
 #else
+    MPropList = PropList;
     detail::checkValueRange<Dims>(ExecutionRange);
     MNDRDesc.set(std::move(ExecutionRange));
     StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
@@ -1168,7 +1192,9 @@ class __SYCL_EXPORT handler {
   detail::enable_if_t<Reduction::accessor_mode == access::mode::read_write &&
                       Reduction::has_fast_atomics && !Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu,
-               _KERNELFUNCPARAM(KernelFunc)) {
+               _KERNELFUNCPARAM(KernelFunc),
+               const property_list &PropList = {}) {
+    MPropList = PropList;
     ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
                                            Redu.getUserAccessor());
   }
@@ -1182,7 +1208,9 @@ class __SYCL_EXPORT handler {
   detail::enable_if_t<Reduction::accessor_mode == access::mode::read_write &&
                       Reduction::has_fast_atomics && Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu,
-               _KERNELFUNCPARAM(KernelFunc)) {
+               _KERNELFUNCPARAM(KernelFunc),
+               const property_list &PropList = {}) {
+    MPropList = PropList;
     ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
                                            Redu.getUSMPointer());
   }
@@ -1202,7 +1230,9 @@ class __SYCL_EXPORT handler {
   detail::enable_if_t<Reduction::accessor_mode == access::mode::discard_write &&
                       Reduction::has_fast_atomics>
   parallel_for(nd_range<Dims> Range, Reduction Redu,
-               _KERNELFUNCPARAM(KernelFunc)) {
+               _KERNELFUNCPARAM(KernelFunc),
+               const property_list &PropList = {}) {
+    MPropList = PropList;
     shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
     auto RWAcc = Redu.getReadWriteScalarAcc(*this);
     ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
@@ -1238,7 +1268,8 @@ class __SYCL_EXPORT handler {
             int Dims, typename Reduction>
   detail::enable_if_t<!Reduction::has_fast_atomics>
   parallel_for(nd_range<Dims> Range, Reduction Redu,
-               _KERNELFUNCPARAM(KernelFunc)) {
+               _KERNELFUNCPARAM(KernelFunc),
+               const property_list &PropList = {}) {
     // This parallel_for() is lowered to the following sequence:
     // 1) Call a kernel that a) call user's lambda function and b) performs
     //    one iteration of reduction, storing the partial reductions/sums
@@ -1260,6 +1291,7 @@ class __SYCL_EXPORT handler {
     // the main kernel, but simply generate Range.get_global_range.size() number
     // of partial sums, leaving the reduction work to the additional/aux
     // kernels.
+    MPropList = PropList;
     constexpr bool HFR = Reduction::has_fast_reduce;
     size_t OneElemSize = HFR ? 0 : sizeof(typename Reduction::result_type);
     // TODO: currently the maximal work group size is determined for the given
@@ -1302,20 +1334,16 @@ class __SYCL_EXPORT handler {
     } // end while (NWorkItems > 1)
   }
 
-  // This version of parallel_for may handle one or more reductions packed in
-  // \p Rest argument. Note thought that the last element in \p Rest pack is
-  // the kernel function.
-  // TODO: this variant is currently enabled for 2+ reductions only as the
-  // versions handling 1 reduction variable are more efficient right now.
-  template <typename KernelName = detail::auto_name, int Dims,
+  // Helper template to handle parallel_for with 2+ reductions which can
+  // optionally have property_list as the last argument.
+  template <typename KernelName = detail::auto_name, int Dims, bool hasPropList,
             typename... RestT>
-  std::enable_if_t<(sizeof...(RestT) >= 3 &&
-                    ONEAPI::detail::AreAllButLastReductions<RestT...>::value)>
-  parallel_for(nd_range<Dims> Range, RestT... Rest) {
+  void parallel_for_impl(nd_range<Dims> Range, RestT... Rest) {
     std::tuple<RestT...> ArgsTuple(Rest...);
     constexpr size_t NumArgs = sizeof...(RestT);
-    auto KernelFunc = std::get<NumArgs - 1>(ArgsTuple);
-    auto ReduIndices = std::make_index_sequence<NumArgs - 1>();
+    constexpr size_t Offset = hasPropList ? 2 : 1;
+    auto KernelFunc = std::get<NumArgs - Offset>(ArgsTuple);
+    auto ReduIndices = std::make_index_sequence<NumArgs - Offset>();
     auto ReduTuple =
         ONEAPI::detail::tuple_select_elements(ArgsTuple, ReduIndices);
 
@@ -1350,6 +1378,36 @@ class __SYCL_EXPORT handler {
     } // end while (NWorkItems > 1)
   }
 
+  // This version of parallel_for may handle one or more reductions packed in
+  // \p Rest argument. Note though that the last element in \p Rest pack is
+  // the kernel function.
+  // TODO: this variant is currently enabled for 2+ reductions only as the
+  // versions handling 1 reduction variable are more efficient right now.
+  template <typename KernelName = detail::auto_name, int Dims,
+            typename... RestT>
+  std::enable_if_t<(sizeof...(RestT) >= 3 &&
+                    ONEAPI::detail::AreAllButLastReductions<RestT...>::value)>
+  parallel_for(nd_range<Dims> Range, RestT... Rest) {
+    parallel_for_impl<KernelName, Dims, false, RestT...>(Range, Rest...);
+  }
+
+  // This version of parallel_for may handle one or more reductions packed in
+  // \p Rest argument. Note though that the last two elements in \p Rest pack
+  // are the kernel function and the property list.
+  // TODO: this variant is currently enabled for 2+ reductions only as the
+  // versions handling 1 reduction variable are more efficient right now.
+  template <typename KernelName = detail::auto_name, int Dims,
+            typename... RestT>
+  std::enable_if_t<
+      (sizeof...(RestT) >= 4 &&
+       ONEAPI::detail::AreAllButLastTwoReductions<RestT...>::value)>
+  parallel_for(nd_range<Dims> Range, RestT... Rest) {
+    std::tuple<RestT...> ArgsTuple(Rest...);
+    constexpr size_t NumArgs = sizeof...(RestT);
+    MPropList = std::get<NumArgs - 1>(ArgsTuple);
+    parallel_for_impl<KernelName, Dims, true, RestT...>(Range, Rest...);
+  }
+
   /// Hierarchical kernel invocation method of a kernel defined as a lambda
   /// encoding the body of each work-group to launch.
   ///
@@ -1363,7 +1421,8 @@ class __SYCL_EXPORT handler {
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
   void parallel_for_work_group(range<Dims> NumWorkGroups,
-                               _KERNELFUNCPARAM(KernelFunc)) {
+                               _KERNELFUNCPARAM(KernelFunc),
+                               const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1371,10 +1430,12 @@ class __SYCL_EXPORT handler {
         sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
 #ifdef __SYCL_DEVICE_ONLY__
     (void)NumWorkGroups;
+    (void)PropList;
     kernel_parallel_for_work_group<NameT, LambdaArgType>(KernelFunc);
 #else
     detail::checkValueRange<Dims>(NumWorkGroups);
     MNDRDesc.setNumWorkGroups(NumWorkGroups);
+    MPropList = PropList;
     StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
     MCGType = detail::CG::KERNEL;
 #endif // __SYCL_DEVICE_ONLY__
@@ -1396,7 +1457,8 @@ class __SYCL_EXPORT handler {
             int Dims>
   void parallel_for_work_group(range<Dims> NumWorkGroups,
                                range<Dims> WorkGroupSize,
-                               _KERNELFUNCPARAM(KernelFunc)) {
+                               _KERNELFUNCPARAM(KernelFunc),
+                               const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1405,12 +1467,14 @@ class __SYCL_EXPORT handler {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)NumWorkGroups;
     (void)WorkGroupSize;
+    (void)PropList;
     kernel_parallel_for_work_group<NameT, LambdaArgType>(KernelFunc);
 #else
     nd_range<Dims> ExecRange =
         nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
     detail::checkValueRange<Dims>(ExecRange);
     MNDRDesc.set(std::move(ExecRange));
+    MPropList = PropList;
     StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
     MCGType = detail::CG::KERNEL;
 #endif // __SYCL_DEVICE_ONLY__
@@ -1422,28 +1486,32 @@ class __SYCL_EXPORT handler {
   /// cannot be called on host.
   ///
   /// \param Kernel is a SYCL kernel object.
-  void single_task(kernel Kernel) {
+  void single_task(kernel Kernel, const property_list &PropList = {}) {
     throwIfActionIsCreated();
     verifyKernelInvoc(Kernel);
     // No need to check if range is out of INT_MAX limits as it's compile-time
     // known constant
     MNDRDesc.set(range<1>{1});
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     MCGType = detail::CG::KERNEL;
     extractArgsAndReqs();
     MKernelName = getKernelName();
   }
 
-  void parallel_for(range<1> NumWorkItems, kernel Kernel) {
-    parallel_for_impl(NumWorkItems, Kernel);
+  void parallel_for(range<1> NumWorkItems, kernel Kernel,
+                    const property_list &PropList = {}) {
+    parallel_for_impl(NumWorkItems, Kernel, PropList);
   }
 
-  void parallel_for(range<2> NumWorkItems, kernel Kernel) {
-    parallel_for_impl(NumWorkItems, Kernel);
+  void parallel_for(range<2> NumWorkItems, kernel Kernel,
+                    const property_list &PropList = {}) {
+    parallel_for_impl(NumWorkItems, Kernel, PropList);
   }
 
-  void parallel_for(range<3> NumWorkItems, kernel Kernel) {
-    parallel_for_impl(NumWorkItems, Kernel);
+  void parallel_for(range<3> NumWorkItems, kernel Kernel,
+                    const property_list &PropList = {}) {
+    parallel_for_impl(NumWorkItems, Kernel, PropList);
   }
 
   /// Defines and invokes a SYCL kernel function for the specified range and
@@ -1456,12 +1524,13 @@ class __SYCL_EXPORT handler {
   /// \param Kernel is a SYCL kernel function.
   template <int Dims>
   void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
-                    kernel Kernel) {
+                    kernel Kernel, const property_list &PropList = {}) {
     throwIfActionIsCreated();
     verifyKernelInvoc(Kernel);
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
     MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
+    MPropList = PropList;
     MCGType = detail::CG::KERNEL;
     extractArgsAndReqs();
     MKernelName = getKernelName();
@@ -1475,12 +1544,15 @@ class __SYCL_EXPORT handler {
   /// \param NDRange is a ND-range defining global and local sizes as
   /// well as offset.
   /// \param Kernel is a SYCL kernel function.
-  template <int Dims> void parallel_for(nd_range<Dims> NDRange, kernel Kernel) {
+  template <int Dims>
+  void parallel_for(nd_range<Dims> NDRange, kernel Kernel,
+                    const property_list &PropList = {}) {
     throwIfActionIsCreated();
     verifyKernelInvoc(Kernel);
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     detail::checkValueRange<Dims>(NDRange);
     MNDRDesc.set(std::move(NDRange));
+    MPropList = PropList;
     MCGType = detail::CG::KERNEL;
     extractArgsAndReqs();
     MKernelName = getKernelName();
@@ -1493,17 +1565,20 @@ class __SYCL_EXPORT handler {
   /// \param KernelFunc is a lambda that is used if device, queue is bound to,
   /// is a host device.
   template <typename KernelName = detail::auto_name, typename KernelType>
-  void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc)) {
+  void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc),
+                   const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Kernel;
+    (void)PropList;
     kernel_single_task<NameT>(KernelFunc);
 #else
     // No need to check if range is out of INT_MAX limits as it's compile-time
     // known constant
     MNDRDesc.set(range<1>{1});
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     MCGType = detail::CG::KERNEL;
     if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
@@ -1533,7 +1608,8 @@ class __SYCL_EXPORT handler {
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
   void parallel_for(kernel Kernel, range<Dims> NumWorkItems,
-                    _KERNELFUNCPARAM(KernelFunc)) {
+                    _KERNELFUNCPARAM(KernelFunc),
+                    const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1541,10 +1617,12 @@ class __SYCL_EXPORT handler {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Kernel;
     (void)NumWorkItems;
+    (void)PropList;
     kernel_parallel_for<NameT, LambdaArgType>(KernelFunc);
 #else
     detail::checkValueRange<Dims>(NumWorkItems);
     MNDRDesc.set(std::move(NumWorkItems));
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     MCGType = detail::CG::KERNEL;
     if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
@@ -1568,7 +1646,8 @@ class __SYCL_EXPORT handler {
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
   void parallel_for(kernel Kernel, range<Dims> NumWorkItems,
-                    id<Dims> WorkItemOffset, _KERNELFUNCPARAM(KernelFunc)) {
+                    id<Dims> WorkItemOffset, _KERNELFUNCPARAM(KernelFunc),
+                    const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1577,10 +1656,12 @@ class __SYCL_EXPORT handler {
     (void)Kernel;
     (void)NumWorkItems;
     (void)WorkItemOffset;
+    (void)PropList;
     kernel_parallel_for<NameT, LambdaArgType>(KernelFunc);
 #else
     detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
     MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     MCGType = detail::CG::KERNEL;
     if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
@@ -1604,7 +1685,8 @@ class __SYCL_EXPORT handler {
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
   void parallel_for(kernel Kernel, nd_range<Dims> NDRange,
-                    _KERNELFUNCPARAM(KernelFunc)) {
+                    _KERNELFUNCPARAM(KernelFunc),
+                    const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1613,10 +1695,12 @@ class __SYCL_EXPORT handler {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Kernel;
     (void)NDRange;
+    (void)PropList;
     kernel_parallel_for<NameT, LambdaArgType>(KernelFunc);
 #else
     detail::checkValueRange<Dims>(NDRange);
     MNDRDesc.set(std::move(NDRange));
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     MCGType = detail::CG::KERNEL;
     if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
@@ -1644,7 +1728,8 @@ class __SYCL_EXPORT handler {
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims>
   void parallel_for_work_group(kernel Kernel, range<Dims> NumWorkGroups,
-                               _KERNELFUNCPARAM(KernelFunc)) {
+                               _KERNELFUNCPARAM(KernelFunc),
+                               const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1653,10 +1738,12 @@ class __SYCL_EXPORT handler {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Kernel;
     (void)NumWorkGroups;
+    (void)PropList;
     kernel_parallel_for_work_group<NameT, LambdaArgType>(KernelFunc);
 #else
     detail::checkValueRange<Dims>(NumWorkGroups);
     MNDRDesc.setNumWorkGroups(NumWorkGroups);
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
     MCGType = detail::CG::KERNEL;
@@ -1682,7 +1769,8 @@ class __SYCL_EXPORT handler {
             int Dims>
   void parallel_for_work_group(kernel Kernel, range<Dims> NumWorkGroups,
                                range<Dims> WorkGroupSize,
-                               _KERNELFUNCPARAM(KernelFunc)) {
+                               _KERNELFUNCPARAM(KernelFunc),
+                               const property_list &PropList = {}) {
     throwIfActionIsCreated();
     using NameT =
         typename detail::get_kernel_name_t<KernelName, KernelType>::name;
@@ -1692,12 +1780,14 @@ class __SYCL_EXPORT handler {
     (void)Kernel;
     (void)NumWorkGroups;
     (void)WorkGroupSize;
+    (void)PropList;
     kernel_parallel_for_work_group<NameT, LambdaArgType>(KernelFunc);
 #else
     nd_range<Dims> ExecRange =
         nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
     detail::checkValueRange<Dims>(ExecRange);
     MNDRDesc.set(std::move(ExecRange));
+    MPropList = PropList;
     MKernel = detail::getSyclObjImpl(std::move(Kernel));
     StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
     MCGType = detail::CG::KERNEL;
@@ -2066,6 +2156,7 @@ class __SYCL_EXPORT handler {
   detail::code_location MCodeLoc = {};
   bool MIsFinalized = false;
   event MLastEvent;
+  property_list MPropList;
 
   // Make queue_impl class friend to be able to call finalize method.
   friend class detail::queue_impl;
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index 2e0e28a3538f7..523a307c423da 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -1641,8 +1641,8 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {
 
 pi_result ExecCGCommand::SetKernelParamsAndLaunch(
     CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
-    std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
-    ProgramManager::KernelArgMask EliminatedArgMask) {
+    const property_list &PropList, std::vector<RT::PiEvent> &RawEvents,
+    RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask) {
   vector_class<ArgDesc> &Args = ExecKernel->MArgs;
   // TODO this is not necessary as long as we can guarantee that the arguments
   // are already sorted (e. g. handle the sorting in handler if necessary due
@@ -1884,6 +1884,7 @@ cl_int ExecCGCommand::enqueueImp() {
     CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get();
 
     NDRDescT &NDRDesc = ExecKernel->MNDRDesc;
+    const property_list &PropList = ExecKernel->MPropList;
 
     if (MQueue->is_host()) {
       for (ArgDesc &Arg : ExecKernel->MArgs)
@@ -1950,11 +1951,11 @@ cl_int ExecCGCommand::enqueueImp() {
     if (KernelMutex != nullptr) {
       // For cacheable kernels, we use per-kernel mutex
       std::lock_guard<std::mutex> Lock(*KernelMutex);
-      Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
-                                       Event, EliminatedArgMask);
+      Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, PropList,
+                                       RawEvents, Event, EliminatedArgMask);
     } else {
-      Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
-                                       Event, EliminatedArgMask);
+      Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, PropList,
+                                       RawEvents, Event, EliminatedArgMask);
     }
 
     if (PI_SUCCESS != Error) {
diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp
index 8713096be9127..e63664e66512c 100644
--- a/sycl/source/detail/scheduler/commands.hpp
+++ b/sycl/source/detail/scheduler/commands.hpp
@@ -520,8 +520,8 @@ class ExecCGCommand : public Command {
 
   pi_result SetKernelParamsAndLaunch(
       CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
-      std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
-      ProgramManager::KernelArgMask EliminatedArgMask);
+      const property_list &, std::vector<RT::PiEvent> &RawEvents,
+      RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask);
 
   std::unique_ptr<detail::CG> MCommandGroup;
 
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index 5e63addf8e446..77231bf6dfb7b 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -33,8 +33,8 @@ event handler::finalize() {
   case detail::CG::KERNEL:
   case detail::CG::RUN_ON_HOST_INTEL: {
     CommandGroup.reset(new detail::CGExecKernel(
-        std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
-        std::move(MArgsStorage), std::move(MAccStorage),
+        std::move(MNDRDesc), std::move(MPropList), std::move(MHostKernel),
+        std::move(MKernel), std::move(MArgsStorage), std::move(MAccStorage),
         std::move(MSharedPtrStorage), std::move(MRequirements),
         std::move(MEvents), std::move(MArgs), std::move(MKernelName),
         std::move(MOSModuleHandle), std::move(MStreamStorage), MCGType,
diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp
index 9ba991356e6ad..2f623607363a9 100644
--- a/sycl/test/abi/layout_handler.cpp
+++ b/sycl/test/abi/layout_handler.cpp
@@ -186,5 +186,19 @@ void foo() {
 // CHECK-NEXT: 544 |         std::__shared_ptr<class sycl::detail::event_impl, __gnu_cxx::_S_atomic>::element_type * _M_ptr
 // CHECK-NEXT: 552 |         class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount
 // CHECK-NEXT: 552 |           _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi
-// CHECK-NEXT:     | [sizeof=560, dsize=560, align=8,
-// CHECK-NEXT:     |  nvsize=560, nvalign=8]
+// CHECK-NEXT: 560 |   class sycl::property_list MPropList
+// CHECK-NEXT: 560 |     class sycl::detail::PropertyListBase (base)
+// CHECK-NEXT: 560 |       class std::bitset<7> MDataLessProps
+// CHECK-NEXT: 560 |         struct std::_Base_bitset<1> (base)
+// CHECK-NEXT: 560 |           std::_Base_bitset<1>::_WordT _M_w
+// CHECK-NEXT: 568 |       class std::vector<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > MPropsWithData
+// CHECK-NEXT: 568 |         struct std::_Vector_base<class std::shared_ptr<class sycl::detail::PropertyWithDataBase>, class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > > (base)
+// CHECK-NEXT: 568 |           struct std::_Vector_base<class std::shared_ptr<class sycl::detail::PropertyWithDataBase>, class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > >::_Vector_impl _M_impl
+// CHECK-NEXT: 568 |             class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > (base) (empty)
+// CHECK-NEXT: 568 |               class __gnu_cxx::new_allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > (base) (empty)
+// CHECK-NEXT: 568 |             struct std::_Vector_base<class std::shared_ptr<class sycl::detail::PropertyWithDataBase>, class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > >::_Vector_impl_data (base)
+// CHECK-NEXT: 568 |               std::_Vector_base<class std::shared_ptr<class sycl::detail::PropertyWithDataBase>, class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > >::pointer _M_start
+// CHECK-NEXT: 576 |               std::_Vector_base<class std::shared_ptr<class sycl::detail::PropertyWithDataBase>, class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > >::pointer _M_finish
+// CHECK-NEXT: 584 |               std::_Vector_base<class std::shared_ptr<class sycl::detail::PropertyWithDataBase>, class std::allocator<class std::shared_ptr<class sycl::detail::PropertyWithDataBase> > >::pointer _M_end_of_storage
+// CHECK-NEXT:     | [sizeof=592, dsize=592, align=8,
+// CHECK-NEXT:     |  nvsize=592, nvalign=8]
diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp
index 4ff2cdac83d8e..81fd10efde62a 100644
--- a/sycl/test/abi/symbol_size_alignment.cpp
+++ b/sycl/test/abi/symbol_size_alignment.cpp
@@ -62,7 +62,7 @@ int main() {
   check<detail::buffer_impl, 216, 8>();
   check<detail::image_impl<1>, 272, 8>();
 #else
-  check<handler, 560, 8>();
+  check<handler, 592, 8>();
   check<detail::buffer_impl, 184, 8>();
   check<detail::image_impl<1>, 240, 8>();
 #endif
diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
index 01d9e8a20bd6f..d8568fda7b7df 100644
--- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
+++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
@@ -33,13 +33,13 @@ class MockHandler : public sycl::handler {
     case detail::CG::KERNEL:
     case detail::CG::RUN_ON_HOST_INTEL: {
       CommandGroup.reset(new detail::CGExecKernel(
-          std::move(CGH->MNDRDesc), std::move(CGH->MHostKernel),
-          std::move(CGH->MKernel), std::move(CGH->MArgsStorage),
-          std::move(CGH->MAccStorage), std::move(CGH->MSharedPtrStorage),
-          std::move(CGH->MRequirements), std::move(CGH->MEvents),
-          std::move(CGH->MArgs), std::move(CGH->MKernelName),
-          std::move(CGH->MOSModuleHandle), std::move(CGH->MStreamStorage),
-          CGH->MCGType, CGH->MCodeLoc));
+          std::move(CGH->MNDRDesc), std::move(CGH->MPropList),
+          std::move(CGH->MHostKernel), std::move(CGH->MKernel),
+          std::move(CGH->MArgsStorage), std::move(CGH->MAccStorage),
+          std::move(CGH->MSharedPtrStorage), std::move(CGH->MRequirements),
+          std::move(CGH->MEvents), std::move(CGH->MArgs),
+          std::move(CGH->MKernelName), std::move(CGH->MOSModuleHandle),
+          std::move(CGH->MStreamStorage), CGH->MCGType, CGH->MCodeLoc));
       break;
     }
     default: