diff --git a/SYCL/Basic/built-ins.cpp b/SYCL/Basic/built-ins.cpp
new file mode 100644
index 0000000000..06ffb9e240
--- /dev/null
+++ b/SYCL/Basic/built-ins.cpp
@@ -0,0 +1,66 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
+// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
+// RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER
+
+// CUDA does not support printf.
+// UNSUPPORTED: cuda
+//
+// Hits an assertion with AMD:
+// XFAIL: hip_amd
+
+#include <sycl/sycl.hpp>
+
+#include <cassert>
+
+namespace s = sycl;
+
+// According to OpenCL C spec, the format string must be in constant address
+// space
+#ifdef __SYCL_DEVICE_ONLY__
+#define CONSTANT __attribute__((opencl_constant))
+#else
+#define CONSTANT
+#endif
+
+static const CONSTANT char format[] = "Hello, World! %d %f\n";
+
+int main() {
+  s::queue q{};
+
+  // Test printf
+  q.submit([&](s::handler &CGH) {
+     CGH.single_task<class printf>([=]() {
+       s::ext::oneapi::experimental::printf(format, 123, 1.23);
+       // CHECK: {{(Hello, World! 123 1.23)?}}
+     });
+   }).wait();
+
+  s::ext::oneapi::experimental::printf(format, 321, 3.21);
+  // CHECK: {{(Hello, World! 123 1.23)?}}
+
+  // Test common
+  {
+    s::buffer<s::cl_float, 1> BufMin(s::range<1>(1));
+    s::buffer<s::cl_float2, 1> BufMax(s::range<1>(1));
+    q.submit([&](s::handler &cgh) {
+      auto AccMin = BufMin.get_access<s::access::mode::write>(cgh);
+      auto AccMax = BufMax.get_access<s::access::mode::write>(cgh);
+      cgh.single_task<class common>([=]() {
+        AccMax[0] = s::max(s::cl_float2{0.5f, 2.5}, s::cl_float2{2.3f, 2.3});
+        AccMin[0] = s::min(s::cl_float{0.5f}, s::cl_float{2.3f});
+      });
+    });
+
+    auto AccMin = BufMin.template get_access<s::access::mode::read>();
+    auto AccMax = BufMax.template get_access<s::access::mode::read>();
+
+    assert(AccMin[0] == 0.5);
+    assert(AccMax[0].x() == 2.3f && AccMax[0].y() == 2.5f);
+    assert(s::min(0.5f, 2.3f) == 0.5);
+    auto Res = s::max(s::int4{5, 2, 1, 5}, s::int4{3, 3, 4, 2});
+    assert(Res.x() == 5 && Res.y() == 3 && Res.z() == 4 && Res.w() == 5);
+  }
+
+  return 0;
+}
diff --git a/SYCL/Basic/context.cpp b/SYCL/Basic/context.cpp
new file mode 100644
index 0000000000..486440d46e
--- /dev/null
+++ b/SYCL/Basic/context.cpp
@@ -0,0 +1,83 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// This test performs basic check of the SYCL context class.
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+int main() {
+  try {
+    context c;
+  } catch (device_error e) {
+    std::cout << "Failed to create device for context" << std::endl;
+  }
+
+  auto devices = device::get_devices();
+  device &deviceA = devices[0];
+  device &deviceB = (devices.size() > 1 ? devices[1] : devices[0]);
+  {
+    std::cout << "move constructor" << std::endl;
+    context Context(deviceA);
+    size_t hash = std::hash<context>()(Context);
+    context MovedContext(std::move(Context));
+    assert(hash == std::hash<context>()(MovedContext));
+  }
+  {
+    std::cout << "move assignment operator" << std::endl;
+    context Context(deviceA);
+    size_t hash = std::hash<context>()(Context);
+    context WillMovedContext(deviceB);
+    WillMovedContext = std::move(Context);
+    assert(hash == std::hash<context>()(WillMovedContext));
+  }
+  {
+    std::cout << "copy constructor" << std::endl;
+    context Context(deviceA);
+    size_t hash = std::hash<context>()(Context);
+    context ContextCopy(Context);
+    assert(hash == std::hash<context>()(Context));
+    assert(hash == std::hash<context>()(ContextCopy));
+    assert(Context == ContextCopy);
+  }
+  {
+    std::cout << "copy assignment operator" << std::endl;
+    context Context(deviceA);
+    size_t hash = std::hash<context>()(Context);
+    context WillContextCopy(deviceB);
+    WillContextCopy = Context;
+    assert(hash == std::hash<context>()(Context));
+    assert(hash == std::hash<context>()(WillContextCopy));
+    assert(Context == WillContextCopy);
+  }
+  {
+    auto AsyncHandler = [](const sycl::exception_list &EL) {};
+    sycl::context Context1(sycl::property_list{});
+    sycl::context Context2(AsyncHandler, sycl::property_list{});
+    sycl::context Context3(deviceA, sycl::property_list{});
+    sycl::context Context4(deviceA, AsyncHandler, sycl::property_list{});
+    sycl::context Context5(deviceA.get_platform(), sycl::property_list{});
+    sycl::context Context6(deviceA.get_platform(), AsyncHandler,
+                           sycl::property_list{});
+    sycl::context Context7(std::vector<sycl::device>{deviceA},
+                           sycl::property_list{});
+    sycl::context Context8(
+        std::vector<sycl::device>{deviceA}, AsyncHandler,
+        sycl::property_list{
+            sycl::ext::oneapi::cuda::property::context::use_primary_context{}});
+
+    if (!Context8.has_property<sycl::ext::oneapi::cuda::property::context::
+                                   use_primary_context>()) {
+      std::cerr << "Line " << __LINE__ << ": Property was not found"
+                << std::endl;
+      return 1;
+    }
+
+    auto Prop = Context8.get_property<
+        sycl::ext::oneapi::cuda::property::context::use_primary_context>();
+  }
+}
diff --git a/SYCL/Basic/device.cpp b/SYCL/Basic/device.cpp
new file mode 100644
index 0000000000..4bcc731946
--- /dev/null
+++ b/SYCL/Basic/device.cpp
@@ -0,0 +1,97 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// This test performs basic check of the SYCL device class.
+
+#include <cassert>
+#include <iostream>
+#include <sycl/sycl.hpp>
+#include <utility>
+
+using namespace sycl;
+
+std::string get_type(const device &dev) {
+  if (dev.is_gpu()) {
+    return "OpenCL.GPU";
+  } else if (dev.is_accelerator()) {
+    return "OpenCL.ACC";
+  } else {
+    return "OpenCL.CPU";
+  }
+}
+
+int main() {
+  device d;
+  std::cout << "Default device type: " << get_type(d) << std::endl;
+
+  int i = 1;
+  std::cout << "Get all devices in the system" << std::endl;
+  for (const auto &dev : device::get_devices()) {
+    std::cout << "Device " << i++ << " is available: " << get_type(dev)
+              << std::endl;
+  }
+  i = 1;
+  std::cout << "Get host devices in the system" << std::endl;
+  for (const auto &dev : device::get_devices(info::device_type::host)) {
+    std::cout << "Device " << i++ << " is available: " << get_type(dev)
+              << std::endl;
+  }
+  i = 1;
+  std::cout << "Get OpenCL.CPU devices in the system" << std::endl;
+  for (const auto &dev : device::get_devices(info::device_type::cpu)) {
+    std::cout << "Device " << i++ << " is available: " << get_type(dev)
+              << std::endl;
+  }
+  i = 1;
+  std::cout << "Get OpenCL.GPU devices in the system" << std::endl;
+  for (const auto &dev : device::get_devices(info::device_type::gpu)) {
+    std::cout << "Device " << i++ << " is available: " << get_type(dev)
+              << std::endl;
+  }
+  i = 1;
+  std::cout << "Get OpenCL.ACC devices in the system" << std::endl;
+  for (const auto &dev : device::get_devices(info::device_type::accelerator)) {
+    std::cout << "Device " << i++ << " is available: " << get_type(dev)
+              << std::endl;
+  }
+
+  auto devices = device::get_devices();
+  device &deviceA = devices[0];
+  device &deviceB = (devices.size() > 1 ? devices[1] : devices[0]);
+  {
+    std::cout << "move constructor" << std::endl;
+    device Device(deviceA);
+    size_t hash = std::hash<device>()(Device);
+    device MovedDevice(std::move(Device));
+    assert(hash == std::hash<device>()(MovedDevice));
+  }
+  {
+    std::cout << "move assignment operator" << std::endl;
+    device Device(deviceA);
+    size_t hash = std::hash<device>()(Device);
+    device WillMovedDevice(deviceB);
+    WillMovedDevice = std::move(Device);
+    assert(hash == std::hash<device>()(WillMovedDevice));
+  }
+  {
+    std::cout << "copy constructor" << std::endl;
+    device Device(deviceA);
+    size_t hash = std::hash<device>()(Device);
+    device DeviceCopy(Device);
+    assert(hash == std::hash<device>()(Device));
+    assert(hash == std::hash<device>()(DeviceCopy));
+    assert(Device == DeviceCopy);
+  }
+  {
+    std::cout << "copy assignment operator" << std::endl;
+    device Device(deviceA);
+    size_t hash = std::hash<device>()(Device);
+    device WillDeviceCopy(deviceB);
+    WillDeviceCopy = Device;
+    assert(hash == std::hash<device>()(Device));
+    assert(hash == std::hash<device>()(WillDeviceCopy));
+    assert(Device == WillDeviceCopy);
+  }
+}
diff --git a/SYCL/Basic/event_async_exception.cpp b/SYCL/Basic/event_async_exception.cpp
new file mode 100644
index 0000000000..979f7cfc0e
--- /dev/null
+++ b/SYCL/Basic/event_async_exception.cpp
@@ -0,0 +1,42 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+//==---- event_async_exception.cpp - Test for event async exceptions -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <sycl/sycl.hpp>
+
+// This test checks that if there is a submit failure, the asynchronous
+// exception is associated with the returned event.
+
+using namespace sycl;
+
+class KernelName;
+
+int main() {
+  auto asyncHandler = [](exception_list el) {
+    for (auto &e : el) {
+      std::rethrow_exception(e);
+    }
+  };
+
+  queue q(asyncHandler);
+
+  try {
+    // Check that submitting a CG with no kernel or memory operation doesn't
+    // produce an async exception
+    event e = q.submit([&](handler &cgh) {});
+
+    e.wait_and_throw();
+    return 0;
+  } catch (runtime_error e) {
+    return 1;
+  }
+}
diff --git a/SYCL/Basic/offset-accessor-get_pointer.cpp b/SYCL/Basic/offset-accessor-get_pointer.cpp
new file mode 100644
index 0000000000..c3e1352c18
--- /dev/null
+++ b/SYCL/Basic/offset-accessor-get_pointer.cpp
@@ -0,0 +1,115 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// Per the SYCL 2020 spec (4.7.6.12 and others)
+// accessor::get_pointer() returns a pointer to the start of this accessor’s
+// memory. For a buffer accessor this is a pointer to the start of the
+// underlying buffer, even if this is a ranged accessor whose range does not
+// start at the beginning of the buffer.
+
+// This is a departure from how get_pointer() was interpreted with offset
+// accessors in the past. Not relevant for images, which do not support offset
+// accessors.
+
+#include <sycl/sycl.hpp>
+#include <vector>
+using namespace sycl;
+
+void test_across_ranges() {
+  constexpr auto r_w = access::mode::read_write;
+  constexpr unsigned long width = 4;
+  constexpr unsigned long count = width * width;
+  constexpr unsigned long count3D = width * width * width; // 64
+  std::vector<int> v1(count);                              // for 1D testing.
+  std::vector<int> v2(count);                              // for 2D testing.
+  std::vector<int> v3(count3D);                            // 3D
+
+  range<1> range_1D(count);
+  range<2> range_2D(width, width);
+  range<3> range_3D(width, width, width);
+
+  queue myQueue;
+  {
+    // 1D, 2D, 3D
+    buffer<int> buf_1D(v1.data(), count);
+    buffer<int, 2> buf_2D(v2.data(), range_2D);
+    buffer<int, 3> buf_3D(v3.data(), range_3D);
+
+    myQueue.submit([&](handler &cgh) {
+      auto acc_1D = buf_1D.get_access<r_w>(cgh, {2}, {10});
+      auto acc_2D = buf_2D.get_access<r_w>(cgh, {2, 2}, {1, 1});
+      auto acc_3D = buf_3D.get_access<r_w>(cgh, {2, 2, 2}, {1, 1, 1});
+      cgh.single_task<class task>([=] {
+        acc_1D.get_pointer()[0] = 5; // s.b. offset 0
+        acc_1D[0] = 15;              // s.b. offset 10
+
+        // 2D
+        acc_2D.get_pointer()[0] = 7; // s.b. offset 0
+        acc_2D[{0, 0}] = 17;         // s.b. offset {1,1} aka 5 if linear.
+
+        // 3D
+        acc_3D.get_pointer()[0] = 9; // s.b. offset 0
+        acc_3D[{0, 0, 0}] = 19;      // s.b. offset {1,1,1} aka 21 if linear.
+      });
+    });
+    myQueue.wait();
+    // now host access - we offset by one more than the device test
+    auto acc_1D = buf_1D.get_access<r_w>({2}, {11});
+    auto acc_2D = buf_2D.get_access<r_w>({2, 2}, {1, 2});
+    auto acc_3D = buf_3D.get_access<r_w>({2, 2, 2}, {1, 1, 2});
+    acc_1D.get_pointer()[1] = 4; // s.b. offset 1
+    acc_1D[0] = 14;              // s.b. offset 11
+
+    // 2D
+    acc_2D.get_pointer()[1] = 6; // s.b. offset 1
+    acc_2D[{0, 0}] = 16;         // s.b. offset {1,2} aka 6 if linear.
+
+    // 3D
+    acc_3D.get_pointer()[1] = 8; // s.b. offset 1
+    acc_3D[{0, 0, 0}] = 18;      // s.b. offset {1,1,2} aka 22 if linear.
+  }                              //~buffer
+  // always nice to have some feedback
+  std::cout << "DEVICE" << std::endl;
+  std::cout << "1D CHECK:  v1[0] should be 5: " << v1[0]
+            << ", and v1[10] s.b. 15: " << v1[10] << std::endl;
+  std::cout << "2D CHECK:  v2[0] should be 7: " << v2[0]
+            << ", and v2[5]  s.b. 17: " << v2[5] << std::endl;
+  std::cout << "3D CHECK:  v3[0] should be 9: " << v3[0]
+            << ", and v3[21] s.b. 19: " << v3[21] << std::endl
+            << std::endl;
+
+  std::cout << "HOST" << std::endl;
+  std::cout << "1D CHECK:  v1[1] should be 4: " << v1[1]
+            << ", and v1[11] s.b. 14: " << v1[11] << std::endl;
+  std::cout << "2D CHECK:  v2[1] should be 6: " << v2[1]
+            << ", and v2[6]  s.b. 16: " << v2[6] << std::endl;
+  std::cout << "3D CHECK:  v3[1] should be 8: " << v3[1]
+            << ", and v3[22] s.b. 17: " << v3[22] << std::endl
+            << std::endl;
+
+  // device
+  assert(v1[0] == 5);
+  assert(v1[10] == 15);
+  assert(v2[0] == 7);
+  assert(v2[5] == 17); // offset {1,1} in a 4x4 field is linear offset 5
+  assert(v3[0] == 9);
+  assert(v3[21] == 19); // offset {1,1,1} in a 4x4x4 field is linear offset 21
+
+  // host
+  assert(v1[1] == 4);
+  assert(v1[11] == 14);
+  assert(v2[1] == 6);
+  assert(v2[6] == 16); // offset {1,2} in a 4x4 field is linear offset 6
+  assert(v3[1] == 8);
+  assert(v3[22] == 18); // offset {1,1,2} in a 4x4x4 field is linear offset 22
+}
+
+int main() {
+  test_across_ranges();
+
+  std::cout << "OK!" << std::endl;
+
+  return 0;
+}
diff --git a/SYCL/Reduction/reduction_ctor.cpp b/SYCL/Reduction/reduction_ctor.cpp
new file mode 100644
index 0000000000..7df80bcf13
--- /dev/null
+++ b/SYCL/Reduction/reduction_ctor.cpp
@@ -0,0 +1,168 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// This performs basic checks such as reduction creation, getIdentity() method,
+// and the combine() method of the aux class 'reducer'.
+
+#include "reduction_utils.hpp"
+#include <cassert>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+bool toBool(bool V) { return V; }
+bool toBool(vec<int, 2> V) { return V.x() && V.y(); }
+bool toBool(vec<int, 4> V) { return V.x() && V.y() && V.z() && V.w(); }
+
+template <typename T, typename Reduction>
+void test_reducer(Reduction &Redu, T A, T B) {
+  typename Reduction::reducer_type Reducer;
+  Reducer.combine(A);
+  Reducer.combine(B);
+
+  typename Reduction::binary_operation BOp;
+  T ExpectedValue = BOp(A, B);
+  assert(ExpectedValue == Reducer.MValue &&
+         "Wrong result of binary operation.");
+}
+
+template <typename T, typename Reduction, typename BinaryOperation>
+void test_reducer(Reduction &Redu, T Identity, BinaryOperation BOp, T A, T B) {
+  typename Reduction::reducer_type Reducer(Identity, BOp);
+  Reducer.combine(A);
+  Reducer.combine(B);
+
+  T ExpectedValue = BOp(A, B);
+  assert(toBool(ExpectedValue == Reducer.MValue) &&
+         "Wrong result of binary operation.");
+}
+
+template <typename... Ts> class KernelNameGroup;
+
+template <typename SpecializationKernelName, typename T, int Dim,
+          class BinaryOperation>
+void testKnown(T Identity, BinaryOperation BOp, T A, T B) {
+  static_assert(has_known_identity<BinaryOperation, T>::value);
+  queue Q;
+  buffer<T, 1> ReduBuf(1);
+  T *ReduUSMPtr = malloc_host<T>(1, Q);
+
+  Q.submit([&](handler &CGH) {
+    // Reduction needs a device accessor as a parameter.
+    // This accessor is not really used in this test.
+    accessor<T, Dim, access::mode::read_write, access::target::device>
+        ReduRWAcc(ReduBuf, CGH);
+    accessor<T, Dim, access::mode::discard_write, access::target::device>
+        ReduDWAcc(ReduBuf, CGH);
+    auto Redu = sycl::reduction(ReduBuf, CGH, BOp);
+    auto ReduUSM = sycl::reduction(ReduUSMPtr, BOp);
+
+    assert(toBool(Redu.getIdentity() == Identity) &&
+           toBool(ReduUSM.getIdentity() == Identity) &&
+           toBool(known_identity<BinaryOperation, T>::value == Identity) &&
+           "Failed getIdentity() check().");
+    test_reducer(Redu, A, B);
+    test_reducer(ReduUSM, A, B);
+
+    test_reducer(Redu, Identity, BOp, A, B);
+    test_reducer(ReduUSM, Identity, BOp, A, B);
+
+    // Command group must have at least one task in it. Use an empty one.
+    CGH.single_task<SpecializationKernelName>([=]() {});
+  });
+  free(ReduUSMPtr, Q);
+}
+
+template <typename SpecializationKernelName, typename T, int Dim,
+          class BinaryOperation>
+void testUnknown(T Identity, BinaryOperation BOp, T A, T B) {
+  queue Q;
+  buffer<T, 1> ReduBuf(1);
+  T *ReduUSMPtr = malloc_host<T>(1, Q);
+  Q.submit([&](handler &CGH) {
+    // Reduction needs a device accessor as a parameter.
+    // This accessor is not really used in this test.
+    accessor<T, Dim, access::mode::read_write, access::target::device>
+        ReduRWAcc(ReduBuf, CGH);
+    accessor<T, Dim, access::mode::discard_write, access::target::device>
+        ReduDWAcc(ReduBuf, CGH);
+    auto Redu = sycl::reduction(ReduBuf, CGH, Identity, BOp);
+    auto ReduUSM = sycl::reduction(ReduUSMPtr, Identity, BOp);
+    assert(toBool(Redu.getIdentity() == Identity) &&
+           toBool(ReduUSM.getIdentity() == Identity) &&
+           "Failed getIdentity() check().");
+    test_reducer(Redu, Identity, BOp, A, B);
+    test_reducer(ReduUSM, Identity, BOp, A, B);
+
+    // Command group must have at least one task in it. Use an empty one.
+    CGH.single_task<SpecializationKernelName>([=]() {});
+  });
+  free(ReduUSMPtr, Q);
+}
+
+template <typename SpecializationKernelName, typename T, class BinaryOperation>
+void testBoth(T Identity, BinaryOperation BOp, T A, T B) {
+  testKnown<KernelNameGroup<SpecializationKernelName,
+                            class KernelName_SpronAvHpacKFL>,
+            T, 0>(Identity, BOp, A, B);
+  testKnown<
+      KernelNameGroup<SpecializationKernelName, class KernelName_XFxrYatPJlU>,
+      T, 1>(Identity, BOp, A, B);
+  testUnknown<
+      KernelNameGroup<SpecializationKernelName, class KernelName_oUFYMyQSlL>, T,
+      0>(Identity, BOp, A, B);
+  testUnknown<KernelNameGroup<SpecializationKernelName, class KernelName_Ndbp>,
+              T, 1>(Identity, BOp, A, B);
+}
+
+int main() {
+  testBoth<class KernelName_DpWavJTNjhJtrHmLWt, int>(
+      0, ext::oneapi::plus<int>(), 1, 7);
+  testBoth<class KernelName_MHRtc, int>(1, std::multiplies<int>(), 1, 7);
+  testBoth<class KernelName_eYhurMyKBZvzctmqwUZ, int>(
+      0, ext::oneapi::bit_or<int>(), 1, 8);
+  testBoth<class KernelName_DpVPIUBjUMGZEwBFHH, int>(
+      0, ext::oneapi::bit_xor<int>(), 7, 3);
+  testBoth<class KernelName_vGKFactgrkngMXd, int>(
+      ~0, ext::oneapi::bit_and<int>(), 7, 3);
+  testBoth<class KernelName_GLpknSBxclKWjm, int>(
+      (std::numeric_limits<int>::max)(), ext::oneapi::minimum<int>(), 7, 3);
+  testBoth<class KernelName_EvOaOYQ, int>((std::numeric_limits<int>::min)(),
+                                          ext::oneapi::maximum<int>(), 7, 3);
+
+  testBoth<class KernelName_iFbcoTtPeDtUEK, float>(
+      0, ext::oneapi::plus<float>(), 1, 7);
+  testBoth<class KernelName_PEMJanstdNezDSXnP, float>(
+      1, std::multiplies<float>(), 1, 7);
+  testBoth<class KernelName_wOEuftXSjCLpoTOMrYHR, float>(
+      getMaximumFPValue<float>(), ext::oneapi::minimum<float>(), 7, 3);
+  testBoth<class KernelName_HzFCIZQKeV, float>(
+      getMinimumFPValue<float>(), ext::oneapi::maximum<float>(), 7, 3);
+
+  testUnknown<class KernelName_sJOZPgFeiALyqwIWnFP, CustomVec<float>, 0,
+              CustomVecPlus<float>>(CustomVec<float>(0), CustomVecPlus<float>(),
+                                    CustomVec<float>(1), CustomVec<float>(7));
+  testUnknown<class KernelName_jMA, CustomVec<float>, 1>(
+      CustomVec<float>(0), CustomVecPlus<float>(), CustomVec<float>(1),
+      CustomVec<float>(7));
+
+  testUnknown<class KernelName_zhF, int, 0>(
+      0, [](auto a, auto b) { return a | b; }, 1, 8);
+
+  int2 IdentityI2 = {0, 0};
+  int2 AI2 = {1, 2};
+  int2 BI2 = {7, 13};
+  testUnknown<class KNI2, int2, 0>(IdentityI2, ext::oneapi::plus<int2>(), AI2,
+                                   BI2);
+
+  float4 IdentityF4 = {0, 0, 0, 0};
+  float4 AF4 = {1, 2, -1, -34};
+  float4 BF4 = {7, 13, 0, 35};
+  testUnknown<class KNF4, float4, 0>(IdentityF4, ext::oneapi::plus<>(), AF4,
+                                     BF4);
+
+  std::cout << "Test passed\n";
+  return 0;
+}
diff --git a/SYCL/Regression/check_vector_of_opencl_event.cpp b/SYCL/Regression/check_vector_of_opencl_event.cpp
new file mode 100644
index 0000000000..b29a1916e6
--- /dev/null
+++ b/SYCL/Regression/check_vector_of_opencl_event.cpp
@@ -0,0 +1,31 @@
+// REQUIRES: opencl
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+//===----------------------------------------------------------------------===//
+// This test verifies that sycl::get_native<backend::opencl> and
+// sycl::make_event<backend::opencl> work according to the SYCL™ 2020
+// Specification (revision 4)
+//===----------------------------------------------------------------------===//
+
+#include <sycl/sycl.hpp>
+
+int main() {
+  sycl::queue Queue;
+  sycl::event event = Queue.submit([&](sycl::handler &cgh) {
+    cgh.single_task<class event_kernel>([]() {});
+  });
+  // Check that get_native function returns a vector
+  std::vector<cl_event> ClEventVec = get_native<sycl::backend::opencl>(event);
+  // Check that make_event is working properly with vector<cl_event> as a
+  // param
+  sycl::event SyclEvent =
+      sycl::make_event<sycl::backend::opencl>(ClEventVec, Queue.get_context());
+  std::vector<cl_event> ClEventVecFromMake =
+      sycl::get_native<sycl::backend::opencl>(SyclEvent);
+  if (ClEventVec[0] != ClEventVecFromMake[0])
+    throw std::runtime_error("Cl events are not the same");
+  return 0;
+}
diff --git a/SYCL/USM/usm_alloc_utility.cpp b/SYCL/USM/usm_alloc_utility.cpp
new file mode 100644
index 0000000000..53649ba2a6
--- /dev/null
+++ b/SYCL/USM/usm_alloc_utility.cpp
@@ -0,0 +1,115 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+//==------ usm_alloc_utility.cpp - USM malloc and aligned_alloc test -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <sycl/sycl.hpp>
+
+#include <cassert>
+
+using namespace sycl;
+
+constexpr int N = 8;
+
+static void check_and_free(int *array, const device &dev, const context &ctxt,
+                           usm::alloc expected_type) {
+  // host device treats all allocations as host allocations
+  assert((get_pointer_type(array, ctxt) == expected_type) &&
+         "Allocation pointer has unexpected type.");
+  assert((get_pointer_device(array, ctxt) == dev) &&
+         "Allocation pointer has unexpected device associated with it.");
+  free(array, ctxt);
+}
+
+int main() {
+  queue q;
+  auto dev = q.get_device();
+  auto ctxt = q.get_context();
+  int *array;
+
+  if (dev.get_info<info::device::usm_host_allocations>()) {
+    array = (int *)malloc(N * sizeof(int), q, usm::alloc::host);
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array =
+        (int *)malloc(N * sizeof(int), q, usm::alloc::host, property_list{});
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array = (int *)aligned_alloc(alignof(long long), N * sizeof(int), q,
+                                 usm::alloc::host);
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array = (int *)aligned_alloc(alignof(long long), N * sizeof(int), q,
+                                 usm::alloc::host, property_list{});
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array = (int *)malloc_host(N * sizeof(int), q);
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array = (int *)malloc_host(
+        N * sizeof(int), q,
+        property_list{
+            ext::intel::experimental::property::usm::buffer_location{2}});
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array =
+        (int *)aligned_alloc_host(alignof(long long), N * sizeof(int), ctxt);
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+
+    array = (int *)aligned_alloc_host(
+        alignof(long long), N * sizeof(int), ctxt,
+        property_list{
+            ext::intel::experimental::property::usm::buffer_location{2}});
+    check_and_free(array, dev, ctxt, usm::alloc::host);
+  }
+
+  if (dev.get_info<info::device::usm_shared_allocations>()) {
+    array = (int *)malloc_shared(N * sizeof(int), q);
+    check_and_free(array, dev, ctxt, usm::alloc::shared);
+
+    array = (int *)malloc_shared(
+        N * sizeof(int), q,
+        property_list{
+            ext::intel::experimental::property::usm::buffer_location{2}});
+    check_and_free(array, dev, ctxt, usm::alloc::shared);
+
+    array = (int *)aligned_alloc_shared(alignof(long long), N * sizeof(int),
+                                        dev, ctxt);
+    check_and_free(array, dev, ctxt, usm::alloc::shared);
+
+    array = (int *)aligned_alloc_shared(
+        alignof(long long), N * sizeof(int), dev, ctxt,
+        property_list{
+            ext::intel::experimental::property::usm::buffer_location{2}});
+    check_and_free(array, dev, ctxt, usm::alloc::shared);
+  }
+
+  if (dev.get_info<info::device::usm_device_allocations>()) {
+    array = (int *)malloc_device(N * sizeof(int), q);
+    check_and_free(array, dev, ctxt, usm::alloc::device);
+
+    array = malloc_device<int>(
+        N, q,
+        property_list{
+            ext::intel::experimental::property::usm::buffer_location(2)});
+    check_and_free(array, dev, ctxt, usm::alloc::device);
+
+    array = (int *)aligned_alloc_device(alignof(long long), N * sizeof(int),
+                                        dev, ctxt);
+    check_and_free(array, dev, ctxt, usm::alloc::device);
+
+    array = (int *)aligned_alloc_device(alignof(long long), N * sizeof(int),
+                                        dev, ctxt, property_list{});
+    check_and_free(array, dev, ctxt, usm::alloc::device);
+  }
+
+  return 0;
+}
diff --git a/SYCL/USM/usm_allocator.cpp b/SYCL/USM/usm_allocator.cpp
new file mode 100644
index 0000000000..e434267f95
--- /dev/null
+++ b/SYCL/USM/usm_allocator.cpp
@@ -0,0 +1,46 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+//==--------- usm_allocator.cpp - USM allocator construction test ----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <sycl/sycl.hpp>
+
+#include <cassert>
+
+using namespace sycl;
+
+int main() {
+  queue q;
+  auto dev = q.get_device();
+  auto ctxt = q.get_context();
+
+  {
+    // Test usm_allocator
+    if (dev.get_info<info::device::usm_shared_allocations>() &&
+        dev.get_info<info::device::usm_host_allocations>()) {
+      usm_allocator<int, usm::alloc::shared> alloc11(ctxt, dev);
+      usm_allocator<int, usm::alloc::shared> alloc12(ctxt, dev,
+                                                     property_list{});
+      usm_allocator<int, usm::alloc::host> alloc21(q);
+      usm_allocator<int, usm::alloc::host> alloc22(alloc21);
+      usm_allocator<int, usm::alloc::host> alloc23(q, property_list{});
+
+      // usm::alloc::device is not supported by usm_allocator
+
+      assert((alloc11 != alloc22) && "Allocators should NOT be equal.");
+      assert((alloc11 == alloc12) && "Allocators should be equal.");
+      assert((alloc21 == alloc22) && "Allocators should be equal.");
+      assert((alloc21 == alloc23) && "Allocators should be equal.");
+    }
+  }
+
+  return 0;
+}