|
| 1 | +; Copyright (C) Codeplay Software Limited |
| 2 | +; |
| 3 | +; Licensed under the Apache License, Version 2.0 (the "License") with LLVM |
| 4 | +; Exceptions; you may not use this file except in compliance with the License. |
| 5 | +; You may obtain a copy of the License at |
| 6 | +; |
| 7 | +; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt |
| 8 | +; |
| 9 | +; Unless required by applicable law or agreed to in writing, software |
| 10 | +; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 11 | +; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 12 | +; License for the specific language governing permissions and limitations |
| 13 | +; under the License. |
| 14 | +; |
| 15 | +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 16 | + |
| 17 | +; RUN: veczc -vecz-simd-width=4 -S < %s | FileCheck %s |
| 18 | + |
| 19 | +target triple = "spir64-unknown-unknown" ; 64-bit SPIR target |
| 20 | +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 21 | + |
| 22 | +declare spir_func i32 @_Z16get_sub_group_idv() ; demangled: get_sub_group_id() |
| 23 | +declare spir_func i32 @_Z22get_sub_group_local_idv() ; demangled: get_sub_group_local_id() |
| 24 | +declare spir_func i32 @_Z19sub_group_broadcastij(i32, i32) ; demangled: sub_group_broadcast(int, unsigned int) |
| 25 | + |
| 26 | +; Check that sub_group_broadcast is still vectorized correctly when its source operand is uniform (here: a load indexed only by the sub-group id). |
| 27 | +define spir_kernel void @sub_group_broadcast(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { |
| 28 | + %call = tail call spir_func i32 @_Z16get_sub_group_idv() ; get_sub_group_id() |
| 29 | + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %call ; address of in[sub-group id] |
| 30 | + %v = load i32, i32 addrspace(1)* %arrayidx, align 4 ; the broadcast source; depends on the sub-group id only, not on the work-item |
| 31 | + %broadcast = call spir_func i32 @_Z19sub_group_broadcastij(i32 %v, i32 0) ; sub_group_broadcast(%v, 0) |
| 32 | + %idx = tail call spir_func i32 @_Z22get_sub_group_local_idv() ; get_sub_group_local_id() |
| 33 | + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %idx ; address of out[sub-group local id] |
| 34 | + store i32 %broadcast, i32 addrspace(1)* %arrayidx2, align 4 ; write the broadcast result to this work-item's slot |
| 35 | + ret void |
| 36 | +} |
| 37 | + |
| 38 | +; CHECK-LABEL: define spir_kernel void @__vecz_v4_sub_group_broadcast( |
| 39 | +; CHECK: [[LD:%.+]] = load i32, ptr addrspace(1) %{{.+}}, align 4 |
| 40 | +; CHECK: [[INS:%.+]] = insertelement <4 x i32> poison, i32 [[LD]], i64 0 |
| 41 | +; CHECK: [[BCAST:%.+]] = shufflevector <4 x i32> [[INS]], <4 x i32> poison, <4 x i32> zeroinitializer |
| 42 | +; CHECK: store <4 x i32> [[BCAST]], ptr addrspace(1) %out, align 4 |
| 43 | + |
| 44 | +!opencl.ocl.version = !{!0} |
| 45 | + |
| 46 | +!0 = !{i32 3, i32 0} ; presumably (major, minor) = OpenCL 3.0 - confirm against the consumer of this metadata |
0 commit comments