Skip to content

Commit 01f9f35

Browse files
authored
Subgroup broadcast of uniform value becomes a NOP (intel#59)
1 parent 2db3fb5 commit 01f9f35

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

modules/compiler/vecz/source/transform/packetizer.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,10 +1258,20 @@ Value *Packetizer::Impl::packetizeSubgroupBroadcast(Instruction *I) {
12581258

12591259
IRBuilder<> B(buildAfter(CI, F));
12601260

1261-
auto *const idx = CI->getArgOperand(1);
1261+
auto *const src = CI->getArgOperand(0);
12621262

1263-
auto op = packetize(CI->getArgOperand(0));
1263+
auto op = packetize(src);
12641264
PACK_FAIL_IF(!op);
1265+
1266+
// If the source operand is already a broadcast (uniform) value, the sub-group
1267+
// broadcast is a no-op, so we can use the source operand directly.
1268+
if (op.info->numInstances == 0) {
1269+
IC.deleteInstructionLater(CI);
1270+
CI->replaceAllUsesWith(src);
1271+
return src;
1272+
}
1273+
1274+
auto *const idx = CI->getArgOperand(1);
12651275
Value *val = nullptr;
12661276
// Optimize the constant fixed-vector case, where we can choose the exact
12671277
// subpacket to extract from directly.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; Copyright (C) Codeplay Software Limited
2+
;
3+
; Licensed under the Apache License, Version 2.0 (the "License") with LLVM
4+
; Exceptions; you may not use this file except in compliance with the License.
5+
; You may obtain a copy of the License at
6+
;
7+
; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
8+
;
9+
; Unless required by applicable law or agreed to in writing, software
10+
; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11+
; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12+
; License for the specific language governing permissions and limitations
13+
; under the License.
14+
;
15+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
16+
17+
; RUN: veczc -vecz-simd-width=4 -S < %s | FileCheck %s
18+
19+
target triple = "spir64-unknown-unknown"
20+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
21+
22+
declare spir_func i32 @_Z16get_sub_group_idv()
23+
declare spir_func i32 @_Z22get_sub_group_local_idv()
24+
declare spir_func i32 @_Z19sub_group_broadcastij(i32, i32)
25+
26+
; Checks that a sub-group broadcast whose source operand is uniform is vectorized to a plain splat of that operand.
27+
define spir_kernel void @sub_group_broadcast(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
28+
%call = tail call spir_func i32 @_Z16get_sub_group_idv()
29+
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %call
30+
%v = load i32, i32 addrspace(1)* %arrayidx, align 4
31+
%broadcast = call spir_func i32 @_Z19sub_group_broadcastij(i32 %v, i32 0)
32+
%idx = tail call spir_func i32 @_Z22get_sub_group_local_idv()
33+
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %idx
34+
store i32 %broadcast, i32 addrspace(1)* %arrayidx2, align 4
35+
ret void
36+
}
37+
38+
; CHECK-LABEL: define spir_kernel void @__vecz_v4_sub_group_broadcast(
39+
; CHECK: [[LD:%.+]] = load i32, ptr addrspace(1) %{{.+}}, align 4
40+
; CHECK: [[INS:%.+]] = insertelement <4 x i32> poison, i32 [[LD]], i64 0
41+
; CHECK: [[BCAST:%.+]] = shufflevector <4 x i32> [[INS]], <4 x i32> poison, <4 x i32> zeroinitializer
42+
; CHECK: store <4 x i32> [[BCAST]], ptr addrspace(1) %out, align 4
43+
44+
!opencl.ocl.version = !{!0}
45+
46+
!0 = !{i32 3, i32 0}

0 commit comments

Comments
 (0)