Skip to content

Commit b01879e

Browse files
chencha3joker-ephadam-smnk
authored
[MLIR][XeGPU] Add XeGPU scattered ops (#86594)
- Extended TensorDescAttr with scattered attribute - Add scattered ops: CreateDescOp, PrefetchOp, LoadGatherOp, StoreScatterOp, UpdateOffsetOp - Add a block op: UpdateNdOffsetOp --------- Co-authored-by: Mehdi Amini <[email protected]> Co-authored-by: Adam Siemieniuk <[email protected]>
1 parent 5c6af60 commit b01879e

File tree

8 files changed

+937
-107
lines changed

8 files changed

+937
-107
lines changed

mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "mlir/Bytecode/BytecodeOpInterface.h"
1313
#include "mlir/IR/BuiltinTypes.h"
1414
#include "mlir/IR/Dialect.h"
15+
#include "mlir/IR/TypeUtilities.h"
1516
#include "mlir/Interfaces/ShapedOpInterfaces.h"
1617
#include "mlir/Interfaces/SideEffectInterfaces.h"
1718
#include "mlir/Interfaces/ViewLikeInterface.h"

mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,36 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
1919
}
2020

2121
def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
22+
let summary = [{a composite attribute for `TensorDescType`}];
23+
let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite
24+
attribute defined for `TensorDescType` for describing the following
25+
properties of a `TensorDesc`.
26+
1. `memory_scope`: It describes where the data block described by the
27+
TensorDesc is located, `Global` device memory or `Shared` local memory.
28+
It defaults to `Global`.
29+
2. `array_length`: It describes how many horizontally consecutive blocks
30+
will be loaded by a hardware load instruction. If the TensorDesc shape
31+
is 8x16 with array_length = 2, the loaded block shape will actually be
32+
8x32. Its default value is 1.
33+
3. `boundary_check`: It indicates to the hardware whether to do
34+
out-of-boundary check. The default value is true.
35+
4. `scattered`: It is used to differentiate TensorDescs created from
36+
`create_nd_tdesc` vs from `create_tdesc`.
37+
}];
38+
2239
let parameters = (ins
2340
OptionalParameter<"MemoryScopeAttr">: $memory_scope,
2441
OptionalParameter<"IntegerAttr", "1">: $array_length,
25-
OptionalParameter<"BoolAttr", "true">: $boundary_check
42+
OptionalParameter<"BoolAttr", "true">: $boundary_check,
43+
OptionalParameter<"BoolAttr", "false">: $scattered
2644
);
2745

2846
let builders = [
2947
AttrBuilder<(ins
3048
CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
3149
CArg<"int", "1">:$array_length,
32-
CArg<"bool", "true">: $boundary_check
50+
CArg<"bool", "true">: $boundary_check,
51+
CArg<"bool", "false">: $scattered
3352
)>
3453
];
3554

@@ -41,15 +60,17 @@ def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
4160
//===----------------------------------------------------------------------===//
4261
def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
4362
def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
44-
def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope",
45-
"The address space of the memory the tensor descritor is created for",
63+
def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope",
64+
"The address space of the memory the tensor descritor is created for",
4665
[XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
4766
let genSpecializedAttr = 0;
4867
let cppNamespace = "::mlir::xegpu";
4968
}
5069

51-
def XeGPU_MemoryScopeAttr:
70+
def XeGPU_MemoryScopeAttr:
5271
EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
72+
let summary = [{Describe the location of data described by a `TensorDesc`:
73+
Global device memory (`Global`) or Shared local memory (`SLM`).}];
5374
let assemblyFormat = "$value";
5475
}
5576

@@ -63,19 +84,18 @@ def XeGPU_CachePolicyInvalid: I32EnumAttrCase<"READ_INVALIDATE", 3, "read_
6384
def XeGPU_CachePolicyWriteBack: I32EnumAttrCase<"WRITE_BACK", 4, "write_back">; // valid for write only
6485
def XeGPU_CachePolicyWriteThrough: I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">; // valid for write only
6586

66-
def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy",
67-
[XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached,
87+
def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy",
88+
[XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached,
6889
XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid,
6990
XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> {
7091
let genSpecializedAttr = 0;
7192
let cppNamespace = "::mlir::xegpu";
7293
}
7394

74-
def XeGPU_CacheHintAttr
95+
def XeGPU_CacheHintAttr
7596
: EnumAttr<XeGPU_Dialect, XeGPU_CachePolicyEnums, "cache_hint"> {
97+
let summary = [{Describe the cache settings for prefetch/load/store operators}];
7698
let assemblyFormat = "`<` $value `>`";
7799
}
78100

79-
80-
81-
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
101+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD

0 commit comments

Comments
 (0)