-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Clang][LLVM] Implement single-single vectors MOP4{A/S} #127797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 5 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
87b7d61
[Clang][LLVM] Implement single-single vectors MOP4{A/S}
virginia-cangelosi 228b757
Add whitespace back in to simplify patch
virginia-cangelosi e7dd7ca
Add extra intrinsics to differentiate u/smop4
virginia-cangelosi 88e9c0e
Simplify clang multiclasses
virginia-cangelosi d29ca21
Add intrinsics to differentiate za64 and za32
virginia-cangelosi d69c8ed
Fix immediates and add more tests
virginia-cangelosi eb39b71
Fix llvm test
virginia-cangelosi 3b1f667
Fix typo in QuarterTile
virginia-cangelosi 51ab585
Restructure files and add negative tests
virginia-cangelosi 6e8e25f
For loop the intrinsics
virginia-cangelosi 1fa3a35
Change pattern name
virginia-cangelosi 9cc55eb
fix optional 1x1 in tests
virginia-cangelosi 41e9484
Fix SUMOP and USMOP [_1x1]
virginia-cangelosi 3c5fa20
Merge branch 'main' into si_si_vec
virginia-cangelosi 1012467
Merge branch 'main' into si_si_vec
virginia-cangelosi File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -376,6 +376,24 @@ let SMETargetGuard = "sme2" in { | |
// Outer product and accumulate/subtract | ||
// | ||
|
||
// MOP4: emits one record, NAME # "_1x1", derived from the Inst class. | ||
//   name - spliced into the builtin name string directly after "svmop4" | ||
//          (the instantiations in this file pass "a" or "s"). | ||
//   n    - spliced in after "_1x1_" (instantiations pass "za16", "za32" or "za64"). | ||
//   t    - forwarded unchanged as Inst's third argument. | ||
//   i, wide - concatenated as i # wide # "_1x1"; presumably the LLVM intrinsic name | ||
//          (instantiations pass e.g. "aarch64_sme_smop4a" with "_wide" or "") - TODO confirm. | ||
// NOTE(review): the immediate check is hard-coded to ImmCheck<0, ImmCheck0_3> for every | ||
// instantiation. A reviewer on this change asked whether it should instead be passed in | ||
// according to the ZA type size (64, 32 or 16 bit) - confirm before relying on it. | ||
multiclass MOP4<string name, string n, string t, string i, string wide> { | ||
def NAME # "_1x1" : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; | ||
} | ||
|
||
// SUMOP4: emits one record with suffix _1x1, derived from SInst. | ||
//   s  - spliced into the builtin name after "svmop4" and into the intrinsic name | ||
//        after "aarch64_sme_sumop4" (instantiations pass "a" or "s"). | ||
//   za - spliced into the builtin name after the bracketed "[_1x1_]" part | ||
//        (instantiations pass "za32" or "za64"). | ||
//   t  - forwarded unchanged as SInst's third argument. | ||
//   i  - extra intrinsic-name infix placed before "_wide_1x1" | ||
//        (instantiations pass "" or "_za64"). | ||
// The prototype string is "vidu" and the immediate check is fixed at ImmCheck<0, ImmCheck0_3>. | ||
// NOTE(review): the inline review nit below suggests changing the "{2}"/"{3}" type | ||
// placeholders in the name string to "{d}"/"{3}" - confirm whether that was applied. | ||
multiclass SUMOP4<string s, string za, string t, string i> { | ||
def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: replace {2}{3}. by {d}{3} |
||
"vidu", t, MergeNone, "aarch64_sme_sumop4" # s # i # "_wide_1x1", | ||
[IsStreaming, IsInOutZA], | ||
[ImmCheck<0, ImmCheck0_3>]>; | ||
} | ||
|
||
// USMOP4: emits one record with suffix _1x1, derived from SInst. | ||
//   s  - spliced into the builtin name after "svmop4" and into the intrinsic name | ||
//        after "aarch64_sme_usmop4" (instantiations pass "a" or "s"). | ||
//   za - spliced into the builtin name after the bracketed "[_1x1_]" part | ||
//        (instantiations pass "za32" or "za64"). | ||
//   t  - forwarded unchanged as SInst's third argument. | ||
//   i  - extra intrinsic-name infix placed before "_wide_1x1" | ||
//        (instantiations pass "" or "_za64"). | ||
// Differs from SUMOP4 only in the prototype string ("vidx" instead of "vidu") and the | ||
// "usmop4" intrinsic stem. The immediate check is fixed at ImmCheck<0, ImmCheck0_3>. | ||
// NOTE(review): the inline review nit below suggests changing the "{2}"/"{3}" type | ||
// placeholders in the name string to "{d}"/"{3}" - confirm whether that was applied. | ||
multiclass USMOP4<string s, string za, string t, string i> { | ||
def _1x1 : SInst<"svmop4" # s # "[_1x1_]" # za # "[_{2}_{3}]", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: replace {2}{3}. by {d}{3} |
||
"vidx", t, MergeNone, "aarch64_sme_usmop4" # s # i # "_wide_1x1", | ||
[IsStreaming, IsInOutZA], | ||
[ImmCheck<0, ImmCheck0_3>]>; | ||
} | ||
|
||
Lukacma marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let SMETargetGuard = "sme2" in { | ||
def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; | ||
def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; | ||
|
@@ -387,6 +405,25 @@ let SMETargetGuard = "sme2" in { | |
|
||
def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; | ||
Lukacma marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide">; | ||
defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide">; | ||
|
||
defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide">; | ||
defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide">; | ||
|
||
defm SVFMOP4A_HtoS : MOP4<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; | ||
defm SVFMOP4S_HtoS : MOP4<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; | ||
defm SVFMOP4A_S : MOP4<"a", "za32", "f", "aarch64_sme_mop4a", "">; | ||
defm SVFMOP4S_S : MOP4<"s", "za32", "f", "aarch64_sme_mop4s", "">; | ||
|
||
defm SVBMOP4A_S : MOP4<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; | ||
defm SVBMOP4S_S : MOP4<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">; | ||
|
||
defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs", "">; | ||
defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs", "">; | ||
defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs", "">; | ||
defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs", "">; | ||
|
||
// VERTICAL DOT-PRODUCT | ||
def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; | ||
def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; | ||
|
@@ -437,6 +474,15 @@ let SMETargetGuard = "sme2" in { | |
} | ||
|
||
let SMETargetGuard = "sme2,sme-i16i64" in { | ||
defm SVSMOP4A_HtoD : MOP4<"a", "za64", "s", "aarch64_sme_smop4a_za64", "_wide">; | ||
defm SVSMOP4S_HtoD : MOP4<"s", "za64", "s", "aarch64_sme_smop4s_za64", "_wide">; | ||
defm SVUMOP4A_HtoD : MOP4<"a", "za64", "Us", "aarch64_sme_umop4a_za64", "_wide">; | ||
defm SVUMOP4S_HtoD : MOP4<"s", "za64", "Us", "aarch64_sme_umop4s_za64", "_wide">; | ||
defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s", "_za64">; | ||
defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s", "_za64">; | ||
defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us", "_za64">; | ||
defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us", "_za64">; | ||
|
||
def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; | ||
def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; | ||
|
||
|
@@ -473,6 +519,9 @@ let SMETargetGuard = "sme2" in { | |
} | ||
|
||
let SMETargetGuard = "sme2,sme-f64f64" in { | ||
defm SVFMOP4A_D : MOP4<"a", "za64", "d", "aarch64_sme_mop4a", "">; | ||
defm SVFMOP4S_D : MOP4<"s", "za64", "d", "aarch64_sme_mop4s", "">; | ||
|
||
def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; | ||
def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; | ||
def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; | ||
|
@@ -490,6 +539,9 @@ let SMETargetGuard = "sme2,sme-f64f64" in { | |
} | ||
|
||
let SMETargetGuard = "sme-f16f16" in { | ||
defm SVFMOP4A_H : MOP4<"a", "za16", "h", "aarch64_sme_mop4a", "">; | ||
defm SVFMOP4S_H : MOP4<"s", "za16", "h", "aarch64_sme_mop4s", "">; | ||
|
||
def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; | ||
def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; | ||
def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; | ||
|
@@ -507,6 +559,9 @@ let SMETargetGuard = "sme-f16f16" in { | |
} | ||
|
||
let SMETargetGuard = "sme-b16b16" in { | ||
defm SVBMOP4A_H : MOP4<"a", "za16", "bf", "aarch64_sme_mop4a", "">; | ||
defm SVBMOP4S_H : MOP4<"s", "za16", "bf", "aarch64_sme_mop4s", "">; | ||
|
||
def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; | ||
def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; | ||
def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe ImmCheck0_3 should be a parameter that is passed according to the ZA type size (64, 32, or 16 bits), because they have different limits.