-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[TableGen][InstrInfoEmitter] Count sub-operands on def operands #88837
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Created using spr 1.3.4
@llvm/pr-subscribers-backend-sparc @llvm/pr-subscribers-backend-x86 Author: None (darkbuck) Changes
Full diff: https://github.com/llvm/llvm-project/pull/88837.diff 4 Files Affected:
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e792427cca282..4d68f93efeac17 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
}
let rd = 0 in {
- let Defs = [CPSR] in {
- def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Uses = [CPSR] in {
+ def STCSRrr : F3_1<3, 0b110101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"st %csr, [$addr]", [], IIC_st>;
- def STCSRri : F3_2<3, 0b110101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STCSRri : F3_2<3, 0b110101, (outs), (ins (MEMri $rs1, $simm13):$addr),
"st %csr, [$addr]", [], IIC_st>;
}
- let Defs = [CPQ] in {
- def STDCQrr : F3_1<3, 0b110110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Uses = [CPQ] in {
+ def STDCQrr : F3_1<3, 0b110110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"std %cq, [$addr]", [], IIC_std>;
- def STDCQri : F3_2<3, 0b110110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STDCQri : F3_2<3, 0b110110, (outs), (ins (MEMri $rs1, $simm13):$addr),
"std %cq, [$addr]", [], IIC_std>;
}
}
let rd = 0 in {
- let Defs = [FSR] in {
- def STFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Uses = [FSR] in {
+ def STFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"st %fsr, [$addr]", [], IIC_st>;
- def STFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
"st %fsr, [$addr]", [], IIC_st>;
}
- let Defs = [FQ] in {
- def STDFQrr : F3_1<3, 0b100110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+ let mayStore = 1, Defs = [FQ] in {
+ def STDFQrr : F3_1<3, 0b100110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"std %fq, [$addr]", [], IIC_std>;
- def STDFQri : F3_2<3, 0b100110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STDFQri : F3_2<3, 0b100110, (outs), (ins (MEMri $rs1, $simm13):$addr),
"std %fq, [$addr]", [], IIC_std>;
}
}
-let rd = 1, Defs = [FSR] in {
- def STXFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+let rd = 1, mayStore = 1, Uses = [FSR] in {
+ def STXFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
"stx %fsr, [$addr]", []>, Requires<[HasV9]>;
- def STXFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+ def STXFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
"stx %fsr, [$addr]", []>, Requires<[HasV9]>;
}
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 27a0c889a4da3e..e27aa4115990e9 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -58,8 +58,8 @@ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
}
let SchedRW = [WriteCMOV, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault],
Predicates = [HasCMOV, HasCF, In64BitMode], mayStore = 1 in
- def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst),
- (ins t.RegClass:$src1, ccode:$cond),
+ def mr : ITy<0x40, MRMDestMemCC, t, (outs),
+ (ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond),
"cfcmov${cond}", unaryop_ndd_args, []>, UseEFLAGS, NF;
}
diff --git a/llvm/test/TableGen/def-multiple-operands.td b/llvm/test/TableGen/def-multiple-operands.td
new file mode 100644
index 00000000000000..dbee7cfacdf7d8
--- /dev/null
+++ b/llvm/test/TableGen/def-multiple-operands.td
@@ -0,0 +1,33 @@
+// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo {}
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+def R0 : Register<"r0">;
+def P0 : Register<"p0">;
+def R32 : RegisterClass<"MyNS", [i32], 0, (add R0)>;
+def P1 : RegisterClass<"MyNS", [i1], 0, (add P0)>;
+
+def Reg3Opnd : Operand<OtherVT> {
+ let MIOperandInfo = (ops R32, R32, P1);
+}
+
+// CHECK: archInstrTable {{.* = \{}}
+// CHECK: {{\{}}
+// CHECK: {{\{}} [[ID:[0-9]+]], 4, 3, 13, {{.+\}, \/\/}}
+// CHECK-SAME: Inst #[[ID]] = InstA
+def InstA : Instruction {
+ let Namespace = "MyNS";
+ let Size = 13;
+ // InstA should have 3 defs out of 4 operands.
+ let OutOperandList = (outs Reg3Opnd:$dst);
+ let InOperandList = (ins i32imm:$c);
+ field bits<8> Inst;
+ field bits<8> SoftFail = 0;
+ let hasSideEffects = false;
+}
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 36f8fa14653938..b3a05e081f6375 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -1181,9 +1181,15 @@ void InstrInfoEmitter::emitRecord(
// Each logical operand can be multiple MI operands.
MinOperands =
Inst.Operands.back().MIOperandNo + Inst.Operands.back().MINumOperands;
+ // Even the logical output operand may be multiple MI operands.
+ int DefOperands = 0;
+ if (Inst.Operands.NumDefs) {
+ auto &Opnd = Inst.Operands[Inst.Operands.NumDefs - 1];
+ DefOperands = Opnd.MIOperandNo + Opnd.MINumOperands;
+ }
OS << " { ";
- OS << Num << ",\t" << MinOperands << ",\t" << Inst.Operands.NumDefs << ",\t"
+ OS << Num << ",\t" << MinOperands << ",\t" << DefOperands << ",\t"
<< Inst.TheDef->getValueAsInt("Size") << ",\t"
<< SchedModels.getSchedClassIdx(Inst) << ",\t";
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really understand the backend changes so I'll leave it up to other to review those
@@ -0,0 +1,33 @@ | |||
// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a short description of what this test is checking?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, this test checks whether the MCInstrDesc
entry for InstA
has the expected NumOperands
and NumDefs
. It should have 3 defs out of 4 operands.
@@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in { | |||
} | |||
|
|||
let rd = 0 in { | |||
let Defs = [CPSR] in { | |||
def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins), | |||
let mayStore = 1, Uses = [CPSR] in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The mayStore setting looks like an unrelated change
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The mayStore setting looks like an unrelated change
I agree but I don't want those instructions are optimized away after this change. I cannot find any test from sparc target. Not sure whom should we ask for help.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But can you precommit them? That should be doable without the operand change?
def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst), | ||
(ins t.RegClass:$src1, ccode:$cond), | ||
def mr : ITy<0x40, MRMDestMemCC, t, (outs), | ||
(ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change looks good to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall the changes looks okay to me, but a little question; why doesn't it also affect the normal store instructions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall the changes looks okay to me, but a little question; why doesn't it also affect the normal store instructions?
These STORE-like instructions have the wrong operand info for their memory operands, which should be 'use' instead of 'def'. I found that during the debugging of this change.
Not sure how best to test this, but I think it fixes the error https://github.com/llvm/mlir-www/actions/runs/8699908058/job/23859264085#step:7:1111 Co-authored-by: Jeremy Kun <[email protected]> Co-authored-by: Jacques Pienaar <[email protected]>
Created using spr 1.3.4
…) (#88014) Use refactored `CheckForConstantInitializer()` to skip checking expr with error. --------- Co-authored-by: Aaron Ballman <[email protected]>
…ccess and bad_function_call (#87390) This patch uses our availability machinery to allow defining a key function for bad_function_call and bad_expected_access at all times but only rely on it when we can. This prevents compilers from complaining about weak vtables and reduces code bloat and the amount of work done by the dynamic linker. rdar://111917845
…6410) When we initially implemented the C++20 synchronization library, we reluctantly accepted for the implementation to be backported to C++03 upon request from the person who provided the patch. This was when we were only starting to have experience with the issues this can create, so we flinched. Nowadays, we have a much stricter stance about not backporting features to previous standards. We have recently started fixing several bugs (and near bugs) in our implementation of the synchronization library. A recurring theme during these reviews has been how difficult to understand the current code is, and upon inspection it becomes clear that being able to use a few recent C++ features (in particular lambdas) would help a great deal. The code would still be pretty intricate, but it would be a lot easier to reason about the flow of callbacks through things like __thread_poll_with_backoff. As a result, this patch deprecates support for the synchronization library before C++20. In the next release, we can remove that support entirely.
This tests requires the OpenMP runtime to be present, but the way that the lit config detects it fails when "openmp" is added to RUNTIMES instead of PROJECTS. This caused the tests to be skipped as unsupported in local and upstream tests. The actual bug was a missing word in the message, and putting the check at the wrong line.
This reverts commit 82f479b due to bot breakage.
CASE_VFMA_OPCODE_VV and CASE_VFMA_CHANGE_OPCODE_VV need to match up if we are are to avoid "Unexpected opcode" errors, but in CASE_VFMA_CHANGE_OPCODE_VV, CASE_VFMA_CHANGE_OPCODE_LMULS_MF2 had mistakenly been used instead of CASE_VFMA_CHANGE_OPCODE_LMULS_MF4.
…es" (#88919) Reverts #87130 Bot is broken with clang crash: https://lab.llvm.org/buildbot/#/builders/272/builds/14063/steps/6/logs/stdio
Should be a bit easier to read.
…e CUDA build. (#88821) This is to experiment with distributing F18 runtime CUDA library in the form of a pure PTX library. The change is under FLANG_EXPERIMENTAL_CUDA_RUNTIME CMake control.
Include types `fexcept_t` and `fenv_t ` from corresponding proxy headers, as they are available since #88467.
Remove the fenv macro dependency from the CMake files as the underlying targets do not make use of it. Note that we do not have to worry about [corresponding Bazel targets](https://github.com/llvm/llvm-project/blob/main/utils/bazel/llvm-project-overlay/libc/BUILD.bazel#L1138-L1288), as they look good.
This patch enables users of MemProfReader to directly supply mappings from CallStackId to actual call stacks. Once the users of the current constructor without CSIdMap switch to the new constructor, we'll have fewer users of: - IndexedAllocationInfo::CallStack - IndexedMemProfRecord::CallSites bringing us one step closer to the removal of these fields in favor of: - IndexedAllocationInfo::CSId - IndexedMemProfRecord::CallSiteIds
…telement. If the vectorized GEP instruction can be still kept as a scalar GEP, better to keep it as scalar instead of extractelement. In many cases it is more profitable. Metric: size..text Program size..text results results0 diff test-suite :: SingleSource/Benchmarks/Misc/oourafft.test 18911.00 19695.00 4.1% test-suite :: SingleSource/Benchmarks/Misc-C++-EH/spirit.test 59987.00 60707.00 1.2% test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1392209.00 1392753.00 0.0% test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1392209.00 1392753.00 0.0% test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test 1087996.00 1088236.00 0.0% test-suite :: MultiSource/Benchmarks/Bullet/bullet.test 309310.00 309342.00 0.0% test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664661.00 664693.00 0.0% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664661.00 664693.00 0.0% test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12354636.00 12354908.00 0.0% test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test 1152748.00 1152716.00 -0.0% test-suite :: MultiSource/Applications/oggenc/oggenc.test 191787.00 191771.00 -0.0% test-suite :: SingleSource/UnitTests/matrix-types-spec.test 480796.00 480476.00 -0.1% Misc/oourafft - Extra code gets vectorized Misc-C++-EH/spirit - same CFP2017speed/638.imagick_s CFP2017rate/538.imagick_r - same, extra code gets vectorized CINT2006/400.perlbench - some extra 4 x ptr stores vectorized Bullet/bullet - extra 4 x ptr store vectorized CINT2017rate/525.x264_r CINT2017speed/625.x264_s - same CFP2017rate/526.blender_r - extra 8 x float stores (several), some extra 4 x ptr stores CFP2006/453.povray - 2 x double loads/stores replaced by 4 x double loads/stores Applications/oggenc - extra code is vectorized UnitTests/matrix-types-spec - extra code gets vectorized Reviewers: RKSimon Reviewed By: RKSimon Pull Request: #88877
We can try to vectorize long store sequences, if short ones were unsuccessful because of the non-profitable vectorization. It should not increase compile time significantly (stores are sorted already, complexity is n x log n), but vectorize extra code. Metric: size..text Program size..text results results0 diff test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test 1088012.00 1088236.00 0.0% test-suite :: SingleSource/UnitTests/matrix-types-spec.test 480396.00 480476.00 0.0% test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664613.00 664661.00 0.0% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664613.00 664661.00 0.0% test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test 2041105.00 2040961.00 -0.0% test-suite :: MultiSource/Applications/JM/lencod/lencod.test 836563.00 836387.00 -0.0% test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test 1035100.00 1032140.00 -0.3% In all benchmarks extra code gets vectorized Reviewers: RKSimon Reviewed By: RKSimon Pull Request: #88563
This was missed in #88455, causing most of the .hlsl to SPIR-V tests to fail (such as clang\test\Driver\hlsl-lang-targets-spirv.hlsl)
This patch fixes: mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp:58:2: error: extra ';' outside of a function is incompatible with C++98 [-Werror,-Wc++98-compat-extra-semi]
This just replaces an '#include<new>' with a declaration of array placement new.
Summary: Previously, the R&R support was global state initialized by a global constructor. This is bad because it prevents us from adequately constraining the lifetime of the library. Additionally, we want to minimize the amount of global state floating around. This patch moves the R&R support into a plugin member like everything else. This means there will be multiple copies of the R&R implementation floating around, but this was already the case given the fact that we currently handle everything with dynamic libraries.
I figured out how to test this with `make mlir-doc doxygen-mlir`
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
this commit was split into 3 |
generating instr info.
store or store-like instructions are wrongly placed in the output
list.