|
| 1 | +//==- RISCVSchedXiangShanNanHu.td - XS-NanHu Scheduling Defs -*- tablegen -*-=// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +//===----------------------------------------------------------------------===// |
| 10 | + |
| 11 | +// XiangShan is a high-performance open-source RISC-V processor developed by |
| 12 | +// the Institute of Computing Technology (ICT), Chinese Academy of Sciences. |
| 13 | +// Source: https://github.com/OpenXiangShan/XiangShan |
| 14 | +// Documentation: https://github.com/OpenXiangShan/XiangShan-doc |
| 15 | + |
| 16 | +// XiangShan-NanHu is the second generation of the XiangShan processor series. |
| 17 | +// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/ |
| 18 | + |
| 19 | +def XiangShanNanHuModel : SchedMachineModel { |
| 20 | + let MicroOpBufferSize = 256; |
| 21 | + let LoopMicroOpBufferSize = 48; // Size of the instruction queue. |
| 22 | + let IssueWidth = 6; // 6-way decode and dispatch. |
| 23 | + let LoadLatency = 4; |
| 24 | + let MispredictPenalty = 11; // Estimated from the pipeline depth. |
| 25 | + let CompleteModel = 0; |
| 26 | + let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions, |
| 27 | + HasVInstructionsI64]; |
| 28 | +} |
| 29 | + |
| 30 | +let SchedModel = XiangShanNanHuModel in { |
| 31 | + |
| 32 | +// Reservation stations are distributed as smaller 32- or 16-entry queues; every unit below is modeled with 16 entries. |
| 33 | +let BufferSize = 16 in { |
| 34 | + def XS2ALU : ProcResource<4>; |
| 35 | + def XS2MDU : ProcResource<2>; |
| 36 | + def XS2MISC : ProcResource<1>; |
| 37 | + |
| 38 | + def XS2FMAC : ProcResource<4>; |
| 39 | + def XS2FMISC : ProcResource<2>; |
| 40 | + |
| 41 | + // The load/store queues are not modeled. |
| 42 | + def XS2LD : ProcResource<2>; |
| 43 | + def XS2ST : ProcResource<2>; |
| 44 | +} |
| 45 | + |
| 46 | +// Branching: every jump class issues to XS2MISC with default WriteRes settings. |
| 47 | +def : WriteRes<WriteJmp, [XS2MISC]>; |
| 48 | +def : WriteRes<WriteJal, [XS2MISC]>; |
| 49 | +def : WriteRes<WriteJalr, [XS2MISC]>; |
| 50 | + |
| 51 | +// Integer arithmetic, logic and shifts: Latency 1 on XS2ALU. |
| 52 | +let Latency = 1 in { |
| 53 | +def : WriteRes<WriteIALU, [XS2ALU]>; |
| 54 | +def : WriteRes<WriteIALU32, [XS2ALU]>; |
| 55 | +def : WriteRes<WriteShiftImm, [XS2ALU]>; |
| 56 | +def : WriteRes<WriteShiftImm32, [XS2ALU]>; |
| 57 | +def : WriteRes<WriteShiftReg, [XS2ALU]>; |
| 58 | +def : WriteRes<WriteShiftReg32, [XS2ALU]>; |
| 59 | +} |
| 60 | + |
| 61 | +// Integer multiplication: Latency 3 on XS2MDU. |
| 62 | +let Latency = 3 in { |
| 63 | +def : WriteRes<WriteIMul, [XS2MDU]>; |
| 64 | +def : WriteRes<WriteIMul32, [XS2MDU]>; |
| 65 | +} |
| 66 | + |
| 67 | +// Integer division: Latency 20 on XS2MDU, with ReleaseAtCycles = [20]. |
| 68 | +// SRT16 algorithm |
| 69 | +let Latency = 20, ReleaseAtCycles = [20] in { |
| 70 | +def : WriteRes<WriteIDiv32, [XS2MDU]>; |
| 71 | +def : WriteRes<WriteIDiv, [XS2MDU]>; |
| 72 | +} |
| 73 | + |
| 74 | +// Zb* operations that run on XS2ALU with Latency 1. |
| 75 | +let Latency = 1 in { |
| 76 | +// Zba |
| 77 | +def : WriteRes<WriteSHXADD, [XS2ALU]>; |
| 78 | +def : WriteRes<WriteSHXADD32, [XS2ALU]>; |
| 79 | + |
| 80 | +// Zbb |
| 81 | +def : WriteRes<WriteRotateImm, [XS2ALU]>; |
| 82 | +def : WriteRes<WriteRotateImm32, [XS2ALU]>; |
| 83 | +def : WriteRes<WriteRotateReg, [XS2ALU]>; |
| 84 | +def : WriteRes<WriteRotateReg32, [XS2ALU]>; |
| 85 | +def : WriteRes<WriteORCB, [XS2ALU]>; |
| 86 | +def : WriteRes<WriteREV8, [XS2ALU]>; |
| 87 | + |
| 88 | +// Zbkb |
| 89 | +def : WriteRes<WriteBREV8, [XS2ALU]>; |
| 90 | +def : WriteRes<WritePACK, [XS2ALU]>; |
| 91 | +def : WriteRes<WritePACK32, [XS2ALU]>; |
| 92 | +def : WriteRes<WriteZIP, [XS2ALU]>; |
| 93 | + |
| 94 | +// Zbs |
| 95 | +def : WriteRes<WriteSingleBit, [XS2ALU]>; |
| 96 | +def : WriteRes<WriteSingleBitImm, [XS2ALU]>; |
| 97 | +def : WriteRes<WriteBEXT, [XS2ALU]>; |
| 98 | +def : WriteRes<WriteBEXTI, [XS2ALU]>; |
| 99 | +} |
| 100 | + |
| 101 | +let Latency = 3 in { |
| 102 | +// Zbb: CLZ/CTZ/CPOP run on XS2MDU with Latency 3, like the rest of this group. |
| 103 | +def : WriteRes<WriteCLZ, [XS2MDU]>; |
| 104 | +def : WriteRes<WriteCLZ32, [XS2MDU]>; |
| 105 | +def : WriteRes<WriteCTZ, [XS2MDU]>; |
| 106 | +def : WriteRes<WriteCTZ32, [XS2MDU]>; |
| 107 | +def : WriteRes<WriteCPOP, [XS2MDU]>; |
| 108 | +def : WriteRes<WriteCPOP32, [XS2MDU]>; |
| 109 | + |
| 110 | +// Zbkc |
| 111 | +def : WriteRes<WriteCLMUL, [XS2MDU]>; |
| 112 | + |
| 113 | +// Zbkx |
| 114 | +def : WriteRes<WriteXPERM, [XS2MDU]>; |
| 115 | +} |
| 116 | + |
| 117 | +// Memory: stores use XS2ST with default settings; loads use XS2LD with Latency 5. |
| 118 | +def : WriteRes<WriteSTB, [XS2ST]>; |
| 119 | +def : WriteRes<WriteSTH, [XS2ST]>; |
| 120 | +def : WriteRes<WriteSTW, [XS2ST]>; |
| 121 | +def : WriteRes<WriteSTD, [XS2ST]>; |
| 122 | +def : WriteRes<WriteFST32, [XS2ST]>; |
| 123 | +def : WriteRes<WriteFST64, [XS2ST]>; |
| 124 | +def : WriteRes<WriteAtomicSTW, [XS2ST]>; |
| 125 | +def : WriteRes<WriteAtomicSTD, [XS2ST]>; |
| 126 | + |
| 127 | +let Latency = 5 in { |
| 128 | +def : WriteRes<WriteLDB, [XS2LD]>; |
| 129 | +def : WriteRes<WriteLDH, [XS2LD]>; |
| 130 | +def : WriteRes<WriteLDW, [XS2LD]>; |
| 131 | +def : WriteRes<WriteLDD, [XS2LD]>; |
| 132 | + |
| 133 | +def : WriteRes<WriteAtomicW, [XS2LD]>; |
| 134 | +def : WriteRes<WriteAtomicD, [XS2LD]>; |
| 135 | +def : WriteRes<WriteAtomicLDW, [XS2LD]>; |
| 136 | +def : WriteRes<WriteAtomicLDD, [XS2LD]>; |
| 137 | + |
| 138 | +def : WriteRes<WriteFLD32, [XS2LD]>; |
| 139 | +def : WriteRes<WriteFLD64, [XS2LD]>; |
| 140 | +} |
| 141 | + |
| 142 | +// XiangShan-NanHu uses the FuDian FPU instead of Berkeley HardFloat. |
| 143 | +// Documentation: https://github.com/OpenXiangShan/fudian |
| 144 | + |
| 145 | +let Latency = 3 in { |
| 146 | +def : WriteRes<WriteFAdd32, [XS2FMAC]>; |
| 147 | +def : WriteRes<WriteFSGNJ32, [XS2FMAC]>; |
| 148 | +def : WriteRes<WriteFMinMax32, [XS2FMAC]>; |
| 149 | +def : WriteRes<WriteFAdd64, [XS2FMAC]>; |
| 150 | +def : WriteRes<WriteFSGNJ64, [XS2FMAC]>; |
| 151 | +def : WriteRes<WriteFMinMax64, [XS2FMAC]>; |
| 152 | + |
| 153 | +def : WriteRes<WriteFCvtI32ToF32, [XS2FMAC]>; |
| 154 | +def : WriteRes<WriteFCvtI32ToF64, [XS2FMAC]>; |
| 155 | +def : WriteRes<WriteFCvtI64ToF32, [XS2FMAC]>; |
| 156 | +def : WriteRes<WriteFCvtI64ToF64, [XS2FMAC]>; |
| 157 | +def : WriteRes<WriteFCvtF32ToI32, [XS2FMAC]>; |
| 158 | +def : WriteRes<WriteFCvtF32ToI64, [XS2FMAC]>; |
| 159 | +def : WriteRes<WriteFCvtF64ToI32, [XS2FMAC]>; |
| 160 | +def : WriteRes<WriteFCvtF64ToI64, [XS2FMAC]>; |
| 161 | +def : WriteRes<WriteFCvtF32ToF64, [XS2FMAC]>; |
| 162 | +def : WriteRes<WriteFCvtF64ToF32, [XS2FMAC]>; |
| 163 | + |
| 164 | +def : WriteRes<WriteFClass32, [XS2FMAC]>; |
| 165 | +def : WriteRes<WriteFClass64, [XS2FMAC]>; |
| 166 | +def : WriteRes<WriteFCmp32, [XS2FMAC]>; |
| 167 | +def : WriteRes<WriteFCmp64, [XS2FMAC]>; |
| 168 | +def : WriteRes<WriteFMovF32ToI32, [XS2FMAC]>; |
| 169 | +def : WriteRes<WriteFMovI32ToF32, [XS2FMAC]>; |
| 170 | +def : WriteRes<WriteFMovF64ToI64, [XS2FMAC]>; |
| 171 | +def : WriteRes<WriteFMovI64ToF64, [XS2FMAC]>; |
| 172 | +} |
| 173 | + |
| 174 | +// FP multiplication (Latency 3) and FMA (Latency 5), both on XS2FMAC. |
| 175 | +let Latency = 3 in { |
| 176 | +def : WriteRes<WriteFMul32, [XS2FMAC]>; |
| 177 | +def : WriteRes<WriteFMul64, [XS2FMAC]>; |
| 178 | +} |
| 179 | + |
| 180 | +let Latency = 5 in { |
| 181 | +def : WriteRes<WriteFMA32, [XS2FMAC]>; |
| 182 | +def : WriteRes<WriteFMA64, [XS2FMAC]>; |
| 183 | +} |
| 184 | + |
| 185 | +// FP division and square root on XS2FMISC; latency depends on the precision. |
| 186 | +def : WriteRes<WriteFDiv32, [XS2FMISC]> { |
| 187 | + let Latency = 11; |
| 188 | +} |
| 189 | +def : WriteRes<WriteFDiv64, [XS2FMISC]> { |
| 190 | + let Latency = 18; |
| 191 | +} |
| 192 | + |
| 193 | +def : WriteRes<WriteFSqrt32, [XS2FMISC]> { |
| 194 | + let Latency = 17; |
| 195 | +} |
| 196 | +def : WriteRes<WriteFSqrt64, [XS2FMISC]> { |
| 197 | + let Latency = 31; |
| 198 | +} |
| 199 | + |
| 200 | +// Others: CSR accesses use XS2MISC, NOPs use no resource, and COPY is scheduled as WriteIALU. |
| 201 | +def : WriteRes<WriteCSR, [XS2MISC]>; |
| 202 | +def : WriteRes<WriteNop, []>; |
| 203 | + |
| 204 | +def : InstRW<[WriteIALU], (instrs COPY)>; |
| 205 | + |
| 206 | +// Bypass and advance: XS2LoadToALUBypass reads get ReadAdvance 1 from the listed load writes; FMA addends get 2; all other reads here are 0. |
| 207 | + |
| 208 | +class XS2LoadToALUBypass<SchedRead read> |
| 209 | + : ReadAdvance<read, 1, [WriteLDB, WriteLDH, WriteLDW, WriteLDD, WriteAtomicW, WriteAtomicD, WriteAtomicLDW, WriteAtomicLDD]>; |
| 210 | + |
| 211 | +def : ReadAdvance<ReadJmp, 0>; |
| 212 | +def : ReadAdvance<ReadJalr, 0>; |
| 213 | +def : ReadAdvance<ReadCSR, 0>; |
| 214 | +def : ReadAdvance<ReadStoreData, 0>; |
| 215 | +def : ReadAdvance<ReadMemBase, 0>; |
| 216 | +def : XS2LoadToALUBypass<ReadIALU>; |
| 217 | +def : XS2LoadToALUBypass<ReadIALU32>; |
| 218 | +def : XS2LoadToALUBypass<ReadShiftImm>; |
| 219 | +def : XS2LoadToALUBypass<ReadShiftImm32>; |
| 220 | +def : XS2LoadToALUBypass<ReadShiftReg>; |
| 221 | +def : XS2LoadToALUBypass<ReadShiftReg32>; |
| 222 | +def : ReadAdvance<ReadIDiv, 0>; |
| 223 | +def : ReadAdvance<ReadIDiv32, 0>; |
| 224 | +def : ReadAdvance<ReadIMul, 0>; |
| 225 | +def : ReadAdvance<ReadIMul32, 0>; |
| 226 | +def : ReadAdvance<ReadAtomicWA, 0>; |
| 227 | +def : ReadAdvance<ReadAtomicWD, 0>; |
| 228 | +def : ReadAdvance<ReadAtomicDA, 0>; |
| 229 | +def : ReadAdvance<ReadAtomicDD, 0>; |
| 230 | +def : ReadAdvance<ReadAtomicLDW, 0>; |
| 231 | +def : ReadAdvance<ReadAtomicLDD, 0>; |
| 232 | +def : ReadAdvance<ReadAtomicSTW, 0>; |
| 233 | +def : ReadAdvance<ReadAtomicSTD, 0>; |
| 234 | +def : ReadAdvance<ReadFStoreData, 0>; |
| 235 | +def : ReadAdvance<ReadFMemBase, 0>; |
| 236 | +def : ReadAdvance<ReadFAdd32, 0>; |
| 237 | +def : ReadAdvance<ReadFAdd64, 0>; |
| 238 | +def : ReadAdvance<ReadFMul32, 0>; |
| 239 | +def : ReadAdvance<ReadFMul64, 0>; |
| 240 | +def : ReadAdvance<ReadFMA32, 0>; |
| 241 | +def : ReadAdvance<ReadFMA32Addend, 2>; // Cascade FMA |
| 242 | +def : ReadAdvance<ReadFMA64, 0>; |
| 243 | +def : ReadAdvance<ReadFMA64Addend, 2>; // Cascade FMA |
| 244 | +def : ReadAdvance<ReadFDiv32, 0>; |
| 245 | +def : ReadAdvance<ReadFDiv64, 0>; |
| 246 | +def : ReadAdvance<ReadFSqrt32, 0>; |
| 247 | +def : ReadAdvance<ReadFSqrt64, 0>; |
| 248 | +def : ReadAdvance<ReadFCmp32, 0>; |
| 249 | +def : ReadAdvance<ReadFCmp64, 0>; |
| 250 | +def : ReadAdvance<ReadFSGNJ32, 0>; |
| 251 | +def : ReadAdvance<ReadFSGNJ64, 0>; |
| 252 | +def : ReadAdvance<ReadFMinMax32, 0>; |
| 253 | +def : ReadAdvance<ReadFMinMax64, 0>; |
| 254 | +def : ReadAdvance<ReadFCvtF32ToI32, 0>; |
| 255 | +def : ReadAdvance<ReadFCvtF32ToI64, 0>; |
| 256 | +def : ReadAdvance<ReadFCvtF64ToI32, 0>; |
| 257 | +def : ReadAdvance<ReadFCvtF64ToI64, 0>; |
| 258 | +def : ReadAdvance<ReadFCvtI32ToF32, 0>; |
| 259 | +def : ReadAdvance<ReadFCvtI32ToF64, 0>; |
| 260 | +def : ReadAdvance<ReadFCvtI64ToF32, 0>; |
| 261 | +def : ReadAdvance<ReadFCvtI64ToF64, 0>; |
| 262 | +def : ReadAdvance<ReadFCvtF32ToF64, 0>; |
| 263 | +def : ReadAdvance<ReadFCvtF64ToF32, 0>; |
| 264 | +def : ReadAdvance<ReadFMovF32ToI32, 0>; |
| 265 | +def : ReadAdvance<ReadFMovI32ToF32, 0>; |
| 266 | +def : ReadAdvance<ReadFMovF64ToI64, 0>; |
| 267 | +def : ReadAdvance<ReadFMovI64ToF64, 0>; |
| 268 | +def : ReadAdvance<ReadFClass32, 0>; |
| 269 | +def : ReadAdvance<ReadFClass64, 0>; |
| 270 | + |
| 271 | +// Zb* reads: ops written on XS2ALU get the load-to-ALU bypass; ops written on XS2MDU use ReadAdvance 0. |
| 272 | +// Zba |
| 273 | +def : XS2LoadToALUBypass<ReadSHXADD>; |
| 274 | +def : XS2LoadToALUBypass<ReadSHXADD32>; |
| 275 | +// Zbb |
| 276 | +def : XS2LoadToALUBypass<ReadRotateImm>; |
| 277 | +def : XS2LoadToALUBypass<ReadRotateImm32>; |
| 278 | +def : XS2LoadToALUBypass<ReadRotateReg>; |
| 279 | +def : XS2LoadToALUBypass<ReadRotateReg32>; |
| 280 | +def : ReadAdvance<ReadCLZ, 0>; |
| 281 | +def : ReadAdvance<ReadCLZ32, 0>; |
| 282 | +def : ReadAdvance<ReadCTZ, 0>; |
| 283 | +def : ReadAdvance<ReadCTZ32, 0>; |
| 284 | +def : ReadAdvance<ReadCPOP, 0>; |
| 285 | +def : ReadAdvance<ReadCPOP32, 0>; |
| 286 | +def : XS2LoadToALUBypass<ReadORCB>; |
| 287 | +def : XS2LoadToALUBypass<ReadREV8>; |
| 288 | +// Zbkc |
| 289 | +def : ReadAdvance<ReadCLMUL, 0>; |
| 290 | +// Zbs |
| 291 | +def : XS2LoadToALUBypass<ReadSingleBit>; |
| 292 | +def : XS2LoadToALUBypass<ReadSingleBitImm>; |
| 293 | +// Zbkb |
| 294 | +def : XS2LoadToALUBypass<ReadBREV8>; |
| 295 | +def : XS2LoadToALUBypass<ReadPACK>; |
| 296 | +def : XS2LoadToALUBypass<ReadPACK32>; |
| 297 | +def : XS2LoadToALUBypass<ReadZIP>; |
| 298 | +// Zbkx |
| 299 | +def : ReadAdvance<ReadXPERM, 0>; |
| 300 | + |
| 301 | +//===----------------------------------------------------------------------===// |
| 302 | +// Extensions marked as unsupported by this model: V, Zfa, Zfh, SFB and Zabha. |
| 303 | +defm : UnsupportedSchedV; |
| 304 | +defm : UnsupportedSchedZfa; |
| 305 | +defm : UnsupportedSchedZfh; |
| 306 | +defm : UnsupportedSchedSFB; |
| 307 | +defm : UnsupportedSchedZabha; |
| 308 | +} |
0 commit comments