@@ -312,15 +312,7 @@ static uint32_t last_pc = 0;
312
312
313
313
/* RV32I Base Instruction Set */
314
314
315
- /* Internal */
316
- static bool do_nop (riscv_t * rv , const rv_insn_t * ir )
317
- {
318
- rv -> X [rv_reg_zero ] = 0 ;
319
- rv -> csr_cycle ++ ;
320
- rv -> PC += ir -> insn_len ;
321
- const rv_insn_t * next = ir + 1 ;
322
- MUST_TAIL return next -> impl (rv , next );
323
- }
315
/* NOP handler: the RVOP macro is invoked with an empty body.
 * NOTE(review): RVOP is defined outside this view; presumably it expands to
 * the do_nop entry that dispatch_table looks up via do_##inst - confirm.
 */
+ RVOP (nop , {/* no operation */ })
324
316
325
317
/* LUI is used to build 32-bit constants and uses the U-type format. LUI
326
318
* places the U-immediate value in the top 20 bits of the destination
@@ -1251,15 +1243,38 @@ RVOP(cswsp, {
1251
1243
#endif
1252
1244
1253
1245
/* auipc + addi */
1254
- RVOP (fuse1 , { rv -> X [ir -> rd ] = (int32_t ) (rv -> PC + ir -> imm + ir -> imm2 ); })
1246
+ static bool do_fuse1 (riscv_t * rv , const rv_insn_t * ir )
1247
+ {
1248
+ rv -> X [rv_reg_zero ] = 0 ;
1249
+ rv -> csr_cycle += 2 ;
1250
+ rv -> X [ir -> rd ] = rv -> PC + ir -> imm ;
1251
+ rv -> X [ir -> rs1 ] = rv -> X [ir -> rd ] + ir -> imm2 ;
1252
+ rv -> PC += 2 * ir -> insn_len ;
1253
+ if (unlikely (RVOP_NO_NEXT (ir )))
1254
+ return true;
1255
+ const rv_insn_t * next = ir + 2 ;
1256
+ MUST_TAIL return next -> impl (rv , next );
1257
+ }
1255
1258
1256
1259
/* auipc + add */
1257
- RVOP (fuse2 , {
1258
- rv -> X [ir -> rd ] = (int32_t ) (rv -> X [ir -> rs1 ]) + (int32_t ) (rv -> PC + ir -> imm );
1259
- })
1260
+ static bool do_fuse2 (riscv_t * rv , const rv_insn_t * ir )
1261
+ {
1262
+ rv -> X [rv_reg_zero ] = 0 ;
1263
+ rv -> csr_cycle += 2 ;
1264
+ rv -> X [ir -> rd ] = rv -> PC + ir -> imm ;
1265
+ rv -> X [ir -> rs2 ] = rv -> X [ir -> rd ] + rv -> X [ir -> rs1 ];
1266
+ rv -> PC += 2 * ir -> insn_len ;
1267
+ if (unlikely (RVOP_NO_NEXT (ir )))
1268
+ return true;
1269
+ const rv_insn_t * next = ir + 2 ;
1270
+ MUST_TAIL return next -> impl (rv , next );
1271
+ }
1260
1272
1261
1273
/* multiple sw */
1262
- RVOP (fuse3 , {
1274
+ static bool do_fuse3 (riscv_t * rv , const rv_insn_t * ir )
1275
+ {
1276
+ rv -> X [rv_reg_zero ] = 0 ;
1277
+ rv -> csr_cycle += ir -> imm2 ;
1263
1278
opcode_fuse_t * fuse = ir -> fuse ;
1264
1279
uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1265
1280
/* the memory addresses of the sw instructions are contiguous, so we only
@@ -1272,10 +1287,18 @@ RVOP(fuse3, {
1272
1287
addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1273
1288
rv -> io .mem_write_w (addr , rv -> X [fuse [i ].rs2 ]);
1274
1289
}
1275
- })
1290
+ rv -> PC += ir -> imm2 * ir -> insn_len ;
1291
+ if (unlikely (RVOP_NO_NEXT (ir )))
1292
+ return true;
1293
+ const rv_insn_t * next = ir + ir -> imm2 ;
1294
+ MUST_TAIL return next -> impl (rv , next );
1295
+ }
1276
1296
1277
1297
/* multiple lw */
1278
- RVOP (fuse4 , {
1298
+ static bool do_fuse4 (riscv_t * rv , const rv_insn_t * ir )
1299
+ {
1300
+ rv -> X [rv_reg_zero ] = 0 ;
1301
+ rv -> csr_cycle += ir -> imm2 ;
1279
1302
opcode_fuse_t * fuse = ir -> fuse ;
1280
1303
uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1281
1304
/* the memory addresses of the lw instructions are contiguous, so we only
@@ -1288,7 +1311,26 @@ RVOP(fuse4, {
1288
1311
addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1289
1312
rv -> X [fuse [i ].rd ] = rv -> io .mem_read_w (addr );
1290
1313
}
1291
- })
1314
+ rv -> PC += ir -> imm2 * ir -> insn_len ;
1315
+ if (unlikely (RVOP_NO_NEXT (ir )))
1316
+ return true;
1317
+ const rv_insn_t * next = ir + ir -> imm2 ;
1318
+ MUST_TAIL return next -> impl (rv , next );
1319
+ }
1320
+
1321
+ /* lui + addi */
1322
+ static bool do_fuse5 (riscv_t * rv , const rv_insn_t * ir )
1323
+ {
1324
+ rv -> X [rv_reg_zero ] = 0 ;
1325
+ rv -> csr_cycle += 2 ;
1326
+ rv -> X [ir -> rd ] = ir -> imm ;
1327
+ rv -> X [ir -> rs1 ] = ir -> imm + ir -> imm2 ;
1328
+ rv -> PC += 2 * ir -> insn_len ;
1329
+ if (unlikely (RVOP_NO_NEXT (ir )))
1330
+ return true;
1331
+ const rv_insn_t * next = ir + 2 ;
1332
+ MUST_TAIL return next -> impl (rv , next );
1333
+ }
1292
1334
1293
1335
static const void * dispatch_table [] = {
1294
1336
#define _ (inst , can_branch ) [rv_insn_ ##inst ] = do_ ##inst ,
@@ -1448,9 +1490,8 @@ static void block_translate(riscv_t *rv, block_t *block)
1448
1490
for (int j = 1; j < count; j++) { \
1449
1491
next_ir = ir + j; \
1450
1492
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
1451
- next_ir->opcode = rv_insn_nop; \
1452
- next_ir->impl = dispatch_table[next_ir->opcode]; \
1453
1493
} \
1494
+ ir->tailcall = next_ir->tailcall; \
1454
1495
}
1455
1496
1456
1497
/* examine whether instructions in a block match a specific pattern. If so,
@@ -1469,25 +1510,32 @@ static void match_pattern(block_t *block)
1469
1510
next_ir = ir + 1 ;
1470
1511
if (next_ir -> opcode == rv_insn_addi && ir -> rd == next_ir -> rs1 ) {
1471
1512
/* the destination register of instruction auipc is equal to the
1472
- * source register 1 of next instruction addi */
1513
+ * source register 1 of next instruction addi.
1514
+ */
1473
1515
ir -> opcode = rv_insn_fuse1 ;
1474
- ir -> rd = next_ir -> rd ;
1516
+ ir -> rs1 = next_ir -> rd ;
1475
1517
ir -> imm2 = next_ir -> imm ;
1476
1518
ir -> impl = dispatch_table [ir -> opcode ];
1477
- next_ir -> opcode = rv_insn_nop ;
1478
- next_ir -> impl = dispatch_table [next_ir -> opcode ];
1519
+ ir -> tailcall = next_ir -> tailcall ;
1479
1520
} else if (next_ir -> opcode == rv_insn_add &&
1480
1521
ir -> rd == next_ir -> rs2 ) {
1481
1522
/* the destination register of instruction auipc is equal to the
1482
1523
* source register 2 of next instruction add */
1483
1524
ir -> opcode = rv_insn_fuse2 ;
1484
- ir -> rd = next_ir -> rd ;
1525
+ ir -> rs2 = next_ir -> rd ;
1485
1526
ir -> rs1 = next_ir -> rs1 ;
1486
1527
ir -> impl = dispatch_table [ir -> opcode ];
1487
- next_ir -> opcode = rv_insn_nop ;
1488
- next_ir -> impl = dispatch_table [next_ir -> opcode ];
1528
+ } else if (next_ir -> opcode == rv_insn_add &&
1529
+ ir -> rd == next_ir -> rs1 ) {
1530
+ /* the destination register of instruction auipc is equal to the
1531
+ * source register 1 of next instruction add */
1532
+ ir -> opcode = rv_insn_fuse2 ;
1533
+ ir -> rs2 = next_ir -> rd ;
1534
+ ir -> rs1 = next_ir -> rs2 ;
1535
+ ir -> impl = dispatch_table [ir -> opcode ];
1489
1536
}
1490
1537
break ;
1538
+
1491
1539
/* If the memory addresses of a sequence of store or load instructions
1492
1540
* are contiguous, combine these instructions.
1493
1541
*/
@@ -1497,7 +1545,19 @@ static void match_pattern(block_t *block)
1497
1545
case rv_insn_lw :
1498
1546
COMBINE_MEM_OPS (1 );
1499
1547
break ;
1500
- /* FIXME: lui + addi */
1548
+ case rv_insn_lui :
1549
+ next_ir = ir + 1 ;
1550
+ if (next_ir -> opcode == rv_insn_addi && ir -> rd == next_ir -> rs1 ) {
1551
+ /* the destination register of instruction lui is equal to
1552
+ * the source register 1 of next instruction addi.
1553
+ */
1554
+ ir -> opcode = rv_insn_fuse5 ;
1555
+ ir -> rs1 = next_ir -> rd ;
1556
+ ir -> imm2 = next_ir -> imm ;
1557
+ ir -> impl = dispatch_table [ir -> opcode ];
1558
+ ir -> tailcall = next_ir -> tailcall ;
1559
+ }
1560
+ break ;
1501
1561
/* TODO: mixture of sw and lw */
1502
1562
/* TODO: reorder instructions to match pattern */
1503
1563
}
0 commit comments