@@ -287,18 +287,18 @@ enum {
287
287
#define RVOP_RUN_NEXT (!ir->tailcall)
288
288
#endif
289
289
290
- #define RVOP (inst , code ) \
291
- static bool do_##inst(riscv_t *rv UNUSED , const rv_insn_t *ir UNUSED ) \
292
- { \
293
- rv->X[rv_reg_zero] = 0; \
294
- code; \
295
- rv->csr_cycle++; \
296
- nextop: \
297
- rv->PC += ir->insn_len; \
298
- if (!RVOP_RUN_NEXT) \
299
- return true; \
300
- const rv_insn_t *next = ir + 1; \
301
- MUST_TAIL return next->impl(rv, next); \
290
/* RVOP(inst, code) defines the handler `static bool do_<inst>(rv, ir)`.
 *
 * The generated function, in order:
 *   1. writes 0 to rv->X[rv_reg_zero];
 *   2. increments rv->csr_cycle (before the instruction body runs);
 *   3. executes `code`, the instruction-specific body;
 *   4. at the `nextop` label, advances rv->PC by ir->insn_len;
 *   5. returns true when RVOP_RUN_NEXT evaluates to false, otherwise
 *      tail-calls the handler stored in the following IR entry (ir + 1).
 *
 * `code` is pasted inside the function body, so it can refer to `rv` and `ir`
 * directly.
 */
+ #define RVOP (inst , code ) \
291
+ static bool do_##inst(riscv_t *rv, const rv_insn_t *ir) \
292
+ { \
293
+ rv->X[rv_reg_zero] = 0; \
294
+ rv->csr_cycle++; \
295
+ code; \
296
+ nextop: \
297
+ rv->PC += ir->insn_len; \
298
+ if (!RVOP_RUN_NEXT) \
299
+ return true; \
300
+ const rv_insn_t *next = ir + 1; \
301
+ MUST_TAIL return next->impl(rv, next); \
302
302
}
303
303
304
304
/* RV32I Base Instruction Set */
@@ -1277,6 +1277,48 @@ RVOP(cswsp, {
1277
1277
})
1278
1278
#endif
1279
1279
1280
+ /* auipc + addi */
1281
+ RVOP (fuse1 , {
1282
+ rv -> X [ir -> rd ] = (int32_t ) (rv -> PC + ir -> imm + ir -> imm2 );
1283
+ rv -> PC += ir -> insn_len ;
1284
+ })
1285
+
1286
+ /* auipc + add */
1287
+ RVOP (fuse2 , {
1288
+ rv -> X [ir -> rd ] = (int32_t ) (rv -> X [ir -> rs1 ]) + (int32_t ) (rv -> PC + ir -> imm );
1289
+ rv -> PC += ir -> insn_len ;
1290
+ })
1291
+
1292
+ /* multiple sw */
1293
+ RVOP (fuse3 , {
1294
+ mem_fuse_t * mem_fuse = ir -> mem_fuse ;
1295
+ for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
1296
+ const uint32_t addr = rv -> X [mem_fuse [i ].rs1 ] + mem_fuse [i ].imm ;
1297
+ RV_EXC_MISALIGN_HANDLER (3 , store , false, 1 );
1298
+ rv -> io .mem_write_w (rv , addr , rv -> X [mem_fuse [i ].rs2 ]);
1299
+ }
1300
+ rv -> PC += ir -> insn_len * (ir -> imm2 - 1 );
1301
+ })
1302
+
1303
+ /* multiple lw */
1304
+ RVOP (fuse4 , {
1305
+ mem_fuse_t * mem_fuse = ir -> mem_fuse ;
1306
+ for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
1307
+ const uint32_t addr = rv -> X [mem_fuse [i ].rs1 ] + mem_fuse [i ].imm ;
1308
+ RV_EXC_MISALIGN_HANDLER (3 , load , false, 1 );
1309
+ rv -> X [mem_fuse [i ].rd ] = rv -> io .mem_read_w (rv , addr );
1310
+ }
1311
+ rv -> PC += ir -> insn_len * (ir -> imm2 - 1 );
1312
+ })
1313
+
1314
+ static bool do_empty (riscv_t * rv , const rv_insn_t * ir )
1315
+ {
1316
+ rv -> X [rv_reg_zero ] = 0 ;
1317
+ rv -> csr_cycle ++ ;
1318
+ const rv_insn_t * next = ir + 1 ;
1319
+ MUST_TAIL return next -> impl (rv , next );
1320
+ }
1321
+
1280
1322
static const void * dispatch_table [] = {
1281
1323
#define _ (inst , can_branch ) [rv_insn_ ##inst ] = do_ ##inst ,
1282
1324
RISCV_INSN_LIST
@@ -1407,6 +1449,92 @@ static void extend_block(riscv_t *rv, block_t *block)
1407
1449
last_ir -> branch_untaken = next -> ir ;
1408
1450
}
1409
1451
1452
+ static void match_pattern (block_t * block )
1453
+ {
1454
+ for (uint32_t i = 0 ; i < block -> n_insn - 1 ; i ++ ) {
1455
+ rv_insn_t * ir = block -> ir + i , * next_ir = NULL ;
1456
+ int32_t count = 0 ;
1457
+ switch (ir -> opcode ) {
1458
+ case rv_insn_auipc :
1459
+ next_ir = ir + 1 ;
1460
+ if (next_ir -> opcode == rv_insn_addi ) {
1461
+ if (ir -> rd == next_ir -> rs1 ) {
1462
+ ir -> opcode = rv_insn_fuse1 ;
1463
+ ir -> rd = next_ir -> rd ;
1464
+ ir -> imm2 = next_ir -> imm ;
1465
+ ir -> impl = dispatch_table [ir -> opcode ];
1466
+ next_ir -> opcode = rv_insn_empty ;
1467
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1468
+ } else if (ir -> rd == next_ir -> rs2 ) {
1469
+ ir -> opcode = rv_insn_fuse2 ;
1470
+ ir -> rd = next_ir -> rd ;
1471
+ ir -> rs1 = next_ir -> rs1 ;
1472
+ ir -> impl = dispatch_table [ir -> opcode ];
1473
+ next_ir -> opcode = rv_insn_empty ;
1474
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1475
+ }
1476
+ }
1477
+ break ;
1478
+ case rv_insn_sw :
1479
+ count = 1 ;
1480
+ for (uint32_t j = 1 ; j < block -> n_insn - 1 - i ; j ++ ) {
1481
+ next_ir = ir + j ;
1482
+ if (next_ir -> opcode != rv_insn_sw )
1483
+ break ;
1484
+ count ++ ;
1485
+ }
1486
+ if (count >= 5 ) {
1487
+ ir -> opcode = rv_insn_fuse3 ;
1488
+ ir -> mem_fuse = malloc (count * sizeof (mem_fuse_t ));
1489
+ ir -> imm2 = count ;
1490
+ ir -> mem_fuse [0 ].imm = ir -> imm ;
1491
+ ir -> mem_fuse [0 ].rd = ir -> rd ;
1492
+ ir -> mem_fuse [0 ].rs1 = ir -> rs1 ;
1493
+ ir -> mem_fuse [0 ].rs2 = ir -> rs2 ;
1494
+ ir -> impl = dispatch_table [ir -> opcode ];
1495
+ for (int j = 1 ; j < count ; j ++ ) {
1496
+ next_ir = ir + j ;
1497
+ ir -> mem_fuse [j ].imm = next_ir -> imm ;
1498
+ ir -> mem_fuse [j ].rd = next_ir -> rd ;
1499
+ ir -> mem_fuse [j ].rs1 = next_ir -> rs1 ;
1500
+ ir -> mem_fuse [j ].rs2 = next_ir -> rs2 ;
1501
+ next_ir -> opcode = rv_insn_empty ;
1502
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1503
+ }
1504
+ }
1505
+ break ;
1506
+ case rv_insn_lw :
1507
+ count = 1 ;
1508
+ for (uint32_t j = 1 ; j < block -> n_insn - 1 - i ; j ++ ) {
1509
+ next_ir = ir + j ;
1510
+ if (next_ir -> opcode != rv_insn_lw )
1511
+ break ;
1512
+ count ++ ;
1513
+ }
1514
+ if (count >= 5 ) {
1515
+ ir -> opcode = rv_insn_fuse4 ;
1516
+ ir -> mem_fuse = malloc (count * sizeof (mem_fuse_t ));
1517
+ ir -> imm2 = count ;
1518
+ ir -> mem_fuse [0 ].imm = ir -> imm ;
1519
+ ir -> mem_fuse [0 ].rd = ir -> rd ;
1520
+ ir -> mem_fuse [0 ].rs1 = ir -> rs1 ;
1521
+ ir -> mem_fuse [0 ].rs2 = ir -> rs2 ;
1522
+ ir -> impl = dispatch_table [ir -> opcode ];
1523
+ for (int j = 1 ; j < count ; j ++ ) {
1524
+ next_ir = ir + j ;
1525
+ ir -> mem_fuse [j ].imm = next_ir -> imm ;
1526
+ ir -> mem_fuse [j ].rd = next_ir -> rd ;
1527
+ ir -> mem_fuse [j ].rs1 = next_ir -> rs1 ;
1528
+ ir -> mem_fuse [j ].rs2 = next_ir -> rs2 ;
1529
+ next_ir -> opcode = rv_insn_empty ;
1530
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1531
+ }
1532
+ }
1533
+ break ;
1534
+ }
1535
+ }
1536
+ }
1537
+
1410
1538
static block_t * block_find_or_translate (riscv_t * rv , block_t * prev )
1411
1539
{
1412
1540
block_map_t * map = & rv -> block_map ;
@@ -1425,6 +1553,9 @@ static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
1425
1553
/* translate the basic block */
1426
1554
block_translate (rv , next );
1427
1555
1556
+ /* fuse instruction */
1557
+ match_pattern (next );
1558
+
1428
1559
/* insert the block into block map */
1429
1560
block_insert (& rv -> block_map , next );
1430
1561
0 commit comments