@@ -31,6 +31,7 @@ extern struct target_ops gdbstub_ops;
31
31
#include "decode.h"
32
32
#include "riscv.h"
33
33
#include "riscv_private.h"
34
+ #include "state.h"
34
35
#include "utils.h"
35
36
36
37
/* RISC-V exception code list */
@@ -310,7 +311,15 @@ static uint32_t last_pc = 0;
310
311
/* RV32I Base Instruction Set */
311
312
312
313
/* Internal */
313
- RVOP (nop , {/* no operation */ });
314
+ static bool do_nop (riscv_t * rv , const rv_insn_t * ir )
315
+ {
316
+ rv -> X [rv_reg_zero ] = 0 ;
317
+ rv -> csr_cycle ++ ;
318
+ rv -> PC += ir -> insn_len ;
319
+ const rv_insn_t * next = ir + 1 ;
320
+ MUST_TAIL return next -> impl (rv , next );
321
+ }
322
+
314
323
315
324
/* LUI is used to build 32-bit constants and uses the U-type format. LUI
316
325
* places the U-immediate value in the top 20 bits of the destination
@@ -1219,6 +1228,46 @@ RVOP(cswsp, {
1219
1228
})
1220
1229
#endif
1221
1230
1231
+ /* auipc + addi */
1232
+ RVOP (fuse1 , { rv -> X [ir -> rd ] = (int32_t ) (rv -> PC + ir -> imm + ir -> imm2 ); })
1233
+
1234
+ /* auipc + add */
1235
+ RVOP (fuse2 , {
1236
+ rv -> X [ir -> rd ] = (int32_t ) (rv -> X [ir -> rs1 ]) + (int32_t ) (rv -> PC + ir -> imm );
1237
+ })
1238
+
1239
+ /* multiple sw */
1240
+ RVOP (fuse3 , {
1241
+ opcode_fuse_t * fuse = ir -> fuse ;
1242
+ uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1243
+ /* the memory addresses of the sw instructions are contiguous, so we only
1244
+ * need to check the first sw instruction to determine if its memory address
1245
+ * is misaligned or if the memory chunk does not exist.
1246
+ */
1247
+ RV_EXC_MISALIGN_HANDLER (3 , store , false, 1 );
1248
+ rv -> io .mem_write_w (rv , addr , rv -> X [fuse [0 ].rs2 ]);
1249
+ for (int i = 1 ; i < ir -> imm2 ; i ++ ) {
1250
+ addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1251
+ rv -> io .mem_write_w (rv , addr , rv -> X [fuse [i ].rs2 ]);
1252
+ }
1253
+ })
1254
+
1255
+ /* multiple lw */
1256
+ RVOP (fuse4 , {
1257
+ opcode_fuse_t * fuse = ir -> fuse ;
1258
+ uint32_t addr = rv -> X [fuse [0 ].rs1 ] + fuse [0 ].imm ;
1259
+ /* the memory addresses of the lw instructions are contiguous, so we only
1260
+ * need to check the first lw instruction to determine if its memory address
1261
+ * is misaligned or if the memory chunk does not exist.
1262
+ */
1263
+ RV_EXC_MISALIGN_HANDLER (3 , load , false, 1 );
1264
+ rv -> X [fuse [0 ].rd ] = rv -> io .mem_read_w (rv , addr );
1265
+ for (int i = 1 ; i < ir -> imm2 ; i ++ ) {
1266
+ addr = rv -> X [fuse [i ].rs1 ] + fuse [i ].imm ;
1267
+ rv -> X [fuse [i ].rd ] = rv -> io .mem_read_w (rv , addr );
1268
+ }
1269
+ })
1270
+
1222
1271
static const void * dispatch_table [] = {
1223
1272
#define _ (inst , can_branch ) [rv_insn_ ##inst ] = do_ ##inst ,
1224
1273
RISCV_INSN_LIST
@@ -1337,7 +1386,6 @@ static void block_translate(riscv_t *rv, block_t *block)
1337
1386
/* compute the end of pc */
1338
1387
block -> pc_end += ir -> insn_len ;
1339
1388
block -> n_insn ++ ;
1340
-
1341
1389
/* stop on branch */
1342
1390
if (insn_is_branch (ir -> opcode )) {
1343
1391
/* recursive jump translation */
@@ -1356,6 +1404,82 @@ static void block_translate(riscv_t *rv, block_t *block)
1356
1404
block -> ir [block -> n_insn - 1 ].tailcall = true;
1357
1405
}
1358
1406
1407
/* Helper for match_pattern(): try to fold the run of lw (RW = 1) or sw
 * (RW = 0) instructions that starts at 'ir' into one fuse4 / fuse3 operation.
 *
 * The macro expands directly under a 'case' label of the switch in
 * match_pattern() and uses that scope's 'block', 'i', 'ir', 'next_ir', 'count'
 * and 'sign'. Every path ends in 'break', which leaves the enclosing switch;
 * for that reason the body cannot be wrapped in do { ... } while (0).
 *
 * An instruction ir + j joins the run when it has the same opcode and base
 * register as 'ir' and its immediate lies exactly j words away from ir->imm,
 * in the direction established by the first pair. The last instruction of the
 * block is never absorbed.
 *
 * For loads, an instruction that overwrites the shared base register ends the
 * run: later loads would compute their address from the new register value and
 * would no longer be contiguous with the first, which is the only access the
 * fused handler checks for misalignment.
 *
 * On success 'ir' becomes the fused opcode, ir->fuse points to a heap array of
 * ir->imm2 opcode_fuse_t entries copied from the original instructions, and
 * the absorbed instructions are rewritten to rv_insn_nop. If the array cannot
 * be allocated the instructions are left as they are.
 */
#define COMBINE_MEM_OPS(RW)                                                \
    count = 1;                                                             \
    next_ir = ir + 1;                                                      \
    if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw))                \
        break;                                                             \
    if (IIF(RW)(ir->rd == ir->rs1, false))                                 \
        break;                                                             \
    sign = (ir->imm < next_ir->imm) ? -1 : 1;                              \
    for (uint32_t j = 1; j < block->n_insn - 1 - i; j++) {                 \
        next_ir = ir + j;                                                  \
        if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw) ||          \
            ir->rs1 != next_ir->rs1 ||                                     \
            ir->imm - next_ir->imm != 4 * sign * (int32_t) j)              \
            break;                                                         \
        count++;                                                           \
        if (IIF(RW)(next_ir->rd == ir->rs1, false))                        \
            break;                                                         \
    }                                                                      \
    if (count > 1) {                                                       \
        opcode_fuse_t *fused_ops =                                         \
            malloc((size_t) count * sizeof(opcode_fuse_t));                \
        if (!fused_ops)                                                    \
            break;                                                         \
        ir->opcode = IIF(RW)(rv_insn_fuse4, rv_insn_fuse3);                \
        ir->fuse = fused_ops;                                              \
        ir->imm2 = count;                                                  \
        memcpy(ir->fuse, ir, sizeof(opcode_fuse_t));                       \
        ir->impl = dispatch_table[ir->opcode];                             \
        for (int j = 1; j < count; j++) {                                  \
            next_ir = ir + j;                                              \
            memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t));          \
            next_ir->opcode = rv_insn_nop;                                 \
            next_ir->impl = dispatch_table[next_ir->opcode];               \
        }                                                                  \
    }                                                                      \
    break;
1434
+
1435
+
1436
+ /* examine whether instructions in a block match a specific pattern. If so,
1437
+ * rewrite them into fused instructions.
1438
+ *
1439
+ * We plan to devise strategies to increase the number of instructions that
1440
+ * match the pattern, such as reordering the instructions.
1441
+ */
1442
+ static void match_pattern (block_t * block )
1443
+ {
1444
+ for (uint32_t i = 0 ; i < block -> n_insn - 1 ; i ++ ) {
1445
+ rv_insn_t * ir = block -> ir + i , * next_ir = NULL ;
1446
+ int32_t count = 0 , sign = 1 ;
1447
+ switch (ir -> opcode ) {
1448
+ case rv_insn_auipc :
1449
+ next_ir = ir + 1 ;
1450
+ if (next_ir -> opcode == rv_insn_addi && ir -> rd == next_ir -> rs1 ) {
1451
+ /* the destination register of instruction auipc is equal to the
1452
+ * source register 1 of next instruction addi */
1453
+ ir -> opcode = rv_insn_fuse1 ;
1454
+ ir -> rd = next_ir -> rd ;
1455
+ ir -> imm2 = next_ir -> imm ;
1456
+ ir -> impl = dispatch_table [ir -> opcode ];
1457
+ next_ir -> opcode = rv_insn_nop ;
1458
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1459
+ } else if (next_ir -> opcode == rv_insn_add &&
1460
+ ir -> rd == next_ir -> rs2 ) {
1461
+ /* the destination register of instruction auipc is equal to the
1462
+ * source register 2 of next instruction add */
1463
+ ir -> opcode = rv_insn_fuse2 ;
1464
+ ir -> rd = next_ir -> rd ;
1465
+ ir -> rs1 = next_ir -> rs1 ;
1466
+ ir -> impl = dispatch_table [ir -> opcode ];
1467
+ next_ir -> opcode = rv_insn_nop ;
1468
+ next_ir -> impl = dispatch_table [next_ir -> opcode ];
1469
+ }
1470
+ break ;
1471
+ /* If the memory addresses of a sequence of store or load instructions
1472
+ * are contiguous, combine these instructions.
1473
+ */
1474
+ case rv_insn_sw :
1475
+ COMBINE_MEM_OPS (0 );
1476
+ case rv_insn_lw :
1477
+ COMBINE_MEM_OPS (1 );
1478
+ /* FIXME: lui + addi*/
1479
+ }
1480
+ }
1481
+ }
1482
+
1359
1483
static block_t * prev = NULL ;
1360
1484
static block_t * block_find_or_translate (riscv_t * rv )
1361
1485
{
@@ -1375,6 +1499,9 @@ static block_t *block_find_or_translate(riscv_t *rv)
1375
1499
/* translate the basic block */
1376
1500
block_translate (rv , next );
1377
1501
1502
+ /* macro operation fusion */
1503
+ match_pattern (next );
1504
+
1378
1505
/* insert the block into block map */
1379
1506
block_insert (& rv -> block_map , next );
1380
1507
0 commit comments