
Commit e7dce22

Add Trace EBB
We enhance the original extended basic block (EBB) by introducing the trace EBB. The trace EBB employs two main strategies: recursive jump translation and retroactive block chaining. After decoding, the target addresses of jal, cj, and cjal instructions are already known, so we can recursively translate the instructions at the jump target into the same basic block. For the second strategy, we chain the previous block to the current block after emulation, since the branch target only becomes known during emulation. The performance results below show the improvements obtained from the trace EBB strategy.

| Test      | commit 1c11b39           | trace EBB                | Speedup |
|-----------|--------------------------|--------------------------|---------|
| CoreMark  | 1155.174 Iterations/Sec  | 1351.065 Iterations/Sec  | +16.6%  |
| dhrystone | 1017 DMIPS               | 1073 DMIPS               | +5.5%   |
| nqueens   | 8630 msec                | 8295 msec                | +4.0%   |
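As a rough illustration of the first strategy, the sketch below shows how block translation can keep going through a direct jump whose target is known at decode time. It mirrors the `block_translate()` change in the diff below, but the stub types, the placeholder opcode values, and the helper `continue_through_direct_jump()` are simplified stand-ins invented for this sketch, not the emulator's real definitions.

```c
#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-ins for the emulator's IR and block types. */
typedef struct rv_insn {
    uint8_t opcode;
    int32_t imm;        /* jump offset for jal / c.j / c.jal */
    uint8_t insn_len;   /* 4 for RV32I, 2 for compressed instructions */
    struct rv_insn *branch_taken, *branch_untaken;
} rv_insn_t;

typedef struct {
    uint32_t pc_start, pc_end;
    uint32_t n_insn;
    rv_insn_t *ir;
} block_t;

/* Placeholder opcode identifiers; the real values come from the decoder. */
enum { rv_insn_jal = 1, rv_insn_cj, rv_insn_cjal };

/* Recursive jump translation: when the decoded instruction is a direct jump,
 * its target is already known, so instead of ending the basic block we move
 * the decode cursor (pc_end) to the target and keep appending IR entries.
 * Returns true if translation should continue into the jump target. */
static bool continue_through_direct_jump(block_t *block, rv_insn_t *ir)
{
    if (ir->opcode != rv_insn_jal && ir->opcode != rv_insn_cj &&
        ir->opcode != rv_insn_cjal)
        return false;               /* conditional/indirect: stop as before */

    /* rewind past the jump itself, then add its PC-relative offset */
    block->pc_end = block->pc_end - ir->insn_len + ir->imm;
    /* the jump's successor is simply the next IR slot in the same block */
    ir->branch_taken = ir + 1;
    return true;
}
```

The second strategy, retroactive block chaining, is sketched after the diff below.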
1 parent 1c11b39 commit e7dce22

File tree

1 file changed (+108, -138 lines)

src/emulate.c

Lines changed: 108 additions & 138 deletions
@@ -287,18 +287,22 @@ enum {
 #define RVOP_RUN_NEXT (!ir->tailcall)
 #endif
 
-#define RVOP(inst, code)                                                  \
-    static bool do_##inst(riscv_t *rv UNUSED, const rv_insn_t *ir UNUSED) \
-    {                                                                     \
-        rv->X[rv_reg_zero] = 0;                                           \
-        code;                                                             \
-        rv->csr_cycle++;                                                  \
-    nextop:                                                               \
-        rv->PC += ir->insn_len;                                           \
-        if (!RVOP_RUN_NEXT)                                               \
-            return true;                                                  \
-        const rv_insn_t *next = ir + 1;                                   \
-        MUST_TAIL return next->impl(rv, next);                            \
+/* branch_taken records whether the branch is taken during emulation */
+static bool branch_taken = false;
+/* last_pc records the program counter of the previous block */
+static uint32_t last_pc = 0;
+#define RVOP(inst, code)                                        \
+    static bool do_##inst(riscv_t *rv, const rv_insn_t *ir)     \
+    {                                                           \
+        rv->X[rv_reg_zero] = 0;                                 \
+        rv->csr_cycle++;                                        \
+        code;                                                   \
+    nextop:                                                     \
+        rv->PC += ir->insn_len;                                 \
+        if (!RVOP_RUN_NEXT)                                     \
+            return true;                                        \
+        const rv_insn_t *next = ir + 1;                         \
+        MUST_TAIL return next->impl(rv, next);                  \
     }
 
 /* RV32I Base Instruction Set */
@@ -334,7 +338,7 @@ RVOP(jal, {
     rv->X[ir->rd] = pc + ir->insn_len;
     /* check instruction misaligned */
     RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    return true;
+    return ir->branch_taken->impl(rv, ir->branch_taken);
 })
 
 /*The indirect jump instruction JALR uses the I-type encoding. The
@@ -356,107 +360,45 @@ RVOP(jalr, {
     return true;
 })
 
-/* BEQ: Branch if Equal */
-RVOP(beq, {
-    const uint32_t pc = rv->PC;
-    if (rv->X[ir->rs1] != rv->X[ir->rs2]) {
-        if (!ir->branch_untaken)
-            goto nextop;
-        rv->PC += ir->insn_len;
-        return ir->branch_untaken->impl(rv, ir->branch_untaken);
-    }
-    rv->PC += ir->imm;
-    /* check instruction misaligned */
-    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    if (ir->branch_taken)
-        return ir->branch_taken->impl(rv, ir->branch_taken);
+/* clang-format off */
+#define BRANCH_FUNC(type, cond)                                   \
+    const uint32_t pc = rv->PC;                                   \
+    if ((type) rv->X[ir->rs1] cond (type) rv->X[ir->rs2]) {       \
+        branch_taken = false;                                     \
+        if (!ir->branch_untaken)                                  \
+            goto nextop;                                          \
+        rv->PC += ir->insn_len;                                   \
+        last_pc = rv->PC;                                         \
+        return ir->branch_untaken->impl(rv, ir->branch_untaken);  \
+    }                                                             \
+    branch_taken = true;                                          \
+    rv->PC += ir->imm;                                            \
+    /* check instruction misaligned */                            \
+    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);                  \
+    if (ir->branch_taken) {                                       \
+        last_pc = rv->PC;                                         \
+        return ir->branch_taken->impl(rv, ir->branch_taken);      \
+    }                                                             \
     return true;
-})
+/* clang-format on */
+
+/* BEQ: Branch if Equal */
+RVOP(beq, { BRANCH_FUNC(uint32_t, !=); })
 
 /* BNE: Branch if Not Equal */
-RVOP(bne, {
-    const uint32_t pc = rv->PC;
-    if (rv->X[ir->rs1] == rv->X[ir->rs2]) {
-        if (!ir->branch_untaken)
-            goto nextop;
-        rv->PC += ir->insn_len;
-        return ir->branch_untaken->impl(rv, ir->branch_untaken);
-    }
-    rv->PC += ir->imm;
-    /* check instruction misaligned */
-    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    if (ir->branch_taken)
-        return ir->branch_taken->impl(rv, ir->branch_taken);
-    return true;
-})
+RVOP(bne, { BRANCH_FUNC(uint32_t, ==); })
 
 /* BLT: Branch if Less Than */
-RVOP(blt, {
-    const uint32_t pc = rv->PC;
-    if ((int32_t) rv->X[ir->rs1] >= (int32_t) rv->X[ir->rs2]) {
-        if (!ir->branch_untaken)
-            goto nextop;
-        rv->PC += ir->insn_len;
-        return ir->branch_untaken->impl(rv, ir->branch_untaken);
-    }
-    rv->PC += ir->imm;
-    /* check instruction misaligned */
-    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    if (ir->branch_taken)
-        return ir->branch_taken->impl(rv, ir->branch_taken);
-    return true;
-})
+RVOP(blt, { BRANCH_FUNC(int32_t, >=); })
 
 /* BGE: Branch if Greater Than */
-RVOP(bge, {
-    const uint32_t pc = rv->PC;
-    if ((int32_t) rv->X[ir->rs1] < (int32_t) rv->X[ir->rs2]) {
-        if (!ir->branch_untaken)
-            goto nextop;
-        rv->PC += ir->insn_len;
-        return ir->branch_untaken->impl(rv, ir->branch_untaken);
-    }
-    rv->PC += ir->imm;
-    /* check instruction misaligned */
-    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    if (ir->branch_taken)
-        return ir->branch_taken->impl(rv, ir->branch_taken);
-    return true;
-})
+RVOP(bge, { BRANCH_FUNC(int32_t, <); })
 
 /* BLTU: Branch if Less Than Unsigned */
-RVOP(bltu, {
-    const uint32_t pc = rv->PC;
-    if (rv->X[ir->rs1] >= rv->X[ir->rs2]) {
-        if (!ir->branch_untaken)
-            goto nextop;
-        rv->PC += ir->insn_len;
-        return ir->branch_untaken->impl(rv, ir->branch_untaken);
-    }
-    rv->PC += ir->imm;
-    /* check instruction misaligned */
-    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    if (ir->branch_taken)
-        return ir->branch_taken->impl(rv, ir->branch_taken);
-    return true;
-})
+RVOP(bltu, { BRANCH_FUNC(uint32_t, >=); })
 
 /* BGEU: Branch if Greater Than Unsigned */
-RVOP(bgeu, {
-    const uint32_t pc = rv->PC;
-    if (rv->X[ir->rs1] < rv->X[ir->rs2]) {
-        if (!ir->branch_untaken)
-            goto nextop;
-        rv->PC += ir->insn_len;
-        return ir->branch_untaken->impl(rv, ir->branch_untaken);
-    }
-    rv->PC += ir->imm;
-    /* check instruction misaligned */
-    RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
-    if (ir->branch_taken)
-        return ir->branch_taken->impl(rv, ir->branch_taken);
-    return true;
-})
+RVOP(bgeu, { BRANCH_FUNC(uint32_t, <); })
 
 /* LB: Load Byte */
 RVOP(lb, {
@@ -1116,7 +1058,7 @@ RVOP(cjal, {
     rv->X[1] = rv->PC + ir->insn_len;
     rv->PC += ir->imm;
     RV_EXC_MISALIGN_HANDLER(rv->PC, insn, true, 0);
-    return true;
+    return ir->branch_taken->impl(rv, ir->branch_taken);
 })
 
 /* C.LI loads the sign-extended 6-bit immediate, imm, into register rd.
@@ -1184,7 +1126,7 @@ RVOP(cand, { rv->X[ir->rd] = rv->X[ir->rs1] & rv->X[ir->rs2]; })
 RVOP(cj, {
     rv->PC += ir->imm;
     RV_EXC_MISALIGN_HANDLER(rv->PC, insn, true, 0);
-    return true;
+    return ir->branch_taken->impl(rv, ir->branch_taken);
 })
 
 /* C.BEQZ performs conditional control transfers. The offset is
@@ -1195,11 +1137,13 @@ RVOP(cj, {
  */
 RVOP(cbeqz, {
     if (rv->X[ir->rs1]) {
+        branch_taken = false;
         if (!ir->branch_untaken)
             goto nextop;
         rv->PC += ir->insn_len;
         return ir->branch_untaken->impl(rv, ir->branch_untaken);
     }
+    branch_taken = true;
     rv->PC += (uint32_t) ir->imm;
     if (ir->branch_taken)
         return ir->branch_taken->impl(rv, ir->branch_taken);
@@ -1209,11 +1153,13 @@ RVOP(cbeqz, {
 /* C.BEQZ */
 RVOP(cbnez, {
     if (!rv->X[ir->rs1]) {
+        branch_taken = false;
         if (!ir->branch_untaken)
             goto nextop;
         rv->PC += ir->insn_len;
         return ir->branch_untaken->impl(rv, ir->branch_untaken);
     }
+    branch_taken = true;
     rv->PC += (uint32_t) ir->imm;
     if (ir->branch_taken)
         return ir->branch_taken->impl(rv, ir->branch_taken);
@@ -1294,6 +1240,26 @@ static bool insn_is_branch(uint8_t opcode)
     return false;
 }
 
+static bool insn_is_unconditional_branch(uint8_t opcode)
+{
+    switch (opcode) {
+    case rv_insn_ecall:
+    case rv_insn_ebreak:
+    case rv_insn_jal:
+    case rv_insn_jalr:
+    case rv_insn_mret:
+#if RV32_HAS(EXT_C)
+    case rv_insn_cj:
+    case rv_insn_cjalr:
+    case rv_insn_cjal:
+    case rv_insn_cjr:
+    case rv_insn_cebreak:
+#endif
+        return true;
+    }
+    return false;
+}
+
 /* hash function is used when mapping address into the block map */
 static uint32_t hash(size_t k)
 {
@@ -1377,37 +1343,25 @@ static void block_translate(riscv_t *rv, block_t *block)
         block->n_insn++;
 
         /* stop on branch */
-        if (insn_is_branch(ir->opcode))
+        if (insn_is_branch(ir->opcode)) {
+            /* recursive jump translation */
+            if (ir->opcode == rv_insn_jal
+#if RV32_HAS(EXT_C)
+                || ir->opcode == rv_insn_cj || ir->opcode == rv_insn_cjal
+#endif
+            ) {
+                block->pc_end = block->pc_end - ir->insn_len + ir->imm;
+                ir->branch_taken = ir + 1;
+                continue;
+            }
             break;
+        }
     }
     block->ir[block->n_insn - 1].tailcall = true;
 }
 
-static void extend_block(riscv_t *rv, block_t *block)
-{
-    rv_insn_t *last_ir = block->ir + block->n_insn - 1;
-    if (last_ir->branch_taken && last_ir->branch_untaken)
-        return;
-    /* calculate the PC of taken and untaken branches to find block */
-    uint32_t taken_pc = block->pc_end - last_ir->insn_len + last_ir->imm,
-             not_taken_pc = block->pc_end;
-
-    block_map_t *map = &rv->block_map;
-    block_t *next;
-
-    /* check the branch_taken/branch_untaken pointer has been assigned and the
-     * first basic block in the path of the taken/untaken branches exists or
-     * not. If either of these conditions is not met, it will not be possible to
-     * extend the path of the taken/untaken branches for basic block.
-     */
-    if (!last_ir->branch_taken && (next = block_find(map, taken_pc)))
-        last_ir->branch_taken = next->ir;
-
-    if (!last_ir->branch_untaken && (next = block_find(map, not_taken_pc)))
-        last_ir->branch_untaken = next->ir;
-}
-
-static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
+static block_t *prev = NULL;
+static block_t *block_find_or_translate(riscv_t *rv)
 {
     block_map_t *map = &rv->block_map;
     /* lookup the next block in the block map */
@@ -1435,9 +1389,7 @@ static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
         */
         if (prev)
             prev->predict = next;
-    } else
-        extend_block(rv, next);
-
+    }
 
     return next;
 }
@@ -1447,27 +1399,45 @@ void rv_step(riscv_t *rv, int32_t cycles)
     assert(rv);
 
     /* find or translate a block for starting PC */
-    block_t *prev = NULL;
-
     const uint64_t cycles_target = rv->csr_cycle + cycles;
 
     /* loop until we hit out cycle target */
     while (rv->csr_cycle < cycles_target && !rv->halt) {
         block_t *block;
-
         /* try to predict the next block */
         if (prev && prev->predict && prev->predict->pc_start == rv->PC) {
             block = prev->predict;
         } else {
             /* lookup the next block in block map or translate a new block,
              * and move onto the next block.
              */
-            block = block_find_or_translate(rv, prev);
+            block = block_find_or_translate(rv);
         }
 
         /* we should have a block by now */
         assert(block);
 
+        /* After emulating the previous block, we determine whether the branch
+         * is taken or not. Consequently, we assign the IR array of the current
+         * block to either the branch_taken or branch_untaken pointer of the
+         * previous block.
+         */
+        if (prev) {
+            /* update the previous block */
+            if (prev->pc_start != last_pc)
+                prev = block_find(&rv->block_map, last_pc);
+
+            rv_insn_t *last_ir = prev->ir + prev->n_insn - 1;
+            /* chain block */
+            if (!insn_is_unconditional_branch(last_ir->opcode)) {
+                if (branch_taken && !last_ir->branch_taken)
+                    last_ir->branch_taken = block->ir;
+                else if (!last_ir->branch_untaken)
+                    last_ir->branch_untaken = block->ir;
+            }
+        }
+        last_pc = rv->PC;
+
         /* execute the block */
         const rv_insn_t *ir = block->ir;
