diff --git a/src/decode.h b/src/decode.h index f7adc5ce..94b78efd 100644 --- a/src/decode.h +++ b/src/decode.h @@ -163,7 +163,8 @@ _(fuse1, 0) \ _(fuse2, 0) \ _(fuse3, 0) \ - _(fuse4, 0) + _(fuse4, 0) \ + _(fuse5, 0) /* clang-format on */ /* IR list */ @@ -253,7 +254,7 @@ typedef struct rv_insn { uint8_t shamt; #endif /* fuse operation */ - int16_t imm2; + int32_t imm2; opcode_fuse_t *fuse; /* instruction length */ diff --git a/src/emulate.c b/src/emulate.c index bd9be83e..ca32d8bf 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -1251,15 +1251,38 @@ RVOP(cswsp, { #endif /* auipc + addi */ -RVOP(fuse1, { rv->X[ir->rd] = (int32_t) (rv->PC + ir->imm + ir->imm2); }) +static bool do_fuse1(riscv_t *rv, const rv_insn_t *ir) +{ + rv->X[rv_reg_zero] = 0; + rv->csr_cycle += 2; + rv->X[ir->rd] = rv->PC + ir->imm; + rv->X[ir->rs1] = rv->X[ir->rd] + ir->imm2; + rv->PC += 2 * ir->insn_len; + if (unlikely(RVOP_NO_NEXT(ir))) + return true; + const rv_insn_t *next = ir + 2; + MUST_TAIL return next->impl(rv, next); +} /* auipc + add */ -RVOP(fuse2, { - rv->X[ir->rd] = (int32_t) (rv->X[ir->rs1]) + (int32_t) (rv->PC + ir->imm); -}) +static bool do_fuse2(riscv_t *rv, const rv_insn_t *ir) +{ + rv->X[rv_reg_zero] = 0; + rv->csr_cycle += 2; + rv->X[ir->rd] = rv->PC + ir->imm; + rv->X[ir->rs2] = rv->X[ir->rd] + rv->X[ir->rs1]; + rv->PC += 2 * ir->insn_len; + if (unlikely(RVOP_NO_NEXT(ir))) + return true; + const rv_insn_t *next = ir + 2; + MUST_TAIL return next->impl(rv, next); +} /* multiple sw */ -RVOP(fuse3, { +static bool do_fuse3(riscv_t *rv, const rv_insn_t *ir) +{ + rv->X[rv_reg_zero] = 0; + rv->csr_cycle += ir->imm2; opcode_fuse_t *fuse = ir->fuse; uint32_t addr = rv->X[fuse[0].rs1] + fuse[0].imm; /* the memory addresses of the sw instructions are contiguous, so we only @@ -1272,10 +1295,18 @@ RVOP(fuse3, { addr = rv->X[fuse[i].rs1] + fuse[i].imm; rv->io.mem_write_w(addr, rv->X[fuse[i].rs2]); } -}) + rv->PC += ir->imm2 * ir->insn_len; + if (unlikely(RVOP_NO_NEXT(ir))) + return true; + const rv_insn_t *next = ir + ir->imm2; + MUST_TAIL return next->impl(rv, next); +} /* multiple lw */ -RVOP(fuse4, { +static bool do_fuse4(riscv_t *rv, const rv_insn_t *ir) +{ + rv->X[rv_reg_zero] = 0; + rv->csr_cycle += ir->imm2; opcode_fuse_t *fuse = ir->fuse; uint32_t addr = rv->X[fuse[0].rs1] + fuse[0].imm; /* the memory addresses of the lw instructions are contiguous, so we only @@ -1288,7 +1319,26 @@ RVOP(fuse4, { addr = rv->X[fuse[i].rs1] + fuse[i].imm; rv->X[fuse[i].rd] = rv->io.mem_read_w(addr); } -}) + rv->PC += ir->imm2 * ir->insn_len; + if (unlikely(RVOP_NO_NEXT(ir))) + return true; + const rv_insn_t *next = ir + ir->imm2; + MUST_TAIL return next->impl(rv, next); +} + +/* lui + addi */ +static bool do_fuse5(riscv_t *rv, const rv_insn_t *ir) +{ + rv->X[rv_reg_zero] = 0; + rv->csr_cycle += 2; + rv->X[ir->rd] = ir->imm; + rv->X[ir->rs1] = ir->imm + ir->imm2; + rv->PC += 2 * ir->insn_len; + if (unlikely(RVOP_NO_NEXT(ir))) + return true; + const rv_insn_t *next = ir + 2; + MUST_TAIL return next->impl(rv, next); +} static const void *dispatch_table[] = { #define _(inst, can_branch) [rv_insn_##inst] = do_##inst, @@ -1448,9 +1498,8 @@ static void block_translate(riscv_t *rv, block_t *block) for (int j = 1; j < count; j++) { \ next_ir = ir + j; \ memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \ - next_ir->opcode = rv_insn_nop; \ - next_ir->impl = dispatch_table[next_ir->opcode]; \ } \ + ir->tailcall = next_ir->tailcall; \ } /* examine whether instructions in a block match a specific pattern. If so, @@ -1469,25 +1518,32 @@ static void match_pattern(block_t *block) next_ir = ir + 1; if (next_ir->opcode == rv_insn_addi && ir->rd == next_ir->rs1) { /* the destination register of instruction auipc is equal to the - * source register 1 of next instruction addi */ + * source register 1 of next instruction addi. + */ ir->opcode = rv_insn_fuse1; - ir->rd = next_ir->rd; + ir->rs1 = next_ir->rd; ir->imm2 = next_ir->imm; ir->impl = dispatch_table[ir->opcode]; - next_ir->opcode = rv_insn_nop; - next_ir->impl = dispatch_table[next_ir->opcode]; + ir->tailcall = next_ir->tailcall; } else if (next_ir->opcode == rv_insn_add && ir->rd == next_ir->rs2) { /* the destination register of instruction auipc is equal to the * source register 2 of next instruction add */ ir->opcode = rv_insn_fuse2; - ir->rd = next_ir->rd; + ir->rs2 = next_ir->rd; ir->rs1 = next_ir->rs1; ir->impl = dispatch_table[ir->opcode]; - next_ir->opcode = rv_insn_nop; - next_ir->impl = dispatch_table[next_ir->opcode]; + } else if (next_ir->opcode == rv_insn_add && + ir->rd == next_ir->rs1) { + /* the destination register of instruction auipc is equal to the + * source register 1 of next instruction add */ + ir->opcode = rv_insn_fuse2; + ir->rs2 = next_ir->rd; + ir->rs1 = next_ir->rs2; + ir->impl = dispatch_table[ir->opcode]; } break; + /* If the memory addresses of a sequence of store or load instructions * are contiguous, combine these instructions. */ @@ -1497,7 +1553,19 @@ static void match_pattern(block_t *block) case rv_insn_lw: COMBINE_MEM_OPS(1); break; - /* FIXME: lui + addi */ + case rv_insn_lui: + next_ir = ir + 1; + if (next_ir->opcode == rv_insn_addi && ir->rd == next_ir->rs1) { + /* the destination register of instruction lui is equal to + * the source register 1 of next instruction addi. + */ + ir->opcode = rv_insn_fuse5; + ir->rs1 = next_ir->rd; + ir->imm2 = next_ir->imm; + ir->impl = dispatch_table[ir->opcode]; + ir->tailcall = next_ir->tailcall; + } + break; /* TODO: mixture of sw and lw */ /* TODO: reorder insturction to match pattern */ }