
Commit e5ea38a

mraleph authored and Commit Queue committed
Revert "[vm/compiler] Split ParallelMove codegen into scheduling and emission"
This reverts commit 32b0933.

Reason for revert: various test failures across the board.

Original change's description:
> [vm/compiler] Split ParallelMove codegen into scheduling and emission
>
> This CL does not contain any changes to behaviour, but simply moves
> ParallelMoveResolver to a separate file. Additionally instead of
> immediately generating code we produce a move schedule which is
> attached to the ParallelMoveInstr and later converted to the
> native code.
>
> This refactoring prepares the code for subsequent improvements, e.g.
> we want to rework how temporaries used by move resolution are
> allocated: instead of pushing/poping them around every move that needs
> them we will allocate space for them in spill area.
>
> Having ParallelMove scheduling separated from code emission also
> allows to unit test it.
>
> TEST=ci
>
> Change-Id: If3f7a88836037a9812a85c1cfc2ef21a7fe15747
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/284222
> Commit-Queue: Slava Egorov <[email protected]>
> Reviewed-by: Alexander Markov <[email protected]>
> Reviewed-by: Martin Kustermann <[email protected]>

Change-Id: I82952d024816327ca5f084a2185fa1ab566cfa82
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/285560
Auto-Submit: Slava Egorov <[email protected]>
Commit-Queue: Rubber Stamper <[email protected]>
Bot-Commit: Rubber Stamper <[email protected]>
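For context, the reverted CL's core idea was to separate deciding the order of parallel moves (scheduling) from generating machine code for them (emission), so the scheduling step can be unit tested on its own. The sketch below is illustrative only: the Move, ScheduledStep, Schedule and Emit names are hypothetical, not the Dart VM's ParallelMoveInstr or move-schedule API, and it ignores cycles, constants and scratch registers.

// Illustrative sketch only -- hypothetical types, not the Dart VM API.
// Phase 1 builds a schedule (a plain list of primitive steps); phase 2
// walks the schedule and "emits" each step. Cycle breaking and scratch
// temporaries are omitted for brevity.
#include <cstdio>
#include <string>
#include <vector>

struct Move {                 // one pending parallel move: dst <- src
  std::string dst, src;
};

struct ScheduledStep {        // one primitive step in the final order
  std::string dst, src;
};

// Phase 1: pick an order in which the moves can be performed. Here a move
// is ready once no other unscheduled move still reads its destination.
std::vector<ScheduledStep> Schedule(std::vector<Move> moves) {
  std::vector<ScheduledStep> steps;
  while (!moves.empty()) {
    bool progress = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      bool blocked = false;
      for (size_t j = 0; j < moves.size(); ++j) {
        if (j != i && moves[j].src == moves[i].dst) blocked = true;
      }
      if (!blocked) {
        steps.push_back({moves[i].dst, moves[i].src});
        moves.erase(moves.begin() + i);
        progress = true;
        break;
      }
    }
    if (!progress) break;  // a cycle: a real resolver would schedule a swap here
  }
  return steps;
}

// Phase 2: emission is a dumb walk over the schedule; tests can inspect
// the schedule itself without ever generating native code.
void Emit(const std::vector<ScheduledStep>& steps) {
  for (const auto& s : steps) {
    printf("mov %s, %s\n", s.dst.c_str(), s.src.c_str());
  }
}

int main() {
  // r2 <- r1 must run before r1 <- r0; the constant load is unordered.
  Emit(Schedule({{"r1", "r0"}, {"r2", "r1"}, {"r3", "const#42"}}));
}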
1 parent 12b606e commit e5ea38a

21 files changed: 600 additions and 720 deletions

runtime/vm/compiler/backend/flow_graph_compiler.cc

Lines changed: 282 additions & 14 deletions
@@ -166,6 +166,7 @@ FlowGraphCompiler::FlowGraphCompiler(
           Class::ZoneHandle(isolate_group()->object_store()->int32x4_class())),
       list_class_(Class::ZoneHandle(Library::Handle(Library::CoreLibrary())
                       .LookupClass(Symbols::List()))),
+      parallel_move_resolver_(this),
       pending_deoptimization_env_(NULL),
       deopt_id_to_ic_data_(deopt_id_to_ic_data),
       edge_counters_array_(Array::ZoneHandle()) {
@@ -736,22 +737,25 @@ void FlowGraphCompiler::VisitBlocks() {
         }
         EmitComment(instr);
       }
-
-      BeginCodeSourceRange(instr->source());
-      EmitInstructionPrologue(instr);
-      ASSERT(pending_deoptimization_env_ == NULL);
-      pending_deoptimization_env_ = instr->env();
-      DEBUG_ONLY(current_instruction_ = instr);
-      instr->EmitNativeCode(this);
-      DEBUG_ONLY(current_instruction_ = nullptr);
-      pending_deoptimization_env_ = NULL;
-      if (IsPeephole(instr)) {
-        ASSERT(top_of_stack_ == nullptr);
-        top_of_stack_ = instr->AsDefinition();
+      if (instr->IsParallelMove()) {
+        parallel_move_resolver_.EmitNativeCode(instr->AsParallelMove());
       } else {
-        EmitInstructionEpilogue(instr);
+        BeginCodeSourceRange(instr->source());
+        EmitInstructionPrologue(instr);
+        ASSERT(pending_deoptimization_env_ == NULL);
+        pending_deoptimization_env_ = instr->env();
+        DEBUG_ONLY(current_instruction_ = instr);
+        instr->EmitNativeCode(this);
+        DEBUG_ONLY(current_instruction_ = nullptr);
+        pending_deoptimization_env_ = NULL;
+        if (IsPeephole(instr)) {
+          ASSERT(top_of_stack_ == nullptr);
+          top_of_stack_ = instr->AsDefinition();
+        } else {
+          EmitInstructionEpilogue(instr);
+        }
+        EndCodeSourceRange(instr->source());
       }
-      EndCodeSourceRange(instr->source());
 
 #if defined(DEBUG)
       if (!is_optimizing()) {
@@ -1850,6 +1854,270 @@ void FlowGraphCompiler::AllocateRegistersLocally(Instruction* instr) {
   }
 }
 
+static uword RegMaskBit(Register reg) {
+  return ((reg) != kNoRegister) ? (1 << (reg)) : 0;
+}
+
+ParallelMoveResolver::ParallelMoveResolver(FlowGraphCompiler* compiler)
+    : compiler_(compiler), moves_(32) {}
+
+void ParallelMoveResolver::EmitNativeCode(ParallelMoveInstr* parallel_move) {
+  ASSERT(moves_.is_empty());
+
+  // Build up a worklist of moves.
+  BuildInitialMoveList(parallel_move);
+
+  const InstructionSource& move_source = InstructionSource(
+      TokenPosition::kParallelMove, parallel_move->inlining_id());
+  for (int i = 0; i < moves_.length(); ++i) {
+    const MoveOperands& move = *moves_[i];
+    // Skip constants to perform them last. They don't block other moves
+    // and skipping such moves with register destinations keeps those
+    // registers free for the whole algorithm.
+    if (!move.IsEliminated() && !move.src().IsConstant()) {
+      PerformMove(move_source, i);
+    }
+  }
+
+  // Perform the moves with constant sources.
+  for (int i = 0; i < moves_.length(); ++i) {
+    const MoveOperands& move = *moves_[i];
+    if (!move.IsEliminated()) {
+      ASSERT(move.src().IsConstant());
+      compiler_->BeginCodeSourceRange(move_source);
+      EmitMove(i);
+      compiler_->EndCodeSourceRange(move_source);
+    }
+  }
+
+  moves_.Clear();
+}
+
+void ParallelMoveResolver::BuildInitialMoveList(
+    ParallelMoveInstr* parallel_move) {
+  // Perform a linear sweep of the moves to add them to the initial list of
+  // moves to perform, ignoring any move that is redundant (the source is
+  // the same as the destination, the destination is ignored and
+  // unallocated, or the move was already eliminated).
+  for (int i = 0; i < parallel_move->NumMoves(); i++) {
+    MoveOperands* move = parallel_move->MoveOperandsAt(i);
+    if (!move->IsRedundant()) moves_.Add(move);
+  }
+}
+
+void ParallelMoveResolver::PerformMove(const InstructionSource& source,
+                                       int index) {
+  // Each call to this function performs a move and deletes it from the move
+  // graph. We first recursively perform any move blocking this one. We
+  // mark a move as "pending" on entry to PerformMove in order to detect
+  // cycles in the move graph. We use operand swaps to resolve cycles,
+  // which means that a call to PerformMove could change any source operand
+  // in the move graph.
+
+  ASSERT(!moves_[index]->IsPending());
+  ASSERT(!moves_[index]->IsRedundant());
+
+  // Clear this move's destination to indicate a pending move. The actual
+  // destination is saved in a stack-allocated local. Recursion may allow
+  // multiple moves to be pending.
+  ASSERT(!moves_[index]->src().IsInvalid());
+  Location destination = moves_[index]->MarkPending();
+
+  // Perform a depth-first traversal of the move graph to resolve
+  // dependencies. Any unperformed, unpending move with a source the same
+  // as this one's destination blocks this one so recursively perform all
+  // such moves.
+  for (int i = 0; i < moves_.length(); ++i) {
+    const MoveOperands& other_move = *moves_[i];
+    if (other_move.Blocks(destination) && !other_move.IsPending()) {
+      // Though PerformMove can change any source operand in the move graph,
+      // this call cannot create a blocking move via a swap (this loop does
+      // not miss any). Assume there is a non-blocking move with source A
+      // and this move is blocked on source B and there is a swap of A and
+      // B. Then A and B must be involved in the same cycle (or they would
+      // not be swapped). Since this move's destination is B and there is
+      // only a single incoming edge to an operand, this move must also be
+      // involved in the same cycle. In that case, the blocking move will
+      // be created but will be "pending" when we return from PerformMove.
+      PerformMove(source, i);
+    }
+  }
+
+  // We are about to resolve this move and don't need it marked as
+  // pending, so restore its destination.
+  moves_[index]->ClearPending(destination);
+
+  // This move's source may have changed due to swaps to resolve cycles and
+  // so it may now be the last move in the cycle. If so remove it.
+  if (moves_[index]->src().Equals(destination)) {
+    moves_[index]->Eliminate();
+    return;
+  }
+
+  // The move may be blocked on a (at most one) pending move, in which case
+  // we have a cycle. Search for such a blocking move and perform a swap to
+  // resolve it.
+  for (int i = 0; i < moves_.length(); ++i) {
+    const MoveOperands& other_move = *moves_[i];
+    if (other_move.Blocks(destination)) {
+      ASSERT(other_move.IsPending());
+      compiler_->BeginCodeSourceRange(source);
+      EmitSwap(index);
+      compiler_->EndCodeSourceRange(source);
+      return;
+    }
+  }
+
+  // This move is not blocked.
+  compiler_->BeginCodeSourceRange(source);
+  EmitMove(index);
+  compiler_->EndCodeSourceRange(source);
+}
+
+void ParallelMoveResolver::EmitMove(int index) {
+  MoveOperands* const move = moves_[index];
+  const Location dst = move->dest();
+  if (dst.IsStackSlot() || dst.IsDoubleStackSlot()) {
+    ASSERT((dst.base_reg() != FPREG) ||
+           ((-compiler::target::frame_layout.VariableIndexForFrameSlot(
+                dst.stack_index())) < compiler_->StackSize()));
+  }
+  const Location src = move->src();
+  ParallelMoveResolver::TemporaryAllocator temp(this, /*blocked=*/kNoRegister);
+  compiler_->EmitMove(dst, src, &temp);
+#if defined(DEBUG)
+  // Allocating a scratch register here may cause stack spilling. Neither the
+  // source nor destination register should be SP-relative in that case.
+  for (const Location& loc : {dst, src}) {
+    ASSERT(!temp.DidAllocateTemporary() || !loc.HasStackIndex() ||
+           loc.base_reg() != SPREG);
+  }
+#endif
+  move->Eliminate();
+}
+
+bool ParallelMoveResolver::IsScratchLocation(Location loc) {
+  for (int i = 0; i < moves_.length(); ++i) {
+    if (moves_[i]->Blocks(loc)) {
+      return false;
+    }
+  }
+
+  for (int i = 0; i < moves_.length(); ++i) {
+    if (moves_[i]->dest().Equals(loc)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+intptr_t ParallelMoveResolver::AllocateScratchRegister(
+    Location::Kind kind,
+    uword blocked_mask,
+    intptr_t first_free_register,
+    intptr_t last_free_register,
+    bool* spilled) {
+  COMPILE_ASSERT(static_cast<intptr_t>(sizeof(blocked_mask)) * kBitsPerByte >=
+                 kNumberOfFpuRegisters);
+  COMPILE_ASSERT(static_cast<intptr_t>(sizeof(blocked_mask)) * kBitsPerByte >=
+                 kNumberOfCpuRegisters);
+  intptr_t scratch = -1;
+  for (intptr_t reg = first_free_register; reg <= last_free_register; reg++) {
+    if ((((1 << reg) & blocked_mask) == 0) &&
+        IsScratchLocation(Location::MachineRegisterLocation(kind, reg))) {
+      scratch = reg;
+      break;
+    }
+  }
+
+  if (scratch == -1) {
+    *spilled = true;
+    for (intptr_t reg = first_free_register; reg <= last_free_register; reg++) {
+      if (((1 << reg) & blocked_mask) == 0) {
+        scratch = reg;
+        break;
+      }
+    }
+  } else {
+    *spilled = false;
+  }
+
+  return scratch;
+}
+
+ParallelMoveResolver::ScratchFpuRegisterScope::ScratchFpuRegisterScope(
+    ParallelMoveResolver* resolver,
+    FpuRegister blocked)
+    : resolver_(resolver), reg_(kNoFpuRegister), spilled_(false) {
+  COMPILE_ASSERT(FpuTMP != kNoFpuRegister);
+  uword blocked_mask =
+      ((blocked != kNoFpuRegister) ? 1 << blocked : 0) | 1 << FpuTMP;
+  reg_ = static_cast<FpuRegister>(resolver_->AllocateScratchRegister(
+      Location::kFpuRegister, blocked_mask, 0, kNumberOfFpuRegisters - 1,
+      &spilled_));
+
+  if (spilled_) {
+    resolver->SpillFpuScratch(reg_);
+  }
+}
+
+ParallelMoveResolver::ScratchFpuRegisterScope::~ScratchFpuRegisterScope() {
+  if (spilled_) {
+    resolver_->RestoreFpuScratch(reg_);
+  }
+}
+
+ParallelMoveResolver::TemporaryAllocator::TemporaryAllocator(
+    ParallelMoveResolver* resolver,
+    Register blocked)
+    : resolver_(resolver),
+      blocked_(blocked),
+      reg_(kNoRegister),
+      spilled_(false) {}
+
+Register ParallelMoveResolver::TemporaryAllocator::AllocateTemporary() {
+  ASSERT(reg_ == kNoRegister);
+
+  uword blocked_mask = RegMaskBit(blocked_) | kReservedCpuRegisters;
+  if (resolver_->compiler_->intrinsic_mode()) {
+    // Block additional registers that must be preserved for intrinsics.
+    blocked_mask |= RegMaskBit(ARGS_DESC_REG);
+#if !defined(TARGET_ARCH_IA32)
+    // Need to preserve CODE_REG to be able to store the PC marker
+    // and load the pool pointer.
+    blocked_mask |= RegMaskBit(CODE_REG);
+#endif
+  }
+  reg_ = static_cast<Register>(
+      resolver_->AllocateScratchRegister(Location::kRegister, blocked_mask, 0,
+                                         kNumberOfCpuRegisters - 1, &spilled_));
+
+  if (spilled_) {
+    resolver_->SpillScratch(reg_);
+  }
+
+  DEBUG_ONLY(allocated_ = true;)
+  return reg_;
+}
+
+void ParallelMoveResolver::TemporaryAllocator::ReleaseTemporary() {
+  if (spilled_) {
+    resolver_->RestoreScratch(reg_);
+  }
+  reg_ = kNoRegister;
+}
+
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+    ParallelMoveResolver* resolver,
+    Register blocked)
+    : allocator_(resolver, blocked) {
+  reg_ = allocator_.AllocateTemporary();
+}
+
+ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+  allocator_.ReleaseTemporary();
+}
 
 const ICData* FlowGraphCompiler::GetOrAddInstanceCallICData(
     intptr_t deopt_id,

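The restored ParallelMoveResolver::PerformMove above resolves dependency cycles by marking a move "pending" during the depth-first walk and falling back to a swap when the only remaining blocker is itself pending. The standalone sketch below models that scheme in a much simplified form; it uses hypothetical Move/Resolver types and plain integer register indices rather than the VM's MoveOperands/Location API, and its swap handling only patches the single pending blocker (the real EmitSwap rewrites every affected source).

// Simplified model of the restored resolver: registers are indices into
// `regs`, a move is dst <- src, and a cycle is resolved with one swap.
// Hypothetical demo only, not the Dart VM's MoveOperands/Location API.
#include <cstdio>
#include <utility>
#include <vector>

struct Move {
  int dst, src;
  bool pending = false, done = false;
};

struct Resolver {
  std::vector<int>& regs;
  std::vector<Move>& moves;

  void PerformMove(int index) {
    Move& m = moves[index];
    m.pending = true;  // mark pending so a cycle back to us is detectable
    // First perform every non-pending move that still reads our destination.
    for (size_t i = 0; i < moves.size(); ++i) {
      if (!moves[i].done && !moves[i].pending && moves[i].src == m.dst) {
        PerformMove(static_cast<int>(i));
      }
    }
    m.pending = false;
    // A swap performed by a recursive call may have already placed our
    // value: if source now equals destination this move is redundant.
    if (m.src == m.dst) {
      m.done = true;
      return;
    }
    // If some unperformed move still reads our destination, it must be
    // pending, i.e. we are in a cycle: resolve it with a swap.
    for (size_t i = 0; i < moves.size(); ++i) {
      if (!moves[i].done && moves[i].src == m.dst &&
          static_cast<int>(i) != index) {
        std::swap(regs[m.dst], regs[m.src]);
        printf("swap r%d, r%d\n", m.dst, m.src);
        // The blocked move's value now lives in our old source slot.
        moves[i].src = m.src;
        m.done = true;
        return;
      }
    }
    // Not blocked: perform the plain move.
    regs[m.dst] = regs[m.src];
    printf("mov r%d, r%d\n", m.dst, m.src);
    m.done = true;
  }

  void Resolve() {
    for (size_t i = 0; i < moves.size(); ++i) {
      if (!moves[i].done) PerformMove(static_cast<int>(i));
    }
  }
};

int main() {
  std::vector<int> regs = {10, 20, 30};        // r0, r1, r2
  std::vector<Move> moves = {{0, 1}, {1, 0}};  // r0 <- r1 and r1 <- r0: a cycle
  Resolver{regs, moves}.Resolve();
  printf("r0=%d r1=%d r2=%d\n", regs[0], regs[1], regs[2]);  // r0=20 r1=10 r2=30
}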