-
Notifications
You must be signed in to change notification settings - Fork 786
CFG traversal for the new EH spec #3494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b45dca7
2297dfe
6c60880
7ee8a09
c211825
65d4ff9
1077763
024210d
53ff4b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,14 +69,27 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
std::vector<BasicBlock*> ifStack; | ||
// stack of the first blocks of loops | ||
std::vector<BasicBlock*> loopStack; | ||
|
||
// stack of the last blocks of try bodies | ||
std::vector<BasicBlock*> tryStack; | ||
// stack of the first blocks of catch bodies | ||
std::vector<BasicBlock*> catchStack; | ||
|
||
void startBasicBlock() { | ||
// stack of the first blocks of catches that throwing instructions should | ||
// unwind to at any moment. Because there can be multiple catch blocks for a | ||
// try, we maintain a vector of first blocks of catches. | ||
std::vector<std::vector<BasicBlock*>> unwindCatchStack; | ||
// stack of 'Try' expressions corresponding to unwindCatchStack. | ||
std::vector<Expression*> unwindExprStack; | ||
// A stack for each try, where each entry is a list of blocks, one for each | ||
// catch, used during processing. We start by assigning the start blocks to | ||
// here, and then read those at the appropriate time; when we finish a catch | ||
// we write to here the end block, so that when we finish with them all we can | ||
// connect the ends to the outside. In principle two vectors could be used, | ||
// but their usage does not overlap in time, and this is more efficient. | ||
std::vector<std::vector<BasicBlock*>> processCatchStack; | ||
|
||
BasicBlock* startBasicBlock() { | ||
currBasicBlock = ((SubType*)this)->makeBasicBlock(); | ||
basicBlocks.push_back(std::unique_ptr<BasicBlock>(currBasicBlock)); | ||
return currBasicBlock; | ||
} | ||
|
||
void startUnreachableBlock() { currBasicBlock = nullptr; } | ||
|
@@ -119,16 +132,14 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
|
||
static void doStartIfTrue(SubType* self, Expression** currp) { | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); | ||
self->link(last, self->currBasicBlock); // ifTrue | ||
self->link(last, self->startBasicBlock()); // ifTrue | ||
self->ifStack.push_back(last); // the block before the ifTrue | ||
} | ||
|
||
static void doStartIfFalse(SubType* self, Expression** currp) { | ||
self->ifStack.push_back(self->currBasicBlock); // the ifTrue fallthrough | ||
self->startBasicBlock(); | ||
self->link(self->ifStack[self->ifStack.size() - 2], | ||
self->currBasicBlock); // before if -> ifFalse | ||
self->startBasicBlock()); // before if -> ifFalse | ||
} | ||
|
||
static void doEndIf(SubType* self, Expression** currp) { | ||
|
@@ -159,8 +170,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
|
||
static void doEndLoop(SubType* self, Expression** currp) { | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); | ||
self->link(last, self->currBasicBlock); // fallthrough | ||
self->link(last, self->startBasicBlock()); // fallthrough | ||
auto* curr = (*currp)->cast<Loop>(); | ||
// branches to the top of the loop | ||
if (curr->name.is()) { | ||
|
@@ -180,8 +190,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
self->currBasicBlock); // branch to the target | ||
if (curr->condition) { | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); | ||
self->link(last, self->currBasicBlock); // we might fall through | ||
self->link(last, self->startBasicBlock()); // we might fall through | ||
} else { | ||
self->startUnreachableBlock(); | ||
} | ||
|
@@ -205,47 +214,106 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
self->startUnreachableBlock(); | ||
} | ||
|
||
static void doEndThrowingInst(SubType* self, Expression** currp) { | ||
// Even if the instruction can possibly throw, we don't end the current | ||
// basic block unless the instruction is within a try-catch, because the CFG | ||
// will have too many blocks that way, and if an exception is thrown, the | ||
// function will be exited anyway. | ||
if (self->unwindCatchStack.empty()) { | ||
return; | ||
} | ||
|
||
// Exception thrown. Create a link to each catch within the innermost try. | ||
for (auto* block : self->unwindCatchStack.back()) { | ||
self->link(self->currBasicBlock, block); | ||
} | ||
// If the innermost try does not have a catch_all clause, an exception | ||
// thrown can be caught by any of its outer catch block. And if that outer | ||
// try-catch also does not have a catch_all, this continues until we | ||
// encounter a try-catch_all. Create a link to all those possible catch | ||
// unwind destinations. | ||
// TODO This can be more precise for `throw`s if we compare event types | ||
// and create links to outer catch BBs only when the exception is not | ||
// caught. | ||
// TODO This can also be more precise if we analyze the structure of nested | ||
// try-catches. For example, in the example below, 'call $foo' doesn't need | ||
// a link to the BB of outer 'catch $e1', because if the exception thrown by | ||
// the call is of event $e1, it would've already been caught by the inner | ||
// 'catch $e1'. Optimize these cases later. | ||
// try | ||
// try | ||
// call $foo | ||
// catch $e1 | ||
// ... | ||
// catch $e2 | ||
// ... | ||
// end | ||
// catch $e1 | ||
// ... | ||
// catch $e3 | ||
// ... | ||
// end | ||
for (int i = self->unwindCatchStack.size() - 1; i > 0; i--) { | ||
if (self->unwindExprStack[i]->template cast<Try>()->hasCatchAll()) { | ||
break; | ||
} | ||
for (auto* block : self->unwindCatchStack[i - 1]) { | ||
self->link(self->currBasicBlock, block); | ||
} | ||
} | ||
} | ||
|
||
static void doEndCall(SubType* self, Expression** currp) { | ||
// Every call can possibly throw, but we don't end the current basic block | ||
// unless the call is within a try-catch, because the CFG will have too many | ||
// blocks that way, and if an exception is thrown, the function will be | ||
// exited anyway. | ||
if (!self->catchStack.empty()) { | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); | ||
self->link(last, self->currBasicBlock); // exception not thrown | ||
self->link(last, self->catchStack.back()); // exception thrown | ||
doEndThrowingInst(self, currp); | ||
if (!self->unwindCatchStack.empty()) { | ||
// exception not thrown. link to the continuation BB | ||
self->link(self->currBasicBlock, self->startBasicBlock()); | ||
} | ||
} | ||
|
||
static void doStartTry(SubType* self, Expression** currp) { | ||
auto* curr = (*currp)->cast<Try>(); | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); // catch body's first block | ||
self->catchStack.push_back(self->currBasicBlock); | ||
self->unwindCatchStack.emplace_back(); | ||
self->unwindExprStack.push_back(curr); | ||
for (Index i = 0; i < curr->catchBodies.size(); i++) { | ||
// Create the catch body's first block | ||
self->unwindCatchStack.back().push_back(self->startBasicBlock()); | ||
} | ||
self->currBasicBlock = last; // reset to the current block | ||
} | ||
|
||
static void doStartCatch(SubType* self, Expression** currp) { | ||
static void doStartCatches(SubType* self, Expression** currp) { | ||
self->tryStack.push_back(self->currBasicBlock); // last block of try body | ||
self->currBasicBlock = self->catchStack.back(); | ||
self->catchStack.pop_back(); | ||
self->processCatchStack.push_back(self->unwindCatchStack.back()); | ||
self->unwindCatchStack.pop_back(); | ||
self->unwindExprStack.pop_back(); | ||
} | ||
|
||
static void doStartCatch(SubType* self, Expression** currp, Index i) { | ||
// Get the block that starts this catch | ||
self->currBasicBlock = self->processCatchStack.back()[i]; | ||
} | ||
|
||
static void doEndCatch(SubType* self, Expression** currp, Index i) { | ||
// We are done with this catch; set the block that ends it | ||
self->processCatchStack.back()[i] = self->currBasicBlock; | ||
} | ||
|
||
static void doEndTry(SubType* self, Expression** currp) { | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); // continuation block after try-catch | ||
// catch body's last block -> continuation block | ||
self->link(last, self->currBasicBlock); | ||
// each catch body's last block -> continuation block | ||
for (auto* last : self->processCatchStack.back()) { | ||
self->link(last, self->currBasicBlock); | ||
} | ||
// try body's last block -> continuation block | ||
self->link(self->tryStack.back(), self->currBasicBlock); | ||
self->tryStack.pop_back(); | ||
self->processCatchStack.pop_back(); | ||
} | ||
|
||
static void doEndThrow(SubType* self, Expression** currp) { | ||
// We unwind to the innermost catch, if any | ||
if (!self->catchStack.empty()) { | ||
self->link(self->currBasicBlock, self->catchStack.back()); | ||
} | ||
doEndThrowingInst(self, currp); | ||
self->startUnreachableBlock(); | ||
} | ||
|
||
|
@@ -254,8 +322,7 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
self->branches[self->findBreakTarget(curr->name)].push_back( | ||
self->currBasicBlock); // branch to the target | ||
auto* last = self->currBasicBlock; | ||
self->startBasicBlock(); | ||
self->link(last, self->currBasicBlock); // we might fall through | ||
self->link(last, self->startBasicBlock()); // we might fall through | ||
} | ||
|
||
static void scan(SubType* self, Expression** currp) { | ||
|
@@ -304,16 +371,24 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
break; | ||
} | ||
case Expression::Id::TryId: { | ||
// FIXME Update the implementation to match the new spec | ||
WASM_UNREACHABLE("unimp"); | ||
/* | ||
self->pushTask(SubType::doEndTry, currp); | ||
self->pushTask(SubType::scan, &curr->cast<Try>()->catchBody); | ||
self->pushTask(SubType::doStartCatch, currp); | ||
auto& catchBodies = curr->cast<Try>()->catchBodies; | ||
using namespace std::placeholders; | ||
for (Index i = 0; i < catchBodies.size(); i++) { | ||
auto doEndCatchI = [i](SubType* self, Expression** currp) { | ||
doEndCatch(self, currp, i); | ||
}; | ||
self->pushTask(doEndCatchI, currp); | ||
self->pushTask(SubType::scan, &catchBodies[i]); | ||
auto doStartCatchI = [i](SubType* self, Expression** currp) { | ||
doStartCatch(self, currp, i); | ||
}; | ||
self->pushTask(doStartCatchI, currp); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Asan says this line causes a memory leak, but I'm not very sure why... Do you have a guess? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Very odd, no idea... code looks 100% valid to me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I figured out what's going on. Surprisingly Fixing this does not seem to be trivial. We can't call destructors on all values (there are primitive types), so typedef void (*TaskFunc)(SubType*, Expression**); to using TaskFunc = std::function<void(SubType*, Expression**)>; was the result of Maybe we should just use lambdas and make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh wow, good find... C++ is annoying sometimes. Yes, lambdas seem best for now, I can't think of a better option. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed it to lambdas: #3494 (comment) About possible performance issues, I ran all wasm-opt tests with 1.
|
||
} | ||
self->pushTask(SubType::doStartCatches, currp); | ||
self->pushTask(SubType::scan, &curr->cast<Try>()->body); | ||
self->pushTask(SubType::doStartTry, currp); | ||
return; // don't do anything else | ||
*/ | ||
} | ||
case Expression::Id::ThrowId: | ||
case Expression::Id::RethrowId: { | ||
|
@@ -350,7 +425,9 @@ struct CFGWalker : public ControlFlowWalker<SubType, VisitorType> { | |
assert(ifStack.size() == 0); | ||
assert(loopStack.size() == 0); | ||
assert(tryStack.size() == 0); | ||
assert(catchStack.size() == 0); | ||
assert(unwindCatchStack.size() == 0); | ||
assert(unwindExprStack.size() == 0); | ||
assert(processCatchStack.size() == 0); | ||
} | ||
|
||
std::unordered_set<BasicBlock*> findLiveBlocks() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't follow what these two methods do. The start assigns the current basic block, and the end does the reverse assignment - is it not redundant? If not, what is it writing over?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When there's N catches,
doStartTry
creates N BBs, each of which will serve as the start BB for each catch, and store them inunwindCatchBlock
, so that any throwing instructions within the try body can create a link to them.In
doStartCatches
, we move those N catch start BBs toprocessCatchBlock
. We need this vector because when we enter each catch indoStartCatch
, we should be able to retrieve the start BB created for that catch. But we can't leave those catch start BBs inunwindCatchBlock
anymore because that stack is for processing unwinding, the process I described above (= any throwing instruction needs a link to every catch start BB inunwindCatchBlock.back()
).In
doEndCatch
, we save the end BB of each catch toprocessCatchStack
. They are used indoEndTry
that we create a link from each of those end catch BB to the continuation BB after the try-catch. ReusingprocessCatchStack
is just to save some space, and we can use a separate vector for this; let me know if you think this is confusing and it'd be better to use separate vectors.I'm not sure if I described this well; it was hard to describe in the comments too. If you have any suggestions to improve the comments (or the code) I'd appreciate that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I think I'm starting to get it.
Separate vectors is another option. Not sure it's worth the overhead, as I'm not sure how many catches we expect to see in practice...?
If we keep the same vector as now, I think comments in these two methods could be helpful, like
Even though they don't add much information, they increase the chance of it making sense when reading just those two methods, I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done