Skip to content

Commit a197d6e

Browse files
authored
[circt-synth] Parallel Prefix Lowering Performance Bug Fix (#8881)
* Corrections for reducing logic used in parallel prefix trees - incorrectly updated smallVectors * Update expected delay path based on improved lowering
1 parent 865f9f8 commit a197d6e

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

lib/Conversion/CombToAIG/CombToAIG.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
499499
// Sum bit 0 is just p[0] since carry_in = 0
500500
results[width - 1] = p[0];
501501

502-
// For remaining bits, sum_i = p_i XOR c_(i-1)
502+
// For remaining bits, sum_i = p_i XOR g_(i-1)
503503
// The carry into position i is the group generate from position i-1
504504
for (int64_t i = 1; i < width; ++i)
505505
results[width - 1 - i] =
@@ -523,6 +523,8 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
523523
SmallVector<Value> &pPrefix,
524524
SmallVector<Value> &gPrefix) const {
525525
auto width = op.getType().getIntOrFloatBitWidth();
526+
SmallVector<Value> pPrefixNew = pPrefix;
527+
SmallVector<Value> gPrefixNew = gPrefix;
526528

527529
// Kogge-Stone parallel prefix computation
528530
for (int64_t stride = 1; stride < width; stride *= 2) {
@@ -531,13 +533,15 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
531533
// Group generate: g_i OR (p_i AND g_j)
532534
Value andPG =
533535
comb::AndOp::create(rewriter, op.getLoc(), pPrefix[i], gPrefix[j]);
534-
gPrefix[i] =
536+
gPrefixNew[i] =
535537
comb::OrOp::create(rewriter, op.getLoc(), gPrefix[i], andPG);
536538

537539
// Group propagate: p_i AND p_j
538-
pPrefix[i] =
540+
pPrefixNew[i] =
539541
comb::AndOp::create(rewriter, op.getLoc(), pPrefix[i], pPrefix[j]);
540542
}
543+
pPrefix = pPrefixNew;
544+
gPrefix = gPrefixNew;
541545
}
542546
LLVM_DEBUG({
543547
int64_t stage = 0;
@@ -569,7 +573,8 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
569573
SmallVector<Value> &pPrefix,
570574
SmallVector<Value> &gPrefix) const {
571575
auto width = op.getType().getIntOrFloatBitWidth();
572-
576+
SmallVector<Value> pPrefixNew = pPrefix;
577+
SmallVector<Value> gPrefixNew = gPrefix;
573578
// Brent-Kung parallel prefix computation
574579
// Forward phase
575580
int64_t stride;
@@ -580,13 +585,15 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
580585
// Group generate: g_i OR (p_i AND g_j)
581586
Value andPG =
582587
comb::AndOp::create(rewriter, op.getLoc(), pPrefix[i], gPrefix[j]);
583-
gPrefix[i] =
588+
gPrefixNew[i] =
584589
comb::OrOp::create(rewriter, op.getLoc(), gPrefix[i], andPG);
585590

586591
// Group propagate: p_i AND p_j
587-
pPrefix[i] =
592+
pPrefixNew[i] =
588593
comb::AndOp::create(rewriter, op.getLoc(), pPrefix[i], pPrefix[j]);
589594
}
595+
pPrefix = pPrefixNew;
596+
gPrefix = gPrefixNew;
590597
}
591598

592599
// Backward phase
@@ -597,12 +604,14 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
597604
// Group generate: g_i OR (p_i AND g_j)
598605
Value andPG =
599606
comb::AndOp::create(rewriter, op.getLoc(), pPrefix[i], gPrefix[j]);
600-
gPrefix[i] = OrOp::create(rewriter, op.getLoc(), gPrefix[i], andPG);
607+
gPrefixNew[i] = OrOp::create(rewriter, op.getLoc(), gPrefix[i], andPG);
601608

602609
// Group propagate: p_i AND p_j
603-
pPrefix[i] =
610+
pPrefixNew[i] =
604611
comb::AndOp::create(rewriter, op.getLoc(), pPrefix[i], pPrefix[j]);
605612
}
613+
pPrefix = pPrefixNew;
614+
gPrefix = gPrefixNew;
606615
}
607616

608617
LLVM_DEBUG({

test/circt-synth/path-e2e.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// CHECK-LABEL: # Longest Path Analysis result for "counter"
55
// CHECK-NEXT: Found 288 paths
66
// CHECK-NEXT: Found 32 unique fanout points
7-
// CHECK-NEXT: Maximum path delay: 48
7+
// CHECK-NEXT: Maximum path delay: 42
88
// Don't test detailed reports as they are not stable.
99

1010
// Make sure json is emitted.

0 commit comments

Comments
 (0)