Skip to content

Commit 0d433e1

Browse files
authored
[comb-to-synth] Implement Sklansky Tree and Architecture Selection based on Attribute (#9021)
* Add squarer partial product array reduction
* Implement Sklansky adder tree
* Add tests for different adder architecture selection via attributes
* Only modify particular files
* Remove brackets
* Address comments
* Discuss motivation behind adder arch selection
1 parent 932c062 commit 0d433e1

File tree

4 files changed

+249
-13
lines changed

4 files changed

+249
-13
lines changed

integration_test/circt-synth/comb-lowering-lec.mlir

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,34 @@ hw.module @add(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
2828
hw.output %0 : i4
2929
}
3030

31+
// RUN: circt-lec %t.mlir %s -c1=add_ripple_carry -c2=add_ripple_carry --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_RIPPLE_CARRY
32+
// COMB_ADD_RIPPLE_CARRY: c1 == c2
33+
hw.module @add_ripple_carry(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
34+
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "RIPPLE-CARRY"} : i4
35+
hw.output %0 : i4
36+
}
37+
38+
// RUN: circt-lec %t.mlir %s -c1=add_sklanskey -c2=add_sklanskey --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_SKLANSKEY
39+
// COMB_ADD_SKLANSKEY: c1 == c2
40+
hw.module @add_sklanskey(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
41+
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "SKLANSKEY"} : i4
42+
hw.output %0 : i4
43+
}
44+
45+
// RUN: circt-lec %t.mlir %s -c1=add_kogge_stone -c2=add_kogge_stone --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_KOGGE_STONE
46+
// COMB_ADD_KOGGE_STONE: c1 == c2
47+
hw.module @add_kogge_stone(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
48+
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "KOGGE-STONE"} : i4
49+
hw.output %0 : i4
50+
}
51+
52+
// RUN: circt-lec %t.mlir %s -c1=add_brent_kung -c2=add_brent_kung --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_BRENT_KUNG
53+
// COMB_ADD_BRENT_KUNG: c1 == c2
54+
hw.module @add_brent_kung(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
55+
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "BRENT-KUNG"} : i4
56+
hw.output %0 : i4
57+
}
58+
3159
// RUN: circt-lec %t.mlir %s -c1=sub -c2=sub --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_SUB
3260
// COMB_SUB: c1 == c2
3361
hw.module @sub(in %lhs: i4, in %rhs: i4, out out: i4) {

lib/Conversion/CombToSynth/CombToSynth.cpp

Lines changed: 121 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -480,17 +480,47 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
480480
return success();
481481
}
482482

483+
// Check if the architecture is specified by an attribute.
484+
auto arch = determineAdderArch(op, width);
485+
if (arch == AdderArchitecture::RippleCarry)
486+
return lowerRippleCarryAdder(op, inputs, rewriter);
487+
return lowerParallelPrefixAdder(op, inputs, rewriter);
488+
}
489+
490+
enum AdderArchitecture { RippleCarry, Sklanskey, KoggeStone, BrentKung };
491+
// Choose the adder architecture used to lower `op`.
// An explicit "synth.test.arch" string attribute on `op` takes precedence;
// without it the choice is a heuristic on `width`: RippleCarry below 8,
// Sklanskey up to and including 32, KoggeStone above that. BrentKung is only
// returned when it is requested through the attribute.
static AdderArchitecture determineAdderArch(Operation *op, int64_t width) {
492+
auto strAttr = op->getAttrOfType<StringAttr>("synth.test.arch");
493+
if (strAttr) {
494+
// NOTE(review): there is no .Default(...) here, so a value other than the
// four spellings below matches no case - confirm how StringSwitch behaves
// then and consider an explicit fallback or a diagnostic.
return llvm::StringSwitch<AdderArchitecture>(strAttr.getValue())
495+
.Case("SKLANSKEY", Sklanskey)
496+
.Case("KOGGE-STONE", KoggeStone)
497+
.Case("BRENT-KUNG", BrentKung)
498+
.Case("RIPPLE-CARRY", RippleCarry);
499+
}
500+
// Determine using width as a heuristic.
501+
// TODO: Perform a more thorough analysis to motivate the choices or
502+
// implement an adder synthesis algorithm to construct an optimal adder
503+
// under the given timing constraints - see the work of Zimmermann
504+
505+
// For very small adders, overhead of a parallel prefix adder is likely not
506+
// worth it.
483507
if (width < 8)
484-
lowerRippleCarryAdder(op, inputs, rewriter);
485-
else
486-
lowerParallelPrefixAdder(op, inputs, rewriter);
508+
return AdderArchitecture::RippleCarry;
487509

488-
return success();
510+
// Sklanskey is a good compromise for high-performance, but has high fanout
511+
// which may lead to wiring congestion for very large adders.
512+
if (width <= 32)
513+
return AdderArchitecture::Sklanskey;
514+
515+
// Kogge-Stone uses greater area than Sklanskey but has lower fanout thus
516+
// may be preferable for larger adders.
517+
return AdderArchitecture::KoggeStone;
489518
}
490519

491520
// Implement a basic ripple-carry adder for small bitwidths.
492-
void lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
493-
ConversionPatternRewriter &rewriter) const {
521+
LogicalResult
522+
lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
523+
ConversionPatternRewriter &rewriter) const {
494524
auto width = op.getType().getIntOrFloatBitWidth();
495525
// Implement a naive Ripple-carry full adder.
496526
Value carry;
@@ -528,13 +558,15 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
528558
<< width << "\n");
529559

530560
replaceOpWithNewOpAndCopyNamehint<comb::ConcatOp>(rewriter, op, results);
561+
return success();
531562
}
532563

533564
// Implement a parallel prefix adder (Sklansky, Kogge-Stone or Brent-Kung).
534565
// Will introduce unused signals for the carry bits but these will be removed
535566
// by the AIG pass.
536-
void lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
537-
ConversionPatternRewriter &rewriter) const {
567+
LogicalResult
568+
lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
569+
ConversionPatternRewriter &rewriter) const {
538570
auto width = op.getType().getIntOrFloatBitWidth();
539571

540572
auto aBits = extractBits(rewriter, inputs[0]);
@@ -566,10 +598,33 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
566598
// Create copies of p and g for the prefix computation
567599
SmallVector<Value> pPrefix = p;
568600
SmallVector<Value> gPrefix = g;
569-
if (width < 32)
601+
602+
// Check if the architecture is specified by an attribute.
603+
auto arch = determineAdderArch(op, width);
604+
605+
switch (arch) {
606+
case AdderArchitecture::RippleCarry:
607+
llvm_unreachable("Ripple-Carry should be handled separately");
608+
break;
609+
case AdderArchitecture::Sklanskey:
610+
lowerSklanskeyPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
611+
break;
612+
case AdderArchitecture::KoggeStone:
570613
lowerKoggeStonePrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
571-
else
614+
break;
615+
case AdderArchitecture::BrentKung:
572616
lowerBrentKungPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
617+
break;
618+
}
619+
// if (arch == AdderArchitecture::Sklanskey) {
620+
// lowerSklanskeyPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
621+
// } else if (arch == AdderArchitecture::KoggeStone) {
622+
// lowerKoggeStonePrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
623+
// } else if (arch == AdderArchitecture::BrentKung) {
624+
// lowerBrentKungPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
625+
// } else {
626+
// return failure();
627+
// }
573628

574629
// Generate result sum bits
575630
// NOTE: The result is stored in reverse order.
@@ -592,6 +647,62 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
592647
for (int64_t i = 1; i < width; ++i)
593648
llvm::dbgs() << "RES" << i << " = P" << i << " XOR G" << i - 1 << "\n";
594649
});
650+
651+
return success();
652+
}
653+
654+
// Implement the Sklansky parallel prefix tree
655+
// High fan-out, low depth, low area
// Overwrites `pPrefix` / `gPrefix` in place with the group propagate /
// generate values computed stage by stage, creating comb.and / comb.or ops
// through `rewriter` at the location of `op`. `inputs` is not read here.
656+
void lowerSklanskeyPrefixTree(comb::AddOp op, ValueRange inputs,
657+
ConversionPatternRewriter &rewriter,
658+
SmallVector<Value> &pPrefix,
659+
SmallVector<Value> &gPrefix) const {
660+
auto width = op.getType().getIntOrFloatBitWidth();
661+
// Each stage reads the previous stage's values from pPrefix / gPrefix and
// writes its results into these copies, which are committed after the stage.
SmallVector<Value> pPrefixNew = pPrefix;
662+
SmallVector<Value> gPrefixNew = gPrefix;
663+
664+
// One outer iteration per stage; the stride doubles every stage.
for (int64_t stride = 1; stride < width; stride *= 2) {
665+
// `i` is the first index of every second stride-sized group.
for (int64_t i = stride; i < width; i += 2 * stride) {
666+
// Every index of the group (clamped to width) combines with the same
// index j = i - 1, i.e. the index directly below the group.
for (int64_t k = 0; k < stride && i + k < width; ++k) {
667+
int64_t idx = i + k;
668+
int64_t j = i - 1;
669+
// Group generate: g_idx OR (p_idx AND g_j)
670+
Value andPG = comb::AndOp::create(rewriter, op.getLoc(), pPrefix[idx],
671+
gPrefix[j]);
672+
gPrefixNew[idx] =
673+
comb::OrOp::create(rewriter, op.getLoc(), gPrefix[idx], andPG);
674+
675+
// Group propagate: p_idx AND p_j
676+
pPrefixNew[idx] = comb::AndOp::create(rewriter, op.getLoc(),
677+
pPrefix[idx], pPrefix[j]);
678+
}
679+
}
680+
// Commit this stage's results so the next stage reads them.
pPrefix = pPrefixNew;
681+
gPrefix = gPrefixNew;
682+
}
683+
// Debug output only: walks the same loop nest again to print the equation
// of every node; no ops are created in this block.
LLVM_DEBUG({
684+
int64_t stage = 0;
685+
for (int64_t stride = 1; stride < width; stride *= 2) {
686+
llvm::dbgs()
687+
<< "--------------------------------------- Sklanskey Stage "
688+
<< stage << "\n";
689+
for (int64_t i = stride; i < width; i += 2 * stride) {
690+
for (int64_t k = 0; k < stride && i + k < width; ++k) {
691+
int64_t idx = i + k;
692+
int64_t j = i - 1;
693+
// Group generate: g_idx OR (p_idx AND g_j)
// The bit index and the stage number are printed back-to-back with no
// separator between them.
694+
llvm::dbgs() << "G" << idx << stage + 1 << " = G" << idx << stage
695+
<< " OR (P" << idx << stage << " AND G" << j << stage
696+
<< ")\n";
697+
698+
// Group propagate: p_idx AND p_j
699+
llvm::dbgs() << "P" << idx << stage + 1 << " = P" << idx << stage
700+
<< " AND P" << j << stage << "\n";
701+
}
702+
}
703+
++stage;
704+
}
705+
});
595706
}
596707

597708
// Implement the Kogge-Stone parallel prefix tree

test/Conversion/CombToSynth/comb-to-aig-arith.mlir

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,107 @@ hw.module @add(in %lhs: i2, in %rhs: i2, out out: i2) {
2525
// CHECK-NEXT: %[[sum1:.*]] = comb.xor bin %[[lhs1]], %[[rhs1]], %[[carry0]] : i1
2626
// CHECK-NEXT: %[[concat:.*]] = comb.concat %[[sum1]], %[[sum0]] : i1, i1
2727
// CHECK-NEXT: hw.output %[[concat]] : i2
28-
%0 = comb.add %lhs, %rhs : i2
28+
%0 = comb.add %lhs, %rhs {synth.test.arch = "RIPPLE-CARRY"} : i2
2929
hw.output %0 : i2
3030
}
3131

32+
// CHECK-LABEL: @add_sklanskey
33+
hw.module @add_sklanskey(in %lhs: i3, in %rhs: i3, out out: i3) {
34+
// CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i3) -> i1
35+
// CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i3) -> i1
36+
// CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i3) -> i1
37+
// CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i3) -> i1
38+
// CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i3) -> i1
39+
// CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i3) -> i1
40+
// CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
41+
// CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
42+
// CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
43+
// CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
44+
// CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
45+
// CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
46+
// Reduction Tree
47+
// CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
48+
// CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
49+
// CHECK-NEXT: comb.and %[[P1]], %[[P0]] : i1
50+
// CHECK-NEXT: comb.and %[[P2]], %[[G10]] : i1
51+
// Sum Completion
52+
// CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
53+
// CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
54+
// CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1
55+
// CHECK-NEXT: hw.output %[[RES]] : i3
56+
%0 = comb.add %lhs, %rhs {synth.test.arch = "SKLANSKEY"} : i3
57+
hw.output %0 : i3
58+
}
59+
60+
// CHECK-LABEL: @add_kogge_stone
61+
hw.module @add_kogge_stone(in %lhs: i3, in %rhs: i3, out out: i3) {
62+
// CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i3) -> i1
63+
// CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i3) -> i1
64+
// CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i3) -> i1
65+
// CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i3) -> i1
66+
// CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i3) -> i1
67+
// CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i3) -> i1
68+
// CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
69+
// CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
70+
// CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
71+
// CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
72+
// CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
73+
// CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
74+
// Reduction Tree
75+
// CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
76+
// CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
77+
// CHECK-NEXT: %[[G21PRE:.+]] = comb.and %[[P2]], %[[G1]] : i1
78+
// CHECK-NEXT: comb.or %[[G2]], %[[G21PRE]] : i1
79+
// CHECK-NEXT: %[[P21:.+]] = comb.and %[[P2]], %[[P1]] : i1
80+
// CHECK-NEXT: comb.and %[[P21]], %[[G0]] : i1
81+
// Sum Completion
82+
// CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
83+
// CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
84+
// CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1
85+
// CHECK-NEXT: hw.output %[[RES]] : i3
86+
%0 = comb.add %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
87+
hw.output %0 : i3
88+
}
89+
90+
// CHECK-LABEL: @add_brent_kung
91+
hw.module @add_brent_kung(in %lhs: i4, in %rhs: i4, out out: i4) {
92+
// CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i4) -> i1
93+
// CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i4) -> i1
94+
// CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i4) -> i1
95+
// CHECK-NEXT: %[[LHS3:.+]] = comb.extract %lhs from 3 : (i4) -> i1
96+
// CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i4) -> i1
97+
// CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i4) -> i1
98+
// CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i4) -> i1
99+
// CHECK-NEXT: %[[RHS3:.+]] = comb.extract %rhs from 3 : (i4) -> i1
100+
//
101+
// CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
102+
// CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
103+
// CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
104+
// CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
105+
// CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
106+
// CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
107+
// CHECK-NEXT: %[[P3:.+]] = comb.xor %[[LHS3]], %[[RHS3]] : i1
108+
// CHECK-NEXT: %[[G3:.+]] = comb.and %[[LHS3]], %[[RHS3]] : i1
109+
// Reduction Tree
110+
// CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
111+
// CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
112+
// CHECK-NEXT: comb.and %[[P1]], %[[P0]] : i1
113+
// CHECK-NEXT: %[[G32PRE:.+]] = comb.and %[[P3]], %[[G2]] : i1
114+
// CHECK-NEXT: comb.or %[[G3]], %[[G32PRE]] : i1
115+
// CHECK-NEXT: %[[P32:.+]] = comb.and %[[P3]], %[[P2]] : i1
116+
// CHECK-NEXT: comb.and %[[P32]], %[[G10]] : i1
117+
// CHECK-NEXT: %[[G20PRE:.+]] = comb.and %[[P2]], %[[G10]] : i1
118+
// CHECK-NEXT: %[[G20:.+]] = comb.or %[[G2]], %[[G20PRE]] : i1
119+
// Sum Completion
120+
// CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
121+
// CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
122+
// CHECK-NEXT: %[[S3:.+]] = comb.xor %[[P3]], %[[G20]] : i1
123+
// CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S3]], %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1, i1
124+
// CHECK-NEXT: hw.output %[[RES]] : i4
125+
%0 = comb.add %lhs, %rhs {synth.test.arch = "BRENT-KUNG"} : i4
126+
hw.output %0 : i4
127+
}
128+
32129
// CHECK-LABEL: @add_17
33130
hw.module @add_17(in %lhs: i17, in %rhs: i17, out out: i17) {
34131
%0 = comb.add %lhs, %rhs : i17

test/circt-synth/path-e2e.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
// COMMON-LABEL: # Longest Path Analysis result for "counter"
77
// COMMON-NEXT: Found 168 paths
88
// COMMON-NEXT: Found 32 unique end points
9-
// AIG-NEXT: Maximum path delay: 41
10-
// MIG-NEXT: Maximum path delay: 41
9+
// AIG-NEXT: Maximum path delay: 40
10+
// MIG-NEXT: Maximum path delay: 40
1111
// LUT6-NEXT: Maximum path delay: 7
1212
// Don't test detailed reports as they are not stable.
1313

0 commit comments

Comments
 (0)