diff --git a/integration_test/circt-synth/comb-lowering-lec.mlir b/integration_test/circt-synth/comb-lowering-lec.mlir
index 7b835a57bd5c..6b191ccb3284 100644
--- a/integration_test/circt-synth/comb-lowering-lec.mlir
+++ b/integration_test/circt-synth/comb-lowering-lec.mlir
@@ -28,6 +28,34 @@ hw.module @add(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
   hw.output %0 : i4
 }
 
+// RUN: circt-lec %t.mlir %s -c1=add_ripple_carry -c2=add_ripple_carry --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_RIPPLE_CARRY
+// COMB_ADD_RIPPLE_CARRY: c1 == c2
+hw.module @add_ripple_carry(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
+  %0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "RIPPLE-CARRY"} : i4 // three-operand add; the attribute pins the ripple-carry lowering
+  hw.output %0 : i4
+}
+
+// RUN: circt-lec %t.mlir %s -c1=add_sklanskey -c2=add_sklanskey --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_SKLANSKEY
+// COMB_ADD_SKLANSKEY: c1 == c2
+hw.module @add_sklanskey(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
+  %0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "SKLANSKEY"} : i4 // three-operand add; the attribute pins the Sklansky prefix tree
+  hw.output %0 : i4
+}
+
+// RUN: circt-lec %t.mlir %s -c1=add_kogge_stone -c2=add_kogge_stone --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_KOGGE_STONE
+// COMB_ADD_KOGGE_STONE: c1 == c2
+hw.module @add_kogge_stone(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
+  %0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "KOGGE-STONE"} : i4 // three-operand add; the attribute pins the Kogge-Stone prefix tree
+  hw.output %0 : i4
+}
+
+// RUN: circt-lec %t.mlir %s -c1=add_brent_kung -c2=add_brent_kung --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_BRENT_KUNG
+// COMB_ADD_BRENT_KUNG: c1 == c2
+hw.module @add_brent_kung(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
+  %0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "BRENT-KUNG"} : i4 // three-operand add; the attribute pins the Brent-Kung prefix tree
+  hw.output %0 : i4
+}
+
 // RUN: circt-lec %t.mlir %s -c1=sub -c2=sub --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_SUB
 // COMB_SUB: c1 == c2
 hw.module @sub(in %lhs: i4, in %rhs: i4, out out: 
i4) {
diff --git a/lib/Conversion/CombToSynth/CombToSynth.cpp b/lib/Conversion/CombToSynth/CombToSynth.cpp
index 706cf299bbb8..70acfbcbece8 100644
--- a/lib/Conversion/CombToSynth/CombToSynth.cpp
+++ b/lib/Conversion/CombToSynth/CombToSynth.cpp
@@ -480,17 +480,59 @@ struct CombAddOpConversion : OpConversionPattern {
       return success();
     }
 
+    // Check if the architecture is specified by an attribute.
+    auto arch = determineAdderArch(op, width);
+    if (arch == AdderArchitecture::RippleCarry)
+      return lowerRippleCarryAdder(op, inputs, rewriter);
+    return lowerParallelPrefixAdder(op, inputs, rewriter);
+  }
+
+  /// Adder implementations this pattern can emit.
+  enum AdderArchitecture { RippleCarry, Sklanskey, KoggeStone, BrentKung };
+
+  /// Select the adder architecture for `op` with a `width`-bit result.
+  ///
+  /// A `synth.test.arch` string attribute on the op overrides the width-based
+  /// choice. An unrecognized attribute value falls back to the width-based
+  /// choice instead of falling off the end of the StringSwitch, which asserts
+  /// in debug builds and is undefined behavior otherwise.
+  static AdderArchitecture determineAdderArch(Operation *op, int64_t width) {
+    AdderArchitecture byWidth = determineAdderArchByWidth(width);
+    auto strAttr = op->getAttrOfType<StringAttr>("synth.test.arch");
+    if (!strAttr)
+      return byWidth;
+    return llvm::StringSwitch<AdderArchitecture>(strAttr.getValue())
+        .Case("SKLANSKEY", Sklanskey)
+        .Case("KOGGE-STONE", KoggeStone)
+        .Case("BRENT-KUNG", BrentKung)
+        .Case("RIPPLE-CARRY", RippleCarry)
+        .Default(byWidth);
+  }
+
+  /// Choose an adder architecture from the bit width alone.
+  // TODO: Perform a more thorough analysis to motivate the choices or
+  // implement an adder synthesis algorithm to construct an optimal adder
+  // under the given timing constraints - see the work of Zimmermann
+  static AdderArchitecture determineAdderArchByWidth(int64_t width) {
+    // For very small adders, overhead of a parallel prefix adder is likely not
+    // worth it.
     if (width < 8)
-      lowerRippleCarryAdder(op, inputs, rewriter);
-    else
-      lowerParallelPrefixAdder(op, inputs, rewriter);
+      return AdderArchitecture::RippleCarry;
 
-    return success();
+    // Sklansky is a good compromise for high-performance, but has high fanout
+    // which may lead to wiring congestion for very large adders.
+    if (width <= 32)
+      return AdderArchitecture::Sklanskey;
+
+    // Kogge-Stone uses greater area than Sklansky but has lower fanout thus
+    // may be preferable for larger adders.
+    return AdderArchitecture::KoggeStone;
   }
 
   // Implement a basic ripple-carry adder for small bitwidths.
-  void lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
-                             ConversionPatternRewriter &rewriter) const {
+  LogicalResult
+  lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
+                        ConversionPatternRewriter &rewriter) const {
     auto width = op.getType().getIntOrFloatBitWidth();
     // Implement a naive Ripple-carry full adder.
     Value carry;
@@ -528,13 +558,16 @@ struct CombAddOpConversion : OpConversionPattern {
                             << width << "\n");
 
     replaceOpWithNewOpAndCopyNamehint(rewriter, op, results);
+    return success();
   }
 
-  // Implement a parallel prefix adder - with Kogge-Stone or Brent-Kung trees
+  // Implement a parallel prefix adder - with the Sklansky, Kogge-Stone or
+  // Brent-Kung tree chosen by determineAdderArch.
   // Will introduce unused signals for the carry bits but these will be removed
   // by the AIG pass.
-  void lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
-                                ConversionPatternRewriter &rewriter) const {
+  LogicalResult
+  lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
+                           ConversionPatternRewriter &rewriter) const {
     auto width = op.getType().getIntOrFloatBitWidth();
 
     auto aBits = extractBits(rewriter, inputs[0]);
@@ -566,10 +598,23 @@ struct CombAddOpConversion : OpConversionPattern {
     // Create copies of p and g for the prefix computation
     SmallVector<Value> pPrefix = p;
     SmallVector<Value> gPrefix = g;
-    if (width < 32)
+
+    // Build the prefix tree for the architecture chosen by determineAdderArch.
+    // pPrefix/gPrefix are handed to the selected helper, which updates them.
+    auto arch = determineAdderArch(op, width);
+    switch (arch) {
+    case AdderArchitecture::RippleCarry:
+      llvm_unreachable("Ripple-Carry should be handled separately");
+    case AdderArchitecture::Sklanskey:
+      lowerSklanskeyPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
+      break;
+    case AdderArchitecture::KoggeStone:
       lowerKoggeStonePrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
-    else
+      break;
+    case AdderArchitecture::BrentKung:
       lowerBrentKungPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
+      break;
+    }
 
     // Generate result sum bits
     // NOTE: The result is stored in reverse order.
@@ -592,6 +647,68 @@ struct CombAddOpConversion : OpConversionPattern {
       for (int64_t i = 1; i < width; ++i)
         llvm::dbgs() << "RES" << i << " = P" << i << " XOR G" << i - 1 << "\n";
     });
+
+    return success();
+  }
+
+  // Implement the Sklansky parallel prefix tree
+  // High fan-out, low depth, low area
+  //
+  // pPrefix/gPrefix carry the propagate/generate signals in and the prefix
+  // results out. At each stage every index in the upper half of a
+  // 2*stride-wide block is combined with the last index of the lower half.
+  void lowerSklanskeyPrefixTree(comb::AddOp op, ValueRange inputs,
+                                ConversionPatternRewriter &rewriter,
+                                SmallVector<Value> &pPrefix,
+                                SmallVector<Value> &gPrefix) const {
+    auto width = op.getType().getIntOrFloatBitWidth();
+    auto loc = op.getLoc();
+    // Each stage writes into copies so that every combination reads the signals
+    // of the previous stage.
+    SmallVector<Value> pPrefixNew = pPrefix;
+    SmallVector<Value> gPrefixNew = gPrefix;
+
+    for (int64_t stride = 1; stride < width; stride *= 2) {
+      for (int64_t i = stride; i < width; i += 2 * stride) {
+        // Last index of the lower half; shared by the whole upper half.
+        int64_t j = i - 1;
+        for (int64_t idx = i; idx < i + stride && idx < width; ++idx) {
+          // Group generate: g_idx OR (p_idx AND g_j)
+          Value andPG =
+              comb::AndOp::create(rewriter, loc, pPrefix[idx], gPrefix[j]);
+          gPrefixNew[idx] =
+              comb::OrOp::create(rewriter, loc, gPrefix[idx], andPG);
+
+          // Group propagate: p_idx AND p_j
+          pPrefixNew[idx] =
+              comb::AndOp::create(rewriter, loc, pPrefix[idx], pPrefix[j]);
+        }
+      }
+      pPrefix = pPrefixNew;
+      gPrefix = gPrefixNew;
+    }
+    LLVM_DEBUG({
+      int64_t stage = 0;
+      for (int64_t stride = 1; stride < width; stride *= 2) {
+        llvm::dbgs()
+            << "--------------------------------------- Sklanskey Stage "
+            << stage << "\n";
+        for (int64_t i = stride; i < width; i += 2 * stride) {
+          int64_t j = i - 1;
+          for (int64_t idx = i; idx < i + stride && idx < width; ++idx) {
+            // Group generate: g_idx OR (p_idx AND g_j)
+            llvm::dbgs() << "G" << idx << stage + 1 << " = G" << idx << stage
+                         << " OR (P" << idx << stage << " AND G" << j << stage
+                         << ")\n";
+
+            // Group propagate: p_idx AND p_j
+            llvm::dbgs() << "P" << idx << stage + 1 << " = P" << idx << stage
+                         << " AND P" << j << stage << "\n";
+          }
+        }
+        ++stage;
+      }
+    });
   }
 
   // Implement the Kogge-Stone parallel prefix tree
diff --git 
a/test/Conversion/CombToSynth/comb-to-aig-arith.mlir b/test/Conversion/CombToSynth/comb-to-aig-arith.mlir
index 3f03ee1cbe3d..c1c3a4daad4d 100644
--- a/test/Conversion/CombToSynth/comb-to-aig-arith.mlir
+++ b/test/Conversion/CombToSynth/comb-to-aig-arith.mlir
@@ -25,10 +25,107 @@ hw.module @add(in %lhs: i2, in %rhs: i2, out out: i2) {
   // CHECK-NEXT: %[[sum1:.*]] = comb.xor bin %[[lhs1]], %[[rhs1]], %[[carry0]] : i1
   // CHECK-NEXT: %[[concat:.*]] = comb.concat %[[sum1]], %[[sum0]] : i1, i1
   // CHECK-NEXT: hw.output %[[concat]] : i2
-  %0 = comb.add %lhs, %rhs : i2
+  %0 = comb.add %lhs, %rhs {synth.test.arch = "RIPPLE-CARRY"} : i2
   hw.output %0 : i2
 }
 
+// CHECK-LABEL: @add_sklanskey
+hw.module @add_sklanskey(in %lhs: i3, in %rhs: i3, out out: i3) {
+  // CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i3) -> i1
+  // CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i3) -> i1
+  // CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i3) -> i1
+  // CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i3) -> i1
+  // CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i3) -> i1
+  // CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i3) -> i1
+  // CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
+  // CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
+  // CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
+  // CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
+  // CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
+  // CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
+  // Reduction tree (Sklansky): bit 1 is combined with bit 0, then bit 2 with the [1:0] group.
+  // CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
+  // CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
+  // CHECK-NEXT: comb.and %[[P1]], %[[P0]] : i1
+  // CHECK-NEXT: comb.and %[[P2]], %[[G10]] : i1
+  // Sum completion: bit 0 is P0; bit i is P[i] XOR the generate of the bits below it.
+  // CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
+  // CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
+  // CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1
+  // CHECK-NEXT: hw.output %[[RES]] : i3
+  %0 = comb.add %lhs, %rhs {synth.test.arch = "SKLANSKEY"} : i3
+  hw.output %0 : i3
+}
+
+// CHECK-LABEL: @add_kogge_stone
+hw.module @add_kogge_stone(in %lhs: i3, in %rhs: i3, out out: i3) {
+  // CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i3) -> i1
+  // CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i3) -> i1
+  // CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i3) -> i1
+  // CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i3) -> i1
+  // CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i3) -> i1
+  // CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i3) -> i1
+  // CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
+  // CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
+  // CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
+  // CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
+  // CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
+  // CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
+  // Reduction tree (Kogge-Stone): each bit is combined with the bit one below it, then bit 2 with bit 0.
+  // CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
+  // CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
+  // CHECK-NEXT: %[[G21PRE:.+]] = comb.and %[[P2]], %[[G1]] : i1
+  // CHECK-NEXT: comb.or %[[G2]], %[[G21PRE]] : i1
+  // CHECK-NEXT: %[[P21:.+]] = comb.and %[[P2]], %[[P1]] : i1
+  // CHECK-NEXT: comb.and %[[P21]], %[[G0]] : i1
+  // Sum completion: bit 0 is P0; bit i is P[i] XOR the generate of the bits below it.
+  // CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
+  // CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
+  // CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1
+  // CHECK-NEXT: hw.output %[[RES]] : i3
+  %0 = comb.add %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
+  hw.output %0 : i3
+}
+
+// CHECK-LABEL: @add_brent_kung
+hw.module @add_brent_kung(in %lhs: i4, in %rhs: i4, out out: i4) {
+  // CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i4) -> i1
+  // CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i4) -> i1
+  // CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i4) -> i1
+  // CHECK-NEXT: %[[LHS3:.+]] = comb.extract %lhs from 3 : (i4) -> i1
+  // CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i4) -> i1
+  // CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i4) -> i1
+  // CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i4) -> i1
+  // CHECK-NEXT: %[[RHS3:.+]] = comb.extract %rhs from 3 : (i4) -> i1
+  // Per-bit propagate (xor) and generate (and) signals.
+  // CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
+  // CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
+  // CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
+  // CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
+  // CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
+  // CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
+  // CHECK-NEXT: %[[P3:.+]] = comb.xor %[[LHS3]], %[[RHS3]] : i1
+  // CHECK-NEXT: %[[G3:.+]] = comb.and %[[LHS3]], %[[RHS3]] : i1
+  // Reduction tree (Brent-Kung): pairs [1:0] and [3:2] are formed, [3:2] is merged with [1:0], then bit 2 is combined with [1:0].
+  // CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
+  // CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
+  // CHECK-NEXT: comb.and %[[P1]], %[[P0]] : i1
+  // CHECK-NEXT: %[[G32PRE:.+]] = comb.and %[[P3]], %[[G2]] : i1
+  // CHECK-NEXT: comb.or %[[G3]], %[[G32PRE]] : i1
+  // CHECK-NEXT: %[[P32:.+]] = comb.and %[[P3]], %[[P2]] : i1
+  // CHECK-NEXT: comb.and %[[P32]], %[[G10]] : i1
+  // CHECK-NEXT: %[[G20PRE:.+]] = comb.and %[[P2]], %[[G10]] : i1
+  // CHECK-NEXT: %[[G20:.+]] = comb.or %[[G2]], %[[G20PRE]] : i1
+  // Sum completion: bit 0 is P0; bit i is P[i] XOR the generate of the bits below it.
+  // CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
+  // CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
+  // CHECK-NEXT: %[[S3:.+]] = comb.xor %[[P3]], %[[G20]] : i1
+  // CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S3]], %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1, i1
+  // CHECK-NEXT: hw.output %[[RES]] : i4
+  %0 = comb.add %lhs, %rhs {synth.test.arch = "BRENT-KUNG"} : i4
+  hw.output %0 : i4
+}
+
// CHECK-LABEL: @add_17 hw.module @add_17(in %lhs: i17, in %rhs: i17, out out: i17) { %0 = comb.add %lhs, %rhs : i17 diff --git a/test/circt-synth/path-e2e.mlir b/test/circt-synth/path-e2e.mlir index 79d3e6a54b16..407a7b1052bc 100644 --- a/test/circt-synth/path-e2e.mlir +++ b/test/circt-synth/path-e2e.mlir @@ -6,8 +6,8 @@ // COMMON-LABEL: # Longest Path Analysis result for "counter" // COMMON-NEXT: Found 168 paths // COMMON-NEXT: Found 32 unique end points -// AIG-NEXT: Maximum path delay: 41 -// MIG-NEXT: Maximum path delay: 41 +// AIG-NEXT: Maximum path delay: 40 +// MIG-NEXT: Maximum path delay: 40 // LUT6-NEXT: Maximum path delay: 7 // Don't test detailed reports as they are not stable.