Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions integration_test/circt-synth/comb-lowering-lec.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,34 @@ hw.module @add(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
hw.output %0 : i4
}

// Equivalence check for a 4-bit three-operand add whose lowering is pinned to
// the ripple-carry architecture through the synth.test.arch attribute.
// circt-lec compares @add_ripple_carry from %t.mlir with the same module in
// this file and has to report both circuits as equal.
// NOTE(review) %t.mlir is presumably the lowered form of this file produced by
// an earlier command that is not visible in this chunk - confirm.
// RUN: circt-lec %t.mlir %s -c1=add_ripple_carry -c2=add_ripple_carry --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_RIPPLE_CARRY
// COMB_ADD_RIPPLE_CARRY: c1 == c2
hw.module @add_ripple_carry(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "RIPPLE-CARRY"} : i4
hw.output %0 : i4
}

// Equivalence check for a 4-bit three-operand add whose lowering is pinned to
// the Sklansky prefix tree (spelled SKLANSKEY in the attribute value).
// circt-lec compares @add_sklanskey from %t.mlir with the same module in this
// file and has to report both circuits as equal.
// NOTE(review) %t.mlir is presumably the lowered form of this file produced by
// an earlier command that is not visible in this chunk - confirm.
// RUN: circt-lec %t.mlir %s -c1=add_sklanskey -c2=add_sklanskey --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_SKLANSKEY
// COMB_ADD_SKLANSKEY: c1 == c2
hw.module @add_sklanskey(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "SKLANSKEY"} : i4
hw.output %0 : i4
}

// Equivalence check for a 4-bit three-operand add whose lowering is pinned to
// the Kogge-Stone prefix tree through the synth.test.arch attribute.
// circt-lec compares @add_kogge_stone from %t.mlir with the same module in
// this file and has to report both circuits as equal.
// NOTE(review) %t.mlir is presumably the lowered form of this file produced by
// an earlier command that is not visible in this chunk - confirm.
// RUN: circt-lec %t.mlir %s -c1=add_kogge_stone -c2=add_kogge_stone --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_KOGGE_STONE
// COMB_ADD_KOGGE_STONE: c1 == c2
hw.module @add_kogge_stone(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "KOGGE-STONE"} : i4
hw.output %0 : i4
}

// Equivalence check for a 4-bit three-operand add whose lowering is pinned to
// the Brent-Kung prefix tree through the synth.test.arch attribute.
// circt-lec compares @add_brent_kung from %t.mlir with the same module in
// this file and has to report both circuits as equal.
// NOTE(review) %t.mlir is presumably the lowered form of this file produced by
// an earlier command that is not visible in this chunk - confirm.
// RUN: circt-lec %t.mlir %s -c1=add_brent_kung -c2=add_brent_kung --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_ADD_BRENT_KUNG
// COMB_ADD_BRENT_KUNG: c1 == c2
hw.module @add_brent_kung(in %arg0: i4, in %arg1: i4, in %arg2: i4, out add: i4) {
%0 = comb.add %arg0, %arg1, %arg2 {synth.test.arch = "BRENT-KUNG"} : i4
hw.output %0 : i4
}

// RUN: circt-lec %t.mlir %s -c1=sub -c2=sub --shared-libs=%libz3 | FileCheck %s --check-prefix=COMB_SUB
// COMB_SUB: c1 == c2
hw.module @sub(in %lhs: i4, in %rhs: i4, out out: i4) {
Expand Down
131 changes: 121 additions & 10 deletions lib/Conversion/CombToSynth/CombToSynth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,17 +480,47 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
return success();
}

// Check if the architecture is specified by an attribute.
auto arch = determineAdderArch(op, width);
if (arch == AdderArchitecture::RippleCarry)
return lowerRippleCarryAdder(op, inputs, rewriter);
return lowerParallelPrefixAdder(op, inputs, rewriter);
}

// Adder architectures that a comb.add can be lowered to.
enum AdderArchitecture { RippleCarry, Sklanskey, KoggeStone, BrentKung };

// Select the adder architecture used to lower `op`, an add of `width` bits.
//
// A "synth.test.arch" string attribute on the op forces a specific
// architecture. Recognized values are "RIPPLE-CARRY", "SKLANSKEY",
// "KOGGE-STONE" and "BRENT-KUNG". When the attribute is absent, or holds a
// value that is not recognized, the choice falls back to a width heuristic.
static AdderArchitecture determineAdderArch(Operation *op, int64_t width) {
  // Determine using width as a heuristic.
  // TODO: Perform a more thorough analysis to motivate the choices or
  // implement an adder synthesis algorithm to construct an optimal adder
  // under the given timing constraints - see the work of Zimmermann
  AdderArchitecture byWidth;
  if (width < 8) {
    // For very small adders, overhead of a parallel prefix adder is likely
    // not worth it.
    byWidth = AdderArchitecture::RippleCarry;
  } else if (width <= 32) {
    // Sklanskey is a good compromise for high-performance, but has high
    // fanout which may lead to wiring congestion for very large adders.
    byWidth = AdderArchitecture::Sklanskey;
  } else {
    // Kogge-Stone uses greater area than Sklanskey but has lower fanout thus
    // may be preferable for larger adders.
    byWidth = AdderArchitecture::KoggeStone;
  }

  // Check if the architecture is specified by an attribute. The explicit
  // Default matters: converting a StringSwitch that matched no case and has
  // no default asserts, so an unknown attribute value must map to something.
  if (auto strAttr = op->getAttrOfType<StringAttr>("synth.test.arch"))
    return llvm::StringSwitch<AdderArchitecture>(strAttr.getValue())
        .Case("SKLANSKEY", Sklanskey)
        .Case("KOGGE-STONE", KoggeStone)
        .Case("BRENT-KUNG", BrentKung)
        .Case("RIPPLE-CARRY", RippleCarry)
        .Default(byWidth);

  return byWidth;
}

// Implement a basic ripple-carry adder for small bitwidths.
void lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
ConversionPatternRewriter &rewriter) const {
LogicalResult
lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
ConversionPatternRewriter &rewriter) const {
auto width = op.getType().getIntOrFloatBitWidth();
// Implement a naive Ripple-carry full adder.
Value carry;
Expand Down Expand Up @@ -528,13 +558,15 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
<< width << "\n");

replaceOpWithNewOpAndCopyNamehint<comb::ConcatOp>(rewriter, op, results);
return success();
}

// Implement a parallel prefix adder using a Sklansky, Kogge-Stone or
// Brent-Kung prefix tree. This will introduce unused signals for the carry
// bits, but these will be removed by the AIG pass.
void lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
ConversionPatternRewriter &rewriter) const {
LogicalResult
lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
ConversionPatternRewriter &rewriter) const {
auto width = op.getType().getIntOrFloatBitWidth();

auto aBits = extractBits(rewriter, inputs[0]);
Expand Down Expand Up @@ -566,10 +598,33 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
// Create copies of p and g for the prefix computation
SmallVector<Value> pPrefix = p;
SmallVector<Value> gPrefix = g;
if (width < 32)

// Check if the architecture is specified by an attribute.
auto arch = determineAdderArch(op, width);

switch (arch) {
case AdderArchitecture::RippleCarry:
llvm_unreachable("Ripple-Carry should be handled separately");
break;
case AdderArchitecture::Sklanskey:
lowerSklanskeyPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
break;
case AdderArchitecture::KoggeStone:
lowerKoggeStonePrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
else
break;
case AdderArchitecture::BrentKung:
lowerBrentKungPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
break;
}
// if (arch == AdderArchitecture::Sklanskey) {
// lowerSklanskeyPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
// } else if (arch == AdderArchitecture::KoggeStone) {
// lowerKoggeStonePrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
// } else if (arch == AdderArchitecture::BrentKung) {
// lowerBrentKungPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
// } else {
// return failure();
// }

// Generate result sum bits
// NOTE: The result is stored in reverse order.
Expand All @@ -592,6 +647,62 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
for (int64_t i = 1; i < width; ++i)
llvm::dbgs() << "RES" << i << " = P" << i << " XOR G" << i - 1 << "\n";
});

return success();
}

// Build the Sklansky parallel prefix tree (high fan-out, low depth, low area)
// over the per-bit propagate/generate signals.
//
// `pPrefix` and `gPrefix` are read as the inputs of each stage and are
// overwritten with that stage's outputs, so on return they hold the values
// produced by the last stage. The stride doubles every stage; within a stage
// the upper half of every 2*stride-wide block is combined with the last bit
// of the block's lower half. `inputs` is not used by this function.
void lowerSklanskeyPrefixTree(comb::AddOp op, ValueRange inputs,
                              ConversionPatternRewriter &rewriter,
                              SmallVector<Value> &pPrefix,
                              SmallVector<Value> &gPrefix) const {
  const int64_t numBits = op.getType().getIntOrFloatBitWidth();
  auto loc = op.getLoc();

  // Stage outputs are staged in scratch vectors so that every combine in a
  // stage reads the values from the previous stage.
  SmallVector<Value> nextP = pPrefix;
  SmallVector<Value> nextG = gPrefix;

  for (int64_t span = 1; span < numBits; span *= 2) {
    for (int64_t first = span; first < numBits; first += 2 * span) {
      // All bits of this upper half share the same lower-half source bit.
      const int64_t src = first - 1;
      const int64_t limit = first + span < numBits ? first + span : numBits;
      for (int64_t dst = first; dst < limit; ++dst) {
        // Group generate: g_dst OR (p_dst AND g_src)
        Value viaPropagate =
            comb::AndOp::create(rewriter, loc, pPrefix[dst], gPrefix[src]);
        nextG[dst] =
            comb::OrOp::create(rewriter, loc, gPrefix[dst], viaPropagate);

        // Group propagate: p_dst AND p_src
        nextP[dst] =
            comb::AndOp::create(rewriter, loc, pPrefix[dst], pPrefix[src]);
      }
    }
    pPrefix = nextP;
    gPrefix = nextG;
  }

  // Replay the same iteration space to print the equation of every node.
  LLVM_DEBUG({
    int64_t level = 0;
    for (int64_t span = 1; span < numBits; span *= 2) {
      llvm::dbgs()
          << "--------------------------------------- Sklanskey Stage "
          << level << "\n";
      for (int64_t first = span; first < numBits; first += 2 * span) {
        const int64_t src = first - 1;
        const int64_t limit = first + span < numBits ? first + span : numBits;
        for (int64_t dst = first; dst < limit; ++dst) {
          // Group generate: g_dst OR (p_dst AND g_src)
          llvm::dbgs() << "G" << dst << level + 1 << " = G" << dst << level
                       << " OR (P" << dst << level << " AND G" << src << level
                       << ")\n";

          // Group propagate: p_dst AND p_src
          llvm::dbgs() << "P" << dst << level + 1 << " = P" << dst << level
                       << " AND P" << src << level << "\n";
        }
      }
      ++level;
    }
  });
}

// Implement the Kogge-Stone parallel prefix tree
Expand Down
99 changes: 98 additions & 1 deletion test/Conversion/CombToSynth/comb-to-aig-arith.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,107 @@ hw.module @add(in %lhs: i2, in %rhs: i2, out out: i2) {
// CHECK-NEXT: %[[sum1:.*]] = comb.xor bin %[[lhs1]], %[[rhs1]], %[[carry0]] : i1
// CHECK-NEXT: %[[concat:.*]] = comb.concat %[[sum1]], %[[sum0]] : i1, i1
// CHECK-NEXT: hw.output %[[concat]] : i2
%0 = comb.add %lhs, %rhs : i2
%0 = comb.add %lhs, %rhs {synth.test.arch = "RIPPLE-CARRY"} : i2
hw.output %0 : i2
}

// Pins the exact op sequence emitted for a 3-bit add when the Sklansky prefix
// tree is requested through the synth.test.arch attribute. Ops matched below
// without a capture are not referenced by any later check line.
// CHECK-LABEL: @add_sklanskey
hw.module @add_sklanskey(in %lhs: i3, in %rhs: i3, out out: i3) {
// Bit extraction of both operands
// CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i3) -> i1
// CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i3) -> i1
// CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i3) -> i1
// CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i3) -> i1
// CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i3) -> i1
// CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i3) -> i1
// Per-bit propagate (xor) and generate (and) signals
// CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
// CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
// CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
// CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
// CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
// CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
// Reduction Tree
// CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
// CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
// CHECK-NEXT: comb.and %[[P1]], %[[P0]] : i1
// CHECK-NEXT: comb.and %[[P2]], %[[G10]] : i1
// Sum Completion
// Bit 0 of the result is P0 itself, higher bits xor in the carry from below
// CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
// CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
// CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1
// CHECK-NEXT: hw.output %[[RES]] : i3
%0 = comb.add %lhs, %rhs {synth.test.arch = "SKLANSKEY"} : i3
hw.output %0 : i3
}

// Pins the exact op sequence emitted for a 3-bit add when the Kogge-Stone
// prefix tree is requested through the synth.test.arch attribute. Ops matched
// below without a capture are not referenced by any later check line.
// CHECK-LABEL: @add_kogge_stone
hw.module @add_kogge_stone(in %lhs: i3, in %rhs: i3, out out: i3) {
// Bit extraction of both operands
// CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i3) -> i1
// CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i3) -> i1
// CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i3) -> i1
// CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i3) -> i1
// CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i3) -> i1
// CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i3) -> i1
// Per-bit propagate (xor) and generate (and) signals
// CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
// CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
// CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
// CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
// CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
// CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
// Reduction Tree
// CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
// CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
// CHECK-NEXT: %[[G21PRE:.+]] = comb.and %[[P2]], %[[G1]] : i1
// CHECK-NEXT: comb.or %[[G2]], %[[G21PRE]] : i1
// CHECK-NEXT: %[[P21:.+]] = comb.and %[[P2]], %[[P1]] : i1
// CHECK-NEXT: comb.and %[[P21]], %[[G0]] : i1
// Sum Completion
// Bit 0 of the result is P0 itself, higher bits xor in the carry from below
// CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
// CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
// CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1
// CHECK-NEXT: hw.output %[[RES]] : i3
%0 = comb.add %lhs, %rhs {synth.test.arch = "KOGGE-STONE"} : i3
hw.output %0 : i3
}

// Pins the exact op sequence emitted for a 4-bit add when the Brent-Kung
// prefix tree is requested through the synth.test.arch attribute. Ops matched
// below without a capture are not referenced by any later check line.
// CHECK-LABEL: @add_brent_kung
hw.module @add_brent_kung(in %lhs: i4, in %rhs: i4, out out: i4) {
// Bit extraction of both operands
// CHECK-NEXT: %[[LHS0:.+]] = comb.extract %lhs from 0 : (i4) -> i1
// CHECK-NEXT: %[[LHS1:.+]] = comb.extract %lhs from 1 : (i4) -> i1
// CHECK-NEXT: %[[LHS2:.+]] = comb.extract %lhs from 2 : (i4) -> i1
// CHECK-NEXT: %[[LHS3:.+]] = comb.extract %lhs from 3 : (i4) -> i1
// CHECK-NEXT: %[[RHS0:.+]] = comb.extract %rhs from 0 : (i4) -> i1
// CHECK-NEXT: %[[RHS1:.+]] = comb.extract %rhs from 1 : (i4) -> i1
// CHECK-NEXT: %[[RHS2:.+]] = comb.extract %rhs from 2 : (i4) -> i1
// CHECK-NEXT: %[[RHS3:.+]] = comb.extract %rhs from 3 : (i4) -> i1
//
// Per-bit propagate (xor) and generate (and) signals
// CHECK-NEXT: %[[P0:.+]] = comb.xor %[[LHS0]], %[[RHS0]] : i1
// CHECK-NEXT: %[[G0:.+]] = comb.and %[[LHS0]], %[[RHS0]] : i1
// CHECK-NEXT: %[[P1:.+]] = comb.xor %[[LHS1]], %[[RHS1]] : i1
// CHECK-NEXT: %[[G1:.+]] = comb.and %[[LHS1]], %[[RHS1]] : i1
// CHECK-NEXT: %[[P2:.+]] = comb.xor %[[LHS2]], %[[RHS2]] : i1
// CHECK-NEXT: %[[G2:.+]] = comb.and %[[LHS2]], %[[RHS2]] : i1
// CHECK-NEXT: %[[P3:.+]] = comb.xor %[[LHS3]], %[[RHS3]] : i1
// CHECK-NEXT: %[[G3:.+]] = comb.and %[[LHS3]], %[[RHS3]] : i1
// Reduction Tree
// CHECK-NEXT: %[[G10PRE:.+]] = comb.and %[[P1]], %[[G0]] : i1
// CHECK-NEXT: %[[G10:.+]] = comb.or %[[G1]], %[[G10PRE]] : i1
// CHECK-NEXT: comb.and %[[P1]], %[[P0]] : i1
// CHECK-NEXT: %[[G32PRE:.+]] = comb.and %[[P3]], %[[G2]] : i1
// CHECK-NEXT: comb.or %[[G3]], %[[G32PRE]] : i1
// CHECK-NEXT: %[[P32:.+]] = comb.and %[[P3]], %[[P2]] : i1
// CHECK-NEXT: comb.and %[[P32]], %[[G10]] : i1
// CHECK-NEXT: %[[G20PRE:.+]] = comb.and %[[P2]], %[[G10]] : i1
// CHECK-NEXT: %[[G20:.+]] = comb.or %[[G2]], %[[G20PRE]] : i1
// Sum Completion
// Bit 0 of the result is P0 itself, higher bits xor in the carry from below
// CHECK-NEXT: %[[S1:.+]] = comb.xor %[[P1]], %[[G0]] : i1
// CHECK-NEXT: %[[S2:.+]] = comb.xor %[[P2]], %[[G10]] : i1
// CHECK-NEXT: %[[S3:.+]] = comb.xor %[[P3]], %[[G20]] : i1
// CHECK-NEXT: %[[RES:.+]] = comb.concat %[[S3]], %[[S2]], %[[S1]], %[[P0]] : i1, i1, i1, i1
// CHECK-NEXT: hw.output %[[RES]] : i4
%0 = comb.add %lhs, %rhs {synth.test.arch = "BRENT-KUNG"} : i4
hw.output %0 : i4
}

// CHECK-LABEL: @add_17
hw.module @add_17(in %lhs: i17, in %rhs: i17, out out: i17) {
%0 = comb.add %lhs, %rhs : i17
Expand Down
4 changes: 2 additions & 2 deletions test/circt-synth/path-e2e.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
// COMMON-LABEL: # Longest Path Analysis result for "counter"
// COMMON-NEXT: Found 168 paths
// COMMON-NEXT: Found 32 unique end points
// AIG-NEXT: Maximum path delay: 41
// MIG-NEXT: Maximum path delay: 41
// AIG-NEXT: Maximum path delay: 40
// MIG-NEXT: Maximum path delay: 40
// LUT6-NEXT: Maximum path delay: 7
// Don't test detailed reports as they are not stable.

Expand Down