@@ -480,17 +480,47 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
       return success();
     }
 
+    // Determine the adder architecture (it may be forced by an attribute).
+    auto arch = determineAdderArch(op, width);
+    if (arch == AdderArchitecture::RippleCarry)
+      return lowerRippleCarryAdder(op, inputs, rewriter);
+    return lowerParallelPrefixAdder(op, inputs, rewriter);
+  }
+
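+  // Supported adder architectures; they trade off depth, area, and fan-out.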
+  enum AdderArchitecture { RippleCarry, Sklansky, KoggeStone, BrentKung };
+  static AdderArchitecture determineAdderArch(Operation *op, int64_t width) {
+    auto strAttr = op->getAttrOfType<StringAttr>("synth.test.arch");
+    if (strAttr) {
+      return llvm::StringSwitch<AdderArchitecture>(strAttr.getValue())
+          .Case("SKLANSKY", Sklansky)
+          .Case("KOGGE-STONE", KoggeStone)
+          .Case("BRENT-KUNG", BrentKung)
+          .Case("RIPPLE-CARRY", RippleCarry);
+    }
+    // Otherwise, determine the architecture using width as a heuristic.
+    // TODO: Perform a more thorough analysis to motivate the choices, or
+    // implement an adder synthesis algorithm that constructs an optimal adder
+    // under the given timing constraints; see the work of Zimmermann.
+
+    // For very small adders, the overhead of a parallel prefix adder is
+    // likely not worth it.
     if (width < 8)
-      lowerRippleCarryAdder(op, inputs, rewriter);
-    else
-      lowerParallelPrefixAdder(op, inputs, rewriter);
+      return AdderArchitecture::RippleCarry;
 
-    return success();
+    // Sklansky is a good compromise for high performance, but its high
+    // fan-out may cause wiring congestion for very large adders.
+    if (width <= 32)
+      return AdderArchitecture::Sklansky;
+
+    // Kogge-Stone uses more area than Sklansky but has lower fan-out, so it
+    // may be preferable for larger adders.
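+    // (Brent-Kung, which trades extra depth for lower area and fan-out, is
+    // currently only selected explicitly via the attribute.)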
+    return AdderArchitecture::KoggeStone;
   }
 
   // Implement a basic ripple-carry adder for small bitwidths.
-  void lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
-                             ConversionPatternRewriter &rewriter) const {
+  LogicalResult
+  lowerRippleCarryAdder(comb::AddOp op, ValueRange inputs,
+                        ConversionPatternRewriter &rewriter) const {
     auto width = op.getType().getIntOrFloatBitWidth();
     // Implement a naive ripple-carry full adder.
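+    // For each bit i: sum_i = a_i XOR b_i XOR carry_i, and the next carry is
+    // carry_{i+1} = (a_i AND b_i) OR (carry_i AND (a_i XOR b_i)).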
     Value carry;
@@ -528,13 +558,15 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
                     << width << "\n");
 
     replaceOpWithNewOpAndCopyNamehint<comb::ConcatOp>(rewriter, op, results);
+    return success();
   }
 
-  // Implement a parallel prefix adder - with Kogge-Stone or Brent-Kung trees
-  // Will introduce unused signals for the carry bits but these will be removed
-  // by the AIG pass.
+  // Implement a parallel prefix adder using a Sklansky, Kogge-Stone, or
+  // Brent-Kung tree. This will introduce unused signals for the carry bits,
+  // but these will be removed by the AIG pass.
-  void lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
-                                ConversionPatternRewriter &rewriter) const {
+  LogicalResult
+  lowerParallelPrefixAdder(comb::AddOp op, ValueRange inputs,
+                           ConversionPatternRewriter &rewriter) const {
     auto width = op.getType().getIntOrFloatBitWidth();
 
     auto aBits = extractBits(rewriter, inputs[0]);
@@ -566,10 +598,33 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
     // Create copies of p and g for the prefix computation
     SmallVector<Value> pPrefix = p;
     SmallVector<Value> gPrefix = g;
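+    // After the prefix network runs, gPrefix[i] is the carry generated out of
+    // bits [0..i] and pPrefix[i] is the group propagate over the same range.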
-    if (width < 32)
+
+    // Select the prefix-tree architecture (it may be forced by an attribute).
+    auto arch = determineAdderArch(op, width);
+
+    switch (arch) {
+    case AdderArchitecture::RippleCarry:
+      llvm_unreachable("Ripple-carry should be handled separately");
+    case AdderArchitecture::Sklansky:
+      lowerSklanskyPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
+      break;
+    case AdderArchitecture::KoggeStone:
       lowerKoggeStonePrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
-    else
+      break;
+    case AdderArchitecture::BrentKung:
       lowerBrentKungPrefixTree(op, inputs, rewriter, pPrefix, gPrefix);
+      break;
+    }
 
     // Generate result sum bits
     // NOTE: The result is stored in reverse order.
@@ -592,6 +647,62 @@ struct CombAddOpConversion : OpConversionPattern<AddOp> {
       for (int64_t i = 1; i < width; ++i)
         llvm::dbgs() << "RES" << i << " = P" << i << " XOR G" << i - 1 << "\n";
     });
+
+    return success();
+  }
+
+  // Implement the Sklansky parallel prefix tree
+  // High fan-out, low depth, low area
+  void lowerSklanskyPrefixTree(comb::AddOp op, ValueRange inputs,
+                               ConversionPatternRewriter &rewriter,
+                               SmallVector<Value> &pPrefix,
+                               SmallVector<Value> &gPrefix) const {
+    auto width = op.getType().getIntOrFloatBitWidth();
+    SmallVector<Value> pPrefixNew = pPrefix;
+    SmallVector<Value> gPrefixNew = gPrefix;
+
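+    // Double-buffer the (P, G) updates so every node in a stage reads the
+    // previous stage's values. At each stage, the block ending at bit i - 1
+    // broadcasts its (G, P) pair to the next `stride` bits, which is what
+    // gives Sklansky its high fan-out. E.g. for width 8: stage 0 merges
+    // 1<-0, 3<-2, 5<-4, 7<-6; stage 1 merges {2,3}<-1, {6,7}<-5; stage 2
+    // merges {4,5,6,7}<-3.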
+    for (int64_t stride = 1; stride < width; stride *= 2) {
+      for (int64_t i = stride; i < width; i += 2 * stride) {
+        for (int64_t k = 0; k < stride && i + k < width; ++k) {
+          int64_t idx = i + k;
+          int64_t j = i - 1;
+          // Group generate: g_idx OR (p_idx AND g_j)
+          Value andPG = comb::AndOp::create(rewriter, op.getLoc(),
+                                            pPrefix[idx], gPrefix[j]);
+          gPrefixNew[idx] =
+              comb::OrOp::create(rewriter, op.getLoc(), gPrefix[idx], andPG);
+
+          // Group propagate: p_idx AND p_j
+          pPrefixNew[idx] = comb::AndOp::create(rewriter, op.getLoc(),
+                                                pPrefix[idx], pPrefix[j]);
+        }
+      }
+      pPrefix = pPrefixNew;
+      gPrefix = gPrefixNew;
+    }
+    LLVM_DEBUG({
+      int64_t stage = 0;
+      for (int64_t stride = 1; stride < width; stride *= 2) {
+        llvm::dbgs()
+            << "--------------------------------------- Sklansky Stage "
+            << stage << "\n";
+        for (int64_t i = stride; i < width; i += 2 * stride) {
+          for (int64_t k = 0; k < stride && i + k < width; ++k) {
+            int64_t idx = i + k;
+            int64_t j = i - 1;
+            // Group generate: g_idx OR (p_idx AND g_j)
+            llvm::dbgs() << "G" << idx << stage + 1 << " = G" << idx << stage
+                         << " OR (P" << idx << stage << " AND G" << j << stage
+                         << ")\n";
+
+            // Group propagate: p_idx AND p_j
+            llvm::dbgs() << "P" << idx << stage + 1 << " = P" << idx << stage
+                         << " AND P" << j << stage << "\n";
+          }
+        }
+        ++stage;
+      }
+    });
   }
 
   // Implement the Kogge-Stone parallel prefix tree