Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/components/multiplier.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ The parameters of the
- An optional `selectSignedMultiplicand` control signal which overrides the `signedMultiplicand` parameter allowing for runtime control of signed or unsigned operation with the same hardware. `signedMultiplicand` must be false if using this control signal.
- An optional `selectSignedMultiplier` control signal which overrides the `signedMultiplier` parameter allowing for runtime control of signed or unsigned operation with the same hardware. `signedMultiplier` must be false if using this control signal.
- An optional `clk`, as well as `enable` and `reset` that are used to add a pipestage in the `ColumnCompressor` to allow for pipelined operation.
- An optional `use42Compressors` boolean enables the `ColumnCompressor` to use 4:2 compressors in addition to 3:2 (Full Adder) and 2:2 (Half Adder) compressors.

Here is an example of use of the `CompressionTreeMultiplier` with one signed input:

Expand Down Expand Up @@ -145,6 +146,7 @@ The parameters of the
- An optional `selectSignedMultiplier` control signal which overrides the `signedMultiplier` parameter allowing for runtime control of signed or unsigned operation with the same hardware. `signedMultiplier` must be false if using this control signal.
- An optional `selectSignedAddend` control signal which overrides the `signedAddend` parameter allowing for runtime control of signed or unsigned operation with the same hardware. `signedAddend` must be false if using this control signal.
- An optional `clk`, as well as `enable` and `reset` that are used to add a pipestage in the `ColumnCompressor` to allow for pipelined operation.
- An optional `use42Compressors` boolean enables the `ColumnCompressor` to use 4:2 compressors in addition to 3:2 (Full Adder) and 2:2 (Half Adder) compressors.

Here is an example of using the `CompressionTreeMultiplyAccumulate` with all inputs as signed:

Expand Down
262 changes: 184 additions & 78 deletions lib/src/arithmetic/addend_compressor.dart
Original file line number Diff line number Diff line change
Expand Up @@ -11,61 +11,24 @@ import 'package:collection/collection.dart';
import 'package:meta/meta.dart';
import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/src/arithmetic/multiplier_lib.dart';

/// Base class for bit-level column compressor functions.
///
/// Registers a single input port named `compressBits` and two output ports,
/// `sum` and `carry`. The outputs are not driven here; the concrete
/// subclasses in this file assign them.
abstract class BitCompressor extends Module {
/// The module-internal view of the input bits to compress (the signal
/// returned by `addInput`, not the caller's original signal).
@protected
late final Logic compressBits;

/// The `sum` output of the compressor.
Logic get sum => output('sum');

/// The `carry` output of the compressor.
Logic get carry => output('carry');

/// Constructs a column compressor over [compressBits].
///
/// The input port is created with the same width as the signal passed in,
/// and the `sum` and `carry` output ports are declared.
BitCompressor(Logic compressBits) {
this.compressBits = addInput(
'compressBits',
compressBits,
width: compressBits.width,
);
addOutput('sum');
addOutput('carry');
}
}

/// 2-input column compressor (half-adder).
class Compressor2 extends BitCompressor {
/// Constructs a 2-input compressor (half-adder).
///
/// `sum` is driven by `compressBits.xor()` and `carry` by
/// `compressBits.and()`.
// NOTE(review): nothing here checks that exactly two bits were passed in;
// presumably the caller guarantees the width - confirm.
Compressor2(super.compressBits) {
sum <= compressBits.xor();
carry <= compressBits.and();
}
}

/// 3-input column compressor (full-adder).
class Compressor3 extends BitCompressor {
/// Constructs a 3-input column compressor (full-adder).
///
/// `sum` is driven by `compressBits.xor()`. `carry` is a mux controlled by
/// bit 0 that chooses between the OR and the AND of bits 2:1.
Compressor3(super.compressBits) {
sum <= compressBits.xor();
// Bit 0 picks between OR(bits 2:1) and AND(bits 2:1); together this forms
// the majority-of-three carry of a full adder.
carry <=
mux(compressBits[0], compressBits.slice(2, 1).or(),
compressBits.slice(2, 1).and());
}
}
import 'package:rohd_hcl/src/exceptions.dart';

/// Compress terms
enum CompressTermType {
/// A cout (horizontal carry)
cout,

/// A carry term
carry,

/// A sum term
sum,

/// A partial product term (from the original matrix)
pp
pp,

/// A cin (horizontal carry-in) term
cin
}

/// A compression term
Expand All @@ -76,6 +39,9 @@ class CompressTerm implements Comparable<CompressTerm> {
/// The inputs that drove this Term
late final List<CompressTerm> inputs;

/// The carry inputs that drove this Term, or null if there are none
late final List<CompressTerm>? carryInputs;

/// The row of the terminal
final int row;

Expand All @@ -85,26 +51,29 @@ class CompressTerm implements Comparable<CompressTerm> {
/// The Logic wire of the term
final Logic logic;

/// Estimated delay of the output of this CompessTerm
/// Estimated delay of the output of this CompressTerm
late double delay;

/// Estimated delay of a Sum term
static const sumDelay = 1.0;

/// Estimated delay of a Carry term
static const carryDelay = 0.75;

/// CompressTerm constructor
CompressTerm(this.type, this.logic, this.inputs, this.row, this.col) {
CompressTerm(BitCompressor? compressor, this.type, this.logic, this.inputs,
this.row, this.col,
{this.carryInputs}) {
delay = 0.0;
final deltaDelay = switch (type) {
CompressTermType.carry => carryDelay,
CompressTermType.sum => sumDelay,
CompressTermType.pp => 0.0
};
for (final i in inputs) {
if (i.delay + deltaDelay > delay) {
delay = i.delay + deltaDelay;
if (compressor != null) {
final deltaDelay = compressor.evaluateDelay(type, CompressTermType.pp);
for (final i in inputs) {
if (i.delay + deltaDelay > delay) {
delay = i.delay + deltaDelay;
}
}
if (carryInputs != null) {
final deltaDelay2 =
compressor.evaluateDelay(type, CompressTermType.cin);
for (final c in carryInputs!) {
if (c.delay + deltaDelay2 > delay) {
delay = c.delay + deltaDelay2;
}
}
}
}
}
Expand Down Expand Up @@ -139,6 +108,11 @@ class CompressTerm implements Comparable<CompressTerm> {
final majority =
(count > termValues.length ~/ 2 ? LogicValue.one : LogicValue.zero);
value = majority;
case CompressTermType.cout:
throw RohdHclException('cout CompressTermType should not be evaluated');

case CompressTermType.cin:
throw RohdHclException('cin CompressTermType should not be evaluated');
}
return value;
}
Expand All @@ -149,7 +123,9 @@ class CompressTerm implements Comparable<CompressTerm> {
final ts = switch (type) {
CompressTermType.pp => 'pp',
CompressTermType.carry => 'c',
CompressTermType.sum => 's'
CompressTermType.cout => 'o',
CompressTermType.sum => 's',
CompressTermType.cin => 'i'
};
str
..write(ts)
Expand All @@ -158,15 +134,116 @@ class CompressTerm implements Comparable<CompressTerm> {
}
}

/// Base class for bit-level column compressor function
abstract class BitCompressor extends Module {
/// Input bits to compress
@protected
late final Logic compressBits;

/// Input terms to compress
late final List<CompressTerm> terms;

/// The addition results [sum] including carry bit
Logic get sum => output('sum');

/// The carry results [carry].
Logic get carry => output('carry');

late final List<List<double>> _delays;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you think it may be useful to expose a @protected API for setting these delays rather than private, so that if someone has a technology-specific delay they want to set, they could just extend one of the compressors and change the delay numbers? this also leads to another question: should there be an argument to generate compressors in ColumnCompressor (even the smaller ones) rather than just a flag on whether to use 4:2 or not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bare minimum is to use 2:2 and 3:2 compressors (half and full-adders). But you have a point -- people have invented even wider compressors.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you want to include changes to support arbitrary delays and user-defined compressors in this PR? one consideration is this flag to "use 4:2 compressors" is an API that wouldn't really make sense if we later replace it with a list of compressor types that can be used, for example


/// Constructs a column compressor over [terms].
///
/// One input port named `t_<pos>` is added per term, wired to that term's
/// logic, and the resulting ports are swizzled together into [compressBits].
/// The `sum` and `carry` output ports are declared, and the delay table is
/// initialized to all zeros with one row per [CompressTermType] output type
/// and one column per [CompressTermType] input type.
BitCompressor(this.terms, {super.name = 'bitcompressor'}) {
  compressBits = [
    for (var pos = 0; pos < terms.length; pos++)
      addInput('t_$pos', terms[pos].logic)
  ].swizzle();
  addOutput('sum');
  addOutput('carry');

  // Every row must be its own list. `List.filled(n, List.filled(n, 0))`
  // stores the *same* inner list in every row, so setting the delay for one
  // output type would silently overwrite it for every other output type.
  final typeCount = CompressTermType.values.length;
  _delays = List.generate(
    typeCount,
    (_) => List<double>.filled(typeCount, 0.0),
  );
}

/// Returns the delay recorded in this compressor's delay table for an input
/// term of type [inTerm] reaching an output term of type [outTerm].
double evaluateDelay(CompressTermType outTerm, CompressTermType inTerm) {
  final delaysForOutput = _delays[outTerm.index];
  return delaysForOutput[inTerm.index];
}
}

/// A 2-input column compressor (half-adder).
class Compressor2 extends BitCompressor {
  /// Builds a half-adder over [terms].
  ///
  /// `sum` is driven by the XOR of the input bits and `carry` by their AND.
  /// The delay table records a partial-product input reaching `sum` as 1.0
  /// and reaching `carry` as 1.5.
  Compressor2(super.terms, {super.name = 'bitcompressor2'}) {
    sum <= compressBits.xor();
    carry <= compressBits.and();

    // Column of the delay table that holds delays from partial-product
    // inputs.
    final fromPp = CompressTermType.pp.index;
    _delays[CompressTermType.sum.index][fromPp] = 1.0;
    _delays[CompressTermType.carry.index][fromPp] = 1.5;
  }
}

/// A 3-input column compressor (full-adder).
class Compressor3 extends BitCompressor {
  /// Builds a full-adder over [terms].
  ///
  /// `sum` is driven by the XOR of the input bits. `carry` is a mux on bit 0
  /// that selects between the OR and the AND of bits 2:1. The delay table
  /// records a partial-product input reaching `sum` as 1.0 and reaching
  /// `carry` as 1.5.
  Compressor3(super.terms, {super.name = 'bitcompressor3'}) {
    sum <= compressBits.xor();
    carry <=
        mux(
          compressBits[0],
          compressBits.slice(2, 1).or(),
          compressBits.slice(2, 1).and(),
        );

    // TODO(desmonddak): wiring different inputs for different delays
    // means we may need to index by input not just type
    final fromPp = CompressTermType.pp.index;
    _delays[CompressTermType.sum.index][fromPp] = 1.0;
    _delays[CompressTermType.carry.index][fromPp] = 1.5;
  }
}

/// 4-input column compressor (4:2 compressor).
///
/// In addition to the `sum` and `carry` outputs of [BitCompressor], this
/// compressor takes a horizontal carry-in and produces a horizontal
/// carry-out [cout]. It is built from two chained [Compressor3] instances.
class Compressor4 extends BitCompressor {
/// Horizontal carry-out [cout]
Logic get cout => output('cout');

/// Construct a 4-input column compressor using two 3-input compressors.
///
/// [terms] are the column terms to compress; the body takes
/// `terms.sublist(0, 4)`, so at least four terms are required.
/// [cinL] holds the horizontal carry-in term(s); only `cinL[0]` is wired
/// into the second stage, so the list must not be empty.
Compressor4(List<CompressTerm> terms, List<CompressTerm> cinL,
{super.name = 'bitcompressor4'})
: super(terms) {
// We need to use internal Logic and regenerate Term lists inside
// Each carry-in term is rebuilt around an input port of this module.
// NOTE(review): every element is registered under the same port name
// 'cin'; presumably callers only ever pass a single carry-in - confirm.
cinL = [
for (final cin in cinL)
CompressTerm(this, cin.type, addInput('cin', cin.logic), cin.inputs,
cin.row, cin.col)
];
// Rebuild the column terms around this module's own input bits.
// NOTE(review): `compressBits.reversed[i]` presumably undoes the bit order
// produced by the swizzle in the base constructor so that index i lines up
// with terms[i] - confirm.
final internalTerms = [
for (var i = 0; i < compressBits.width; i++)
CompressTerm(this, terms[i].type, compressBits.reversed[i],
terms.sublist(0, 4), terms[i].row, terms[i].col)
];
addOutput('cout');
// First stage: compress internal terms 1..3; its carry becomes `cout`.
final c3A = Compressor3(internalTerms.sublist(1, 4));
cout <= c3A.carry;
// Wrap the first-stage sum as a term so it can feed the second stage.
final t = CompressTerm(
c3A, CompressTermType.sum, c3A.sum, internalTerms.sublist(1, 4), 0, 0);
// Second stage: first-stage sum + internal term 0 + the carry-in; this
// stage drives the `carry` and `sum` outputs.
final c3B = Compressor3([t, internalTerms[0], cinL[0]]);
carry <= c3B.carry;
sum <= c3B.sum;

// TODO(desmonddak): wiring different inputs for different delays
// Delay table entries, indexed as [output type][input type].
_delays[CompressTermType.sum.index][CompressTermType.pp.index] = 4.0;
_delays[CompressTermType.sum.index][CompressTermType.cin.index] = 2.0;
_delays[CompressTermType.carry.index][CompressTermType.pp.index] = 3.0;
_delays[CompressTermType.carry.index][CompressTermType.cin.index] = 2.0;
_delays[CompressTermType.cout.index][CompressTermType.pp.index] = 3.0;
_delays[CompressTermType.cout.index][CompressTermType.cin.index] = 0.0;
}
}

/// A column of partial product terms
typedef ColumnQueue = PriorityQueue<CompressTerm>;

/// A column compressor
class ColumnCompressor {
/// Columns of partial product CompressTerms

late final List<ColumnQueue> columns;

/// Columns of partial product CompressTerms for carries (4:2 output)
late final List<ColumnQueue> carryColumns;

/// The partial product array to be compressed
final PartialProductArray pp;

Expand All @@ -179,19 +256,28 @@ class ColumnCompressor {
/// Optional enable for configurable pipestage.
Logic? enable;

/// Use 4:2 compressors in compression tree
bool use42Compressors;

/// Initialize a ColumnCompressor for a set of partial products
///
/// If [clk] is not null then a set of flops are used to latch the output
/// after compression (see [extractRow]). [reset] and [enable] are optional
/// inputs to control these flops when [clk] is provided. If [clk] is null,
/// the [ColumnCompressor] is built as a combinational tree of compressors.
ColumnCompressor(this.pp, {this.clk, this.reset, this.enable}) {
///
/// [use42Compressors] will combine 4:2, 3:2, and 2:2 compressors in building
/// a compression tree.
ColumnCompressor(this.pp,
{this.use42Compressors = false, this.clk, this.reset, this.enable}) {
columns = List.generate(pp.maxWidth(), (i) => ColumnQueue());

// if (use42Compressors) {
carryColumns = List.generate(pp.maxWidth(), (i) => ColumnQueue());
// }
for (var row = 0; row < pp.rows; row++) {
for (var col = 0; col < pp.partialProducts[row].length; col++) {
final trueColumn = pp.rowShift[row] + col;
final term = CompressTerm(CompressTermType.pp,
final term = CompressTerm(null, CompressTermType.pp,
pp.partialProducts[row][col], [], row, trueColumn);
columns[trueColumn].add(term);
}
Expand All @@ -200,23 +286,26 @@ class ColumnCompressor {

/// Return the longest column length
int longestColumn() =>
columns.reduce((a, b) => a.length > b.length ? a : b).length;
columns.reduce((a, b) => a.length > b.length ? a : b).length +
carryColumns.reduce((a, b) => a.length > b.length ? a : b).length;

/// Convert a row to a Logic bitvector
Logic extractRow(int row) {
final width = pp.maxWidth();

final rowBits = <Logic>[];
for (var col = columns.length - 1; col >= 0; col--) {
final colList = columns[col].toList();
final colList = carryColumns[col].toList() + columns[col].toList();
if (row < colList.length) {
final value = colList[row].logic;

rowBits.add(
clk != null ? flop(clk!, value, reset: reset, en: enable) : value);
} else {
rowBits.add(Const(0));
}
}
rowBits.addAll(List.filled(pp.rowShift[row], Const(0)));
// rowBits.addAll(List.filled(pp.rowShift[row], Const(0)));
if (width > rowBits.length) {
return rowBits.swizzle().zeroExtend(width);
}
Expand All @@ -228,28 +317,45 @@ class ColumnCompressor {
final terms = <CompressTerm>[];
for (var col = 0; col < columns.length; col++) {
final queue = columns[col];
final depth = queue.length;
final PriorityQueue<CompressTerm> carryQueue;
if (use42Compressors) {
carryQueue = carryColumns[col];
} else {
carryQueue = PriorityQueue<CompressTerm>();
}
final depth = queue.length + carryQueue.length;
if (depth > iteration) {
if (depth > 2) {
final first = queue.removeFirst();
final second = queue.removeFirst();
final inputs = <CompressTerm>[first, second];
BitCompressor compressor;
if (depth > 3) {
if (depth > 4 && use42Compressors) {
final cin = carryQueue.isNotEmpty
? carryQueue.removeFirst()
: CompressTerm(null, CompressTermType.cin, Const(0), [], 0, 0);
inputs
..add(queue.removeFirst())
..add(queue.removeFirst());
compressor = Compressor4(inputs, [cin]);
if (col < columns.length - 1) {
final t = CompressTerm(compressor, CompressTermType.carry,
(compressor as Compressor4).cout, inputs, 0, col);
carryColumns[col + 1].add(t);
}
} else if (depth > 3) {
inputs.add(queue.removeFirst());
compressor =
Compressor3([for (final i in inputs) i.logic].swizzle());
compressor = Compressor3(inputs);
} else {
compressor =
Compressor2([for (final i in inputs) i.logic].swizzle());
compressor = Compressor2(inputs);
}
final t = CompressTerm(
CompressTermType.sum, compressor.sum, inputs, 0, col);
compressor, CompressTermType.sum, compressor.sum, inputs, 0, col);
terms.add(t);
columns[col].add(t);
if (col < columns.length - 1) {
final t = CompressTerm(
CompressTermType.carry, compressor.carry, inputs, 0, col);
final t = CompressTerm(compressor, CompressTermType.carry,
compressor.carry, inputs, 0, col);
columns[col + 1].add(t);
terms.add(t);
}
Expand Down
Loading