This repository has been archived by the owner on May 5, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
/
Utils.cpp
492 lines (420 loc) · 16.2 KB
/
Utils.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
//===----------------------------------------------------------------------===//
//
// Adapated from ScaleHLS https://github.com/hanchenye/scalehls
//
//===----------------------------------------------------------------------===//
#include "Utils.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
using namespace mlir;
using namespace openhls;
//===----------------------------------------------------------------------===//
// Memory and loop analysis utils
//===----------------------------------------------------------------------===//
/// Parse array attributes.
SmallVector<int64_t, 8> openhls::getIntArrayAttrValue(Operation *op,
StringRef name) {
SmallVector<int64_t, 8> array;
if (auto arrayAttr = op->getAttrOfType<ArrayAttr>(name)) {
for (auto attr : arrayAttr)
if (auto intAttr = attr.dyn_cast<IntegerAttr>())
array.push_back(intAttr.getInt());
else
return SmallVector<int64_t, 8>();
return array;
} else
return SmallVector<int64_t, 8>();
}
/// Collect all load and store operations in the block and return them in "map".
void openhls::getMemAccessesMap(Block &block, MemAccessesMap &map,
bool includeVectorTransfer) {
for (auto &op : block) {
if (auto load = dyn_cast<AffineReadOpInterface>(op))
map[load.getMemRef()].push_back(&op);
else if (auto store = dyn_cast<AffineWriteOpInterface>(op))
map[store.getMemRef()].push_back(&op);
else if (auto read = dyn_cast<vector::TransferReadOp>(op)) {
if (includeVectorTransfer)
map[read.getSource()].push_back(&op);
} else if (auto write = dyn_cast<vector::TransferWriteOp>(op)) {
if (includeVectorTransfer)
map[write.getSource()].push_back(&op);
} else if (op.getNumRegions()) {
// Recursively collect memory access operations in each block.
for (auto ®ion : op.getRegions())
for (auto &block : region)
getMemAccessesMap(block, map);
}
}
}
// Check if the lhsOp and rhsOp are in the same block. If so, return their
// ancestors that are located at the same block. Note that in this check,
// AffineIfOp is transparent.
Optional<std::pair<Operation *, Operation *>>
openhls::checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
// If lhsOp and rhsOp are already at the same level, return true.
if (lhsOp->getBlock() == rhsOp->getBlock())
return std::pair<Operation *, Operation *>(lhsOp, rhsOp);
// Helper to get all surrounding AffineIfOps.
auto getSurroundIfs =
([&](Operation *op, SmallVector<Operation *, 4> &nests) {
nests.push_back(op);
auto currentOp = op;
while (true) {
auto parentOp = currentOp->getParentOp();
if (isa<AffineIfOp, scf::IfOp>(parentOp)) {
nests.push_back(parentOp);
currentOp = parentOp;
} else
break;
}
});
SmallVector<Operation *, 4> lhsNests;
SmallVector<Operation *, 4> rhsNests;
getSurroundIfs(lhsOp, lhsNests);
getSurroundIfs(rhsOp, rhsNests);
// If any parent of lhsOp and any parent of rhsOp are at the same level,
// return true.
for (auto lhs : lhsNests)
for (auto rhs : rhsNests)
if (lhs->getBlock() == rhs->getBlock())
return std::pair<Operation *, Operation *>(lhs, rhs);
return Optional<std::pair<Operation *, Operation *>>();
}
/// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
/// where each lists loops from outer-most to inner-most in loop nest.
unsigned openhls::getCommonSurroundingLoops(Operation *A, Operation *B,
AffineLoopBand *band) {
SmallVector<AffineForOp, 4> loopsA, loopsB;
getLoopIVs(*A, &loopsA);
getLoopIVs(*B, &loopsB);
unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
unsigned numCommonLoops = 0;
for (unsigned i = 0; i < minNumLoops; ++i) {
if (loopsA[i] != loopsB[i])
break;
++numCommonLoops;
if (band != nullptr)
band->push_back(loopsB[i]);
}
return numCommonLoops;
}
/// Calculate the lower and upper bound of the affine map if possible.
Optional<std::pair<int64_t, int64_t>>
openhls::getBoundOfAffineMap(AffineMap map, ValueRange operands) {
if (map.isSingleConstant()) {
auto constBound = map.getSingleConstantResult();
return std::pair<int64_t, int64_t>(constBound, constBound);
}
// For now, we can only handle one result value map.
if (map.getNumResults() != 1)
return Optional<std::pair<int64_t, int64_t>>();
auto context = map.getContext();
SmallVector<int64_t, 4> lbs;
SmallVector<int64_t, 4> ubs;
for (auto operand : operands) {
// Only if the affine map operands are induction variable, the calculation
// is possible.
if (!isForInductionVar(operand))
return Optional<std::pair<int64_t, int64_t>>();
// Only if the owner for op of the induction variable has constant bound,
// the calculation is possible.
auto forOp = getForInductionVarOwner(operand);
if (!forOp.hasConstantBounds())
return Optional<std::pair<int64_t, int64_t>>();
auto lb = forOp.getConstantLowerBound();
auto ub = forOp.getConstantUpperBound();
auto step = forOp.getStep();
lbs.push_back(lb);
ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
}
// TODO: maybe a more efficient algorithm.
auto operandNum = operands.size();
SmallVector<int64_t, 16> results;
for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
SmallVector<AffineExpr, 4> replacements;
for (unsigned pos = 0; pos < operandNum; ++pos) {
if (i >> pos % 2 == 0)
replacements.push_back(getAffineConstantExpr(lbs[pos], context));
else
replacements.push_back(getAffineConstantExpr(ubs[pos], context));
}
auto newExpr = map.getResult(0).replaceDimsAndSymbols(replacements, {});
if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
results.push_back(constExpr.getValue());
else
return Optional<std::pair<int64_t, int64_t>>();
}
auto minmax = std::minmax_element(results.begin(), results.end());
return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
bool openhls::isFullyPartitioned(MemRefType memrefType) {
if (memrefType.getRank() == 0)
return true;
bool fullyPartitioned = false;
SmallVector<int64_t, 8> factors;
getPartitionFactors(memrefType, &factors);
auto shapes = memrefType.getShape();
fullyPartitioned =
factors == SmallVector<int64_t, 8>(shapes.begin(), shapes.end());
return fullyPartitioned;
}
// Calculate partition factors through analyzing the "memrefType" and return
// them in "factors". Meanwhile, the overall partition number is calculated and
// returned as well.
int64_t openhls::getPartitionFactors(MemRefType memrefType,
SmallVector<int64_t, 8> *factors) {
auto shape = memrefType.getShape();
auto layoutMap = memrefType.getLayout().getAffineMap();
int64_t accumFactor = 1;
for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) {
int64_t factor = 1;
auto expr = layoutMap.getResult(dim);
if (auto binaryExpr = expr.dyn_cast<AffineBinaryOpExpr>())
if (auto rhsExpr = binaryExpr.getRHS().dyn_cast<AffineConstantExpr>()) {
if (expr.getKind() == AffineExprKind::Mod)
factor = rhsExpr.getValue();
else if (expr.getKind() == AffineExprKind::FloorDiv)
factor = (shape[dim] + rhsExpr.getValue() - 1) / rhsExpr.getValue();
}
accumFactor *= factor;
if (factors != nullptr)
factors->push_back(factor);
}
return accumFactor;
}
/// This is method for finding the number of child loops which immediatedly
/// contained by the input operation.
unsigned openhls::getChildLoopNum(Operation *op) {
unsigned childNum = 0;
for (auto ®ion : op->getRegions())
for (auto &block : region)
for (auto &op : block)
if (isa<AffineForOp>(op))
++childNum;
return childNum;
}
/// Get the whole loop band given the innermost loop and return it in "band".
static void getLoopBandFromInnermost(AffineForOp forOp, AffineLoopBand &band) {
band.clear();
AffineLoopBand reverseBand;
auto currentLoop = forOp;
while (true) {
reverseBand.push_back(currentLoop);
auto parentLoop = currentLoop->getParentOfType<AffineForOp>();
if (!parentLoop)
break;
if (getChildLoopNum(parentLoop) == 1)
currentLoop = parentLoop;
else
break;
}
band.append(reverseBand.rbegin(), reverseBand.rend());
}
/// Given a tiled loop band, return true and get the tile (tile-space) loop band
/// and the point (intra-tile) loop band. If failed, return false.
bool openhls::getTileAndPointLoopBand(const AffineLoopBand &band,
AffineLoopBand &tileBand,
AffineLoopBand &pointBand) {
tileBand.clear();
pointBand.clear();
bool isPointLoop = false;
for (auto loop : band) {
if (!isPointLoop)
tileBand.push_back(loop);
else if (isPointLoop)
pointBand.push_back(loop);
else if (!isPointLoop) {
isPointLoop = true;
pointBand.push_back(loop);
} else {
tileBand.clear();
pointBand.clear();
return false;
}
}
return true;
}
/// Get the whole loop band given the outermost loop and return it in "band".
/// Meanwhile, the return value is the innermost loop of this loop band.
AffineForOp openhls::getLoopBandFromOutermost(AffineForOp forOp,
AffineLoopBand &band) {
band.clear();
auto currentLoop = forOp;
while (true) {
band.push_back(currentLoop);
if (getChildLoopNum(currentLoop) == 1)
currentLoop = *currentLoop.getOps<AffineForOp>().begin();
else
break;
}
return band.back();
}
/// Collect all loop bands in the "block" and return them in "bands". If
/// "allowHavingChilds" is true, loop bands containing more than 1 other loop
/// bands are also collected. Otherwise, only loop bands that contains no child
/// loops are collected.
void openhls::getLoopBands(Block &block, AffineLoopBands &bands,
bool allowHavingChilds) {
bands.clear();
block.walk([&](AffineForOp loop) {
auto childNum = getChildLoopNum(loop);
if (childNum == 0 || (childNum > 1 && allowHavingChilds)) {
AffineLoopBand band;
getLoopBandFromInnermost(loop, band);
bands.push_back(band);
}
});
}
void openhls::getArrays(Block &block, SmallVectorImpl<Value> &arrays,
bool allowArguments) {
// Collect argument arrays.
if (allowArguments)
for (auto arg : block.getArguments()) {
if (arg.getType().isa<MemRefType>())
arrays.push_back(arg);
}
// Collect local arrays.
for (auto &op : block.getOperations()) {
if (isa<memref::AllocaOp, memref::AllocOp>(op))
arrays.push_back(op.getResult(0));
}
}
Optional<unsigned> openhls::getAverageTripCount(AffineForOp forOp) {
if (auto optionalTripCount = getConstantTripCount(forOp))
return optionalTripCount.getValue();
else {
// TODO: A temporary approach to estimate the trip count. For now, we take
// the average of the upper bound and lower bound of trip count as the
// estimated trip count.
auto lowerBound = getBoundOfAffineMap(forOp.getLowerBoundMap(),
forOp.getLowerBoundOperands());
auto upperBound = getBoundOfAffineMap(forOp.getUpperBoundMap(),
forOp.getUpperBoundOperands());
if (lowerBound && upperBound) {
auto lowerTripCount =
upperBound.getValue().second - lowerBound.getValue().first;
auto upperTripCount =
upperBound.getValue().first - lowerBound.getValue().second;
return (lowerTripCount + upperTripCount + 1) / 2;
} else
return Optional<unsigned>();
}
}
bool openhls::checkDependence(Operation *A, Operation *B) {
AffineLoopBand commonLoops;
unsigned numCommonLoops = getCommonSurroundingLoops(A, B, &commonLoops);
// Traverse each loop level to find dependencies.
for (unsigned depth = numCommonLoops; depth > 0; depth--) {
// Skip all parallel loop level.
FlatAffineValueConstraints depConstrs;
DependenceResult result = checkMemrefAccessDependence(
MemRefAccess(A), MemRefAccess(B), depth, &depConstrs,
/*dependenceComponents=*/nullptr);
if (hasDependence(result))
return true;
}
return false;
}
/// Localize each tosa/arith constant to right before its each use. Only
/// localize the constants whose size is below the bitsThreshold.
void openhls::localizeConstants(Block &block, int64_t bitsThreshold) {
auto builder = OpBuilder(block.getParentOp());
// Collect all constants.
SmallVector<Operation *, 16> constants;
block.walk([&](Operation *constant) {
if (isa<tosa::ConstOp, arith::ConstantOp>(constant)) {
auto type = constant->getResult(0).getType();
if (auto shapedType = type.dyn_cast<ShapedType>()) {
if (shapedType.getSizeInBits() <= bitsThreshold)
constants.push_back(constant);
} else
constants.push_back(constant);
}
});
// Localize constants to each of its use.
for (auto constant : constants) {
for (auto &use : llvm::make_early_inc_range(constant->getUses())) {
builder.setInsertionPoint(use.getOwner());
auto cloneConstant = builder.clone(*constant);
use.set(cloneConstant->getResult(0));
}
constant->erase();
}
}
FuncOp openhls::getTopFunc(ModuleOp module, std::string topFuncName) {
FuncOp topFunc;
for (auto func : module.getOps<FuncOp>())
if (func.getName() == topFuncName) {
if (!topFunc)
topFunc = func;
else
return FuncOp();
}
return topFunc;
}
FuncOp openhls::getRuntimeFunc(ModuleOp module,
std::string runtimeFuncName) {
FuncOp runtimeFunc;
for (auto func : module.getOps<FuncOp>())
if (func.getName() == runtimeFuncName) {
if (!runtimeFunc)
runtimeFunc = func;
else
return FuncOp();
}
return runtimeFunc;
}
//===----------------------------------------------------------------------===//
// PtrLikeMemRefAccess Struct Definition
//===----------------------------------------------------------------------===//
PtrLikeMemRefAccess::PtrLikeMemRefAccess(Operation *loadOrStoreOpInst) {
Operation *opInst = nullptr;
SmallVector<Value, 4> indices;
if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
memref = loadOp.getMemRef();
opInst = loadOrStoreOpInst;
auto loadMemrefType = loadOp.getMemRefType();
indices.reserve(loadMemrefType.getRank());
for (auto index : loadOp.getMapOperands()) {
indices.push_back(index);
}
} else {
assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
"Affine read/write op expected");
auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
opInst = loadOrStoreOpInst;
memref = storeOp.getMemRef();
auto storeMemrefType = storeOp.getMemRefType();
indices.reserve(storeMemrefType.getRank());
for (auto index : storeOp.getMapOperands()) {
indices.push_back(index);
}
}
// Get affine map from AffineLoad/Store.
AffineMap map;
if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
map = loadOp.getAffineMap();
else
map = cast<AffineWriteOpInterface>(opInst).getAffineMap();
SmallVector<Value, 8> operands(indices.begin(), indices.end());
fullyComposeAffineMapAndOperands(&map, &operands);
map = simplifyAffineMap(map);
canonicalizeMapAndOperands(&map, &operands);
accessMap.reset(map, operands);
}
bool PtrLikeMemRefAccess::operator==(const PtrLikeMemRefAccess &rhs) const {
if (memref != rhs.memref || impl != rhs.impl)
return false;
if (impl == rhs.impl && impl && rhs.impl)
return true;
AffineValueMap diff;
AffineValueMap::difference(accessMap, rhs.accessMap, &diff);
return llvm::all_of(diff.getAffineMap().getResults(),
[](AffineExpr e) { return e == 0; });
}