Skip to content

Commit

Permalink
[GR-36751] Improve handling of irreducible loops in AOT compilation.
Browse files Browse the repository at this point in the history
PullRequest: graal/11030
  • Loading branch information
dougxc committed Feb 15, 2022
2 parents 9fe1533 + 9023796 commit 2f72ff6
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 19 deletions.
5 changes: 5 additions & 0 deletions compiler/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

This changelog summarizes newly introduced optimizations that may be relevant to other teams.

## Version 22.1.0
* (GR-36751): Removed the `DuplicateIrreducibleLoops` option. To disable irreducible loop handling, set
`-Dgraal.MaxDuplicationFactor` to a value less than or equal to 1. For AOT compilations, the effort
spent handling irreducible loops is doubled so that Native Image can support more programs with irreducible loops.

## Version 22.0.0
* (GR-22707) (GR-30838): New, inner loops first, reverse post order and loop frequency calculations for the compiler.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@
import static org.graalvm.compiler.bytecode.Bytecodes.TABLESWITCH;
import static org.graalvm.compiler.bytecode.Bytecodes.WIDE;
import static org.graalvm.compiler.core.common.GraalOptions.SupportJsrBytecodes;
import static org.graalvm.compiler.java.BciBlockMapping.Options.MaxDuplicationFactor;

import java.util.ArrayDeque;
import java.util.ArrayList;
Expand Down Expand Up @@ -291,9 +292,9 @@
* maximum subroutine nesting of 4. Otherwise, a bailout is thrown.
* <p>
* Loops in the methods are detected. If a method contains an irreducible loop (a loop with more
* than one entry), a bailout is thrown or block duplication is attempted to make the loop
* reducible. This simplifies the compiler later on since only structured loops need to be
* supported.
* than one entry), a bailout is thrown or, if {@link Options#MaxDuplicationFactor} {@code > 1},
* block duplication is attempted to make the loop reducible. This simplifies the compiler later on
* since only structured loops need to be supported.
* <p>
* A data flow analysis computes the live local variables from the point of view of the interpreter.
* The result is used later to prune frame states, i.e., remove local variable entries that are
Expand All @@ -304,9 +305,8 @@
*/
public class BciBlockMapping implements JavaMethodContext {
public static class Options {
@Option(help = "When enabled, some limited amount of duplication will be performed in order compile code containing irreducible loops.")//
public static final OptionKey<Boolean> DuplicateIrreducibleLoops = new OptionKey<>(true);
@Option(help = "How much duplication can happen because of irreducible loops before bailing out.", type = OptionType.Expert)//
@Option(help = "Max amount of extra effort to expend handling irreducible loops. " +
"A value <= 1 disables support for irreducible loops.", type = OptionType.Expert)//
public static final OptionKey<Double> MaxDuplicationFactor = new OptionKey<>(2.0);
}

Expand Down Expand Up @@ -734,14 +734,30 @@ public String toString() {
private int newDuplicateBlocks;
private int duplicateBlocks;

/**
* Amount by which {@link Options#MaxDuplicationFactor} is multiplied.
*/
private final int maxDuplicationBoost;

/**
* Creates a new {@code BciBlockMapping} instance from {@code code} without boosting
* {@link Options#MaxDuplicationFactor}.
* <p>
* Delegates to the three-argument constructor with a {@code maxDuplicationBoost} of 1.
*
* @param code the bytecode for which the block map is created
* @param debug the debug context associated with this mapping
*/
protected BciBlockMapping(Bytecode code, DebugContext debug) {
this(code, debug, 1);
}

/**
* Creates a new {@code BciBlockMapping} instance from {@code code}.
*
* @param code the bytecode for which the block map is created; its exception handlers and code
* size are read here
* @param debug the debug context associated with this mapping
* @param maxDuplicationBoost amount by which to multiply {@link Options#MaxDuplicationFactor};
* must be {@code >= 1} (checked only when assertions are enabled)
*/
protected BciBlockMapping(Bytecode code, DebugContext debug, int maxDuplicationBoost) {
this.code = code;
this.debug = debug;
this.exceptionHandlers = code.getExceptionHandlers();
// The block map array is sized by the bytecode's code size.
this.blockMap = new BciBlock[code.getCodeSize()];
// A boost below 1 would shrink the duplication budget instead of extending it.
assert maxDuplicationBoost >= 1 : maxDuplicationBoost;
this.maxDuplicationBoost = maxDuplicationBoost;
}

public BciBlock[] getBlocks() {
Expand Down Expand Up @@ -1604,10 +1620,10 @@ private void propagateLoopBits(TraversalStep step, BitSet loopBits) {
* <p>
* Since loops are marked eagerly, forward entries into an existing loop without going through
* the loop header (i.e., irreducible loops) can be detected easily. In this case, if
* {@link Options#DuplicateIrreducibleLoops} is enabled, the traversal starts to duplicate
* {@link Options#MaxDuplicationFactor} is greater than 1, the traversal starts to duplicate
* blocks until it either exits the loop or reaches the header. Since this is a depth-first
* traversal and the loop header is not active, we know that the loop and its inner-loops were
* until then reducible.
* reducible until then.
* <p>
* This is not recursive to avoid stack overflow issues.
*/
Expand Down Expand Up @@ -1666,7 +1682,7 @@ private void computeBlockOrder(BciBlock initialBlock) {
for (int pos = -1; (pos = checkBits.nextSetBit(pos + 1)) >= 0;) {
int id = pos;
if (!loopHeaders[id].active) {
if (!Options.DuplicateIrreducibleLoops.getValue(debug.getOptions())) {
if (Options.MaxDuplicationFactor.getValue(debug.getOptions()) <= 1.0D) {
throw new PermanentBailoutException("Irreducible");
} else if (outermostInactiveLoopId == -1 || !loopHeaders[id].loops.get(outermostInactiveLoopId)) {
outermostInactiveLoopId = id;
Expand Down Expand Up @@ -1710,10 +1726,12 @@ private void computeBlockOrder(BciBlock initialBlock) {
blocksNotYetAssignedId--;
if (blocksNotYetAssignedId < 0) {
// this should only happen if duplication is active
assert Options.DuplicateIrreducibleLoops.getValue(debug.getOptions());
OptionValues options = debug.getOptions();
double factor = MaxDuplicationFactor.getValue(options);
duplicateBlocks += newDuplicateBlocks;
if (duplicateBlocks > postJsrBlockCount * Options.MaxDuplicationFactor.getValue(debug.getOptions())) {
throw new PermanentBailoutException("Non-reducible loop requires too much duplication");
if (duplicateBlocks > postJsrBlockCount * factor * maxDuplicationBoost) {
throw new PermanentBailoutException("Non-reducible loop requires too much duplication. " +
"Setting " + MaxDuplicationFactor.getName() + " to a value higher than " + factor + " may resolve this.");
}
// there are new duplicate blocks, re-number
debug.log(DebugContext.INFO_LEVEL, "Re-numbering blocks to make room for duplicates (old length: %d; new blocks: %d)", blocks.length, newDuplicateBlocks);
Expand Down Expand Up @@ -1754,7 +1772,11 @@ private boolean checkBlocks(int start, BciBlock inserting) {
}

public static BciBlockMapping create(BytecodeStream stream, Bytecode code, OptionValues options, DebugContext debug, boolean hasAsyncExceptions) {
BciBlockMapping map = new BciBlockMapping(code, debug);
return create(stream, code, options, debug, hasAsyncExceptions, 1);
}

public static BciBlockMapping create(BytecodeStream stream, Bytecode code, OptionValues options, DebugContext debug, boolean hasAsyncExceptions, int maxDuplicationBoost) {
BciBlockMapping map = new BciBlockMapping(code, debug, maxDuplicationBoost);
buildMap(stream, code, options, debug, map, hasAsyncExceptions);
return map;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,18 @@ protected boolean asyncExceptionLiveness() {

@Override
protected BciBlockMapping generateBlockMap() {
// Double effort expended to handle irreducible loops in AOT compilation
// since failure means native-image fails.
int maxDuplicationBoost = 2;

if (isDeoptimizationEnabled() && isMethodDeoptTarget()) {
/*
* Need to add blocks representing where deoptimization entrypoint nodes will be
* inserted.
*/
return HostedBciBlockMapping.create(stream, code, options, graph.getDebug(), false);
return HostedBciBlockMapping.create(stream, code, options, graph.getDebug(), false, maxDuplicationBoost);
} else {
return BciBlockMapping.create(stream, code, options, graph.getDebug(), asyncExceptionLiveness());
return BciBlockMapping.create(stream, code, options, graph.getDebug(), asyncExceptionLiveness(), maxDuplicationBoost);
}
}

Expand Down Expand Up @@ -300,8 +304,8 @@ final class HostedBciBlockMapping extends BciBlockMapping {
*/
private final Set<DeoptEntryInsertionPoint> insertedBlocks;

private HostedBciBlockMapping(Bytecode code, DebugContext debug) {
super(code, debug);
private HostedBciBlockMapping(Bytecode code, DebugContext debug, int maxDuplicationBoost) {
super(code, debug, maxDuplicationBoost);
insertedBlocks = new HashSet<>();
}

Expand Down Expand Up @@ -466,8 +470,8 @@ public String toString() {
* Creates a BciBlockMapping with blocks explicitly representing where DeoptEntryNodes and
* DeoptProxyAnchorNodes are to be inserted.
*/
public static BciBlockMapping create(BytecodeStream stream, Bytecode code, OptionValues options, DebugContext debug, boolean hasAsyncExceptions) {
BciBlockMapping map = new HostedBciBlockMapping(code, debug);
public static BciBlockMapping create(BytecodeStream stream, Bytecode code, OptionValues options, DebugContext debug, boolean hasAsyncExceptions, int maxDuplicationBoost) {
BciBlockMapping map = new HostedBciBlockMapping(code, debug, maxDuplicationBoost);
buildMap(stream, code, options, debug, map, hasAsyncExceptions);
return map;
}
Expand Down

0 comments on commit 2f72ff6

Please sign in to comment.