Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 62 additions & 39 deletions src/Bounds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1194,17 +1194,13 @@ class Bounds : public IRVisitor {
return;
}

if (!const_bound &&
(op->call_type == Call::PureExtern ||
op->call_type == Call::PureIntrinsic ||
op->call_type == Call::Image)) {

// If the args are const we can return the call of those args
// for pure functions. For other types of functions, the same
// call in two different places might produce different
// results (e.g. during the update step of a reduction), so we
// can't move around call nodes.

// A helper which converts a call with single-point args into the same
// call applied to those args. For some types of Call node if the args
// are const we can return the call of those args for pure
// functions. For other types of Call, the same call in two
// different places might produce different results (e.g. during the
// update step of a reduction), so we can't move around call nodes.
auto handle_const_arg_call = [&]() {
std::vector<Expr> new_args(op->args.size());
bool const_args = true;
for (size_t i = 0; i < op->args.size() && const_args; i++) {
Expand All @@ -1219,14 +1215,37 @@ class Bounds : public IRVisitor {
Expr call = Call::make(t, op->name, new_args, op->call_type,
op->func, op->value_index, op->image, op->param);
interval = Interval::single_point(call);
return;
return true;
} else {
return false;
}
};

if (!const_bound &&
(op->call_type == Call::PureExtern ||
op->call_type == Call::Image) &&
handle_const_arg_call()) {

// PureIntrinsics could also be handled here, but they may be
// lowered by the code below into different forms, so to avoid
// exponential complexity we handle those later.
return;

// else fall thru and continue
}

const auto handle_expr_bounds = [this, t](const Expr &e) -> void {
// If we lower a call expr into another form, we want to keep the
// original form if it turns out to have been a const. This helper
// accomplishes that, and also treats undefined Exprs as being unbounded
// as a convenience for things that may or may not be able to be
// lowered.
const auto handle_lowered_expr_bounds = [this, t, op](const Expr &e) -> void {
if (e.defined()) {
e.accept(this);
if (interval.is_single_point(e)) {
// It was unvarying, so just return the original unlowered form.
interval = Interval::single_point(op);
}
} else {
// Just use the bounds of the type
this->bounds_of_type(t);
Expand Down Expand Up @@ -1265,7 +1284,7 @@ class Bounds : public IRVisitor {
internal_assert(!t.is_handle());
if (t.is_float()) {
Expr e = abs(op->args[0] - op->args[1]);
e.accept(this);
handle_lowered_expr_bounds(e);
} else {
// absd() for int types will always produce a uint result
internal_assert(t.is_uint());
Expand All @@ -1281,9 +1300,8 @@ class Bounds : public IRVisitor {
} else if (op->is_intrinsic(Call::saturating_cast)) {
internal_assert(op->args.size() == 1);

Expr a = lower_saturating_cast(op->type, op->args[0]);
a.accept(this);
return;
Expr e = lower_saturating_cast(op->type, op->args[0]);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::unsafe_promise_clamped) ||
op->is_intrinsic(Call::promise_clamped)) {
// Unlike an explicit clamp, we are also permitted to
Expand Down Expand Up @@ -1330,7 +1348,7 @@ class Bounds : public IRVisitor {
// Probably more conservative than necessary
Expr false_value = op->args.size() == 2 ? op->args[1] : op->args[2];
Expr equivalent_select = Select::make(op->args[0], op->args[1], false_value);
equivalent_select.accept(this);
handle_lowered_expr_bounds(equivalent_select);
} else if (op->is_intrinsic(Call::require)) {
internal_assert(op->args.size() == 3);
interval = arg_bounds.get(1);
Expand Down Expand Up @@ -1407,8 +1425,8 @@ class Bounds : public IRVisitor {
}
} else if (is_const(b)) {
// We can normalize to multiplication
Expr equiv = a * (make_const(t, 1) << b);
equiv.accept(this);
Expr e = a * (make_const(t, 1) << b);
handle_lowered_expr_bounds(e);
}
} else if (op->is_intrinsic(Call::shift_right)) {
// Only try to improve on bounds-of-type if we can prove 0 <= b < t.bits,
Expand Down Expand Up @@ -1572,113 +1590,118 @@ class Bounds : public IRVisitor {
Expr e = can_widen(op->args[1]) ?
lower_widen_right_add(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widen_right_mul)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen(op->args[1]) ?
lower_widen_right_mul(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widen_right_sub)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen(op->args[1]) ?
lower_widen_right_sub(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widening_add)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
lower_widening_add(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widening_mul)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
lower_widening_mul(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widening_sub)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
lower_widening_sub(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::saturating_add)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
narrow(clamp(widen(op->args[0]) + widen(op->args[1]), t.min(), t.max())) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::saturating_sub)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
narrow(clamp(widen(op->args[0]) - widen(op->args[1]), t.min(), t.max())) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widening_shift_left)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen(op->args[0]) ?
lower_widening_shift_left(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::widening_shift_right)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen(op->args[0]) ?
lower_widening_shift_right(op->args[0], op->args[1]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::rounding_shift_right)) {
internal_assert(op->args.size() == 2);
// TODO: uses bitwise ops we may not handle well
handle_expr_bounds(lower_rounding_shift_right(op->args[0], op->args[1]));
handle_lowered_expr_bounds(lower_rounding_shift_right(op->args[0], op->args[1]));
} else if (op->is_intrinsic(Call::rounding_shift_left)) {
internal_assert(op->args.size() == 2);
// TODO: uses bitwise ops we may not handle well
handle_expr_bounds(lower_rounding_shift_left(op->args[0], op->args[1]));
handle_lowered_expr_bounds(lower_rounding_shift_left(op->args[0], op->args[1]));
} else if (op->is_intrinsic(Call::halving_add)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
narrow((widen(op->args[0]) + widen(op->args[1])) / 2) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::halving_sub)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
narrow((widen(op->args[0]) - widen(op->args[1])) / 2) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::rounding_halving_add)) {
internal_assert(op->args.size() == 2);
Expr e = can_widen_all(op->args) ?
narrow((widen(op->args[0]) + widen(op->args[1]) + 1) / 2) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::rounding_mul_shift_right)) {
internal_assert(op->args.size() == 3);
Expr e = can_widen_all(op->args) ?
saturating_narrow(rounding_shift_right(widening_mul(op->args[0], op->args[1]), op->args[2])) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::mul_shift_right)) {
internal_assert(op->args.size() == 3);
Expr e = can_widen_all(op->args) ?
saturating_narrow(widening_mul(op->args[0], op->args[1]) >> op->args[2]) :
Expr();
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_intrinsic(Call::sorted_avg)) {
internal_assert(op->args.size() == 2);
Expr e = lower_sorted_avg(op->args[0], op->args[1]);
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->is_strict_float_intrinsic()) {
// We'll just unstrictify it to do the analysis. This is
// dubious. Constructing a bounds expression that uses non-strict
// operations might get simplified during bounds analysis (we call
// simplify above!) to something approximate, resulting in an
// out-of-bounds read. See https://github.com/halide/Halide/issues/8646
Expr e = unstrictify_float(op);
handle_expr_bounds(e);
handle_lowered_expr_bounds(e);
} else if (op->call_type == Call::Halide) {
bounds_of_func(op->name, op->value_index, op->type);
} else if (!const_bound &&
op->call_type == Call::PureIntrinsic) {
// It was some other pure intrinsic that we don't lower. If the args
// are const we can just preserve it.
handle_const_arg_call();
} else {
// Just use the bounds of the type
bounds_of_type(t);
Expand Down
5 changes: 5 additions & 0 deletions src/Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ class DebugRule {

std::vector<DebugRule> parse_rules(const std::string &env) {
std::vector<DebugRule> rules;
if (env.empty()) {
// Treat an unset env var as HL_DEBUG_CODEGEN=0
rules.resize(1);
return rules;
}
for (const std::string &spec : split_string(env, ";")) {
if (auto rule = DebugRule::parse(spec)) {
rules.push_back(*rule);
Expand Down
1 change: 1 addition & 0 deletions test/correctness/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ tests(GROUPS correctness
bounds_of_func.cpp
bounds_of_monotonic_math.cpp
bounds_of_multiply.cpp
bounds_of_pure_intrinsics.cpp
bounds_of_split.cpp
bounds_query.cpp
bounds_query_respects_specialize_fail.cpp
Expand Down
31 changes: 31 additions & 0 deletions test/correctness/bounds_of_pure_intrinsics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "Halide.h"

using namespace Halide;
using namespace Halide::Internal;

int main(int argc, char **argv) {

// There were scalability problems with taking bounds of nested pure
// intrinsics. This test hangs if those problems still exist, using the
// strict float intrinsics. https://github.com/halide/Halide/issues/8686

Param<float> p1, p2, p2_min, p2_max;
Scope<Interval> scope;
scope.push(p2.name(), Interval{p2_min, p2_max});

for (int limit = 1; limit < 500; limit++) {
Expr e1 = p1, e2 = p2;
for (int i = 0; i < limit; i++) {
e1 = e1 * p1 + (i + 1);
e2 = e2 * p2 + (i + 1);
}
Expr e = e1 + e2;
bounds_of_expr_in_scope(e, scope);
e = strictify_float(e);
bounds_of_expr_in_scope(e, scope);
}

printf("Success!\n");

return 0;
}
Loading