diff --git a/src/Bounds.cpp b/src/Bounds.cpp
index 06c2fc6d6ab3..0ff516e69aaa 100644
--- a/src/Bounds.cpp
+++ b/src/Bounds.cpp
@@ -1194,17 +1194,13 @@ class Bounds : public IRVisitor {
             return;
         }
 
-        if (!const_bound &&
-            (op->call_type == Call::PureExtern ||
-             op->call_type == Call::PureIntrinsic ||
-             op->call_type == Call::Image)) {
-
-            // If the args are const we can return the call of those args
-            // for pure functions. For other types of functions, the same
-            // call in two different places might produce different
-            // results (e.g. during the update step of a reduction), so we
-            // can't move around call nodes.
-
+        // A helper which converts a call with single-point args into the same
+        // call applied to those args. For some types of Call node if the args
+        // are const we can return the call of those args for pure
+        // functions. For other types of Call, the same call in two
+        // different places might produce different results (e.g. during the
+        // update step of a reduction), so we can't move around call nodes.
+        auto handle_const_arg_call = [&]() {
             std::vector<Expr> new_args(op->args.size());
             bool const_args = true;
             for (size_t i = 0; i < op->args.size() && const_args; i++) {
@@ -1219,14 +1215,37 @@ class Bounds : public IRVisitor {
                 Expr call = Call::make(t, op->name, new_args, op->call_type,
                                        op->func, op->value_index, op->image, op->param);
                 interval = Interval::single_point(call);
-                return;
+                return true;
+            } else {
+                return false;
             }
+        };
+
+        if (!const_bound &&
+            (op->call_type == Call::PureExtern ||
+             op->call_type == Call::Image) &&
+            handle_const_arg_call()) {
+
+            // PureIntrinsics could also be handled here, but they may be
+            // lowered by the code below into different forms, so to avoid
+            // exponential complexity we handle those later.
+            return;
+            // else fall thru and continue
         }
 
-        const auto handle_expr_bounds = [this, t](const Expr &e) -> void {
+        // If we lower a call expr into another form, we want to keep the
+        // original form if it turns out to have been a const. This helper
+        // accomplishes that, and also treats undefined Exprs as being unbounded
+        // as a convenience for things that may or may not be able to be
+        // lowered.
+        const auto handle_lowered_expr_bounds = [this, t, op](const Expr &e) -> void {
             if (e.defined()) {
                 e.accept(this);
+                if (interval.is_single_point(e)) {
+                    // It was unvarying, so just return the original unlowered form.
+                    interval = Interval::single_point(op);
+                }
             } else {
                 // Just use the bounds of the type
                 this->bounds_of_type(t);
@@ -1265,7 +1284,7 @@
             internal_assert(!t.is_handle());
             if (t.is_float()) {
                 Expr e = abs(op->args[0] - op->args[1]);
-                e.accept(this);
+                handle_lowered_expr_bounds(e);
             } else {
                 // absd() for int types will always produce a uint result
                 internal_assert(t.is_uint());
@@ -1281,9 +1300,8 @@
         } else if (op->is_intrinsic(Call::saturating_cast)) {
             internal_assert(op->args.size() == 1);
-            Expr a = lower_saturating_cast(op->type, op->args[0]);
-            a.accept(this);
-            return;
+            Expr e = lower_saturating_cast(op->type, op->args[0]);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::unsafe_promise_clamped) ||
                    op->is_intrinsic(Call::promise_clamped)) {
             // Unlike an explicit clamp, we are also permitted to
@@ -1330,7 +1348,7 @@
             // Probably more conservative than necessary
             Expr false_value = op->args.size() == 2 ? op->args[1] : op->args[2];
             Expr equivalent_select = Select::make(op->args[0], op->args[1], false_value);
-            equivalent_select.accept(this);
+            handle_lowered_expr_bounds(equivalent_select);
         } else if (op->is_intrinsic(Call::require)) {
             internal_assert(op->args.size() == 3);
             interval = arg_bounds.get(1);
@@ -1407,8 +1425,8 @@
                 }
             } else if (is_const(b)) {
                 // We can normalize to multiplication
-                Expr equiv = a * (make_const(t, 1) << b);
-                equiv.accept(this);
+                Expr e = a * (make_const(t, 1) << b);
+                handle_lowered_expr_bounds(e);
             }
         } else if (op->is_intrinsic(Call::shift_right)) {
             // Only try to improve on bounds-of-type if we can prove 0 <= b < t.bits,
@@ -1572,103 +1590,103 @@
             Expr e = can_widen(op->args[1]) ?
                          lower_widen_right_add(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widen_right_mul)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen(op->args[1]) ?
                          lower_widen_right_mul(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widen_right_sub)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen(op->args[1]) ?
                          lower_widen_right_sub(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widening_add)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          lower_widening_add(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widening_mul)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          lower_widening_mul(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widening_sub)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          lower_widening_sub(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::saturating_add)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          narrow(clamp(widen(op->args[0]) + widen(op->args[1]), t.min(), t.max())) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::saturating_sub)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          narrow(clamp(widen(op->args[0]) - widen(op->args[1]), t.min(), t.max())) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widening_shift_left)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen(op->args[0]) ?
                          lower_widening_shift_left(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::widening_shift_right)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen(op->args[0]) ?
                          lower_widening_shift_right(op->args[0], op->args[1]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::rounding_shift_right)) {
             internal_assert(op->args.size() == 2);
             // TODO: uses bitwise ops we may not handle well
-            handle_expr_bounds(lower_rounding_shift_right(op->args[0], op->args[1]));
+            handle_lowered_expr_bounds(lower_rounding_shift_right(op->args[0], op->args[1]));
         } else if (op->is_intrinsic(Call::rounding_shift_left)) {
             internal_assert(op->args.size() == 2);
             // TODO: uses bitwise ops we may not handle well
-            handle_expr_bounds(lower_rounding_shift_left(op->args[0], op->args[1]));
+            handle_lowered_expr_bounds(lower_rounding_shift_left(op->args[0], op->args[1]));
         } else if (op->is_intrinsic(Call::halving_add)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          narrow((widen(op->args[0]) + widen(op->args[1])) / 2) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::halving_sub)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          narrow((widen(op->args[0]) - widen(op->args[1])) / 2) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::rounding_halving_add)) {
             internal_assert(op->args.size() == 2);
             Expr e = can_widen_all(op->args) ?
                          narrow((widen(op->args[0]) + widen(op->args[1]) + 1) / 2) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::rounding_mul_shift_right)) {
             internal_assert(op->args.size() == 3);
             Expr e = can_widen_all(op->args) ?
                          saturating_narrow(rounding_shift_right(widening_mul(op->args[0], op->args[1]), op->args[2])) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::mul_shift_right)) {
             internal_assert(op->args.size() == 3);
             Expr e = can_widen_all(op->args) ?
                          saturating_narrow(widening_mul(op->args[0], op->args[1]) >> op->args[2]) :
                          Expr();
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_intrinsic(Call::sorted_avg)) {
             internal_assert(op->args.size() == 2);
             Expr e = lower_sorted_avg(op->args[0], op->args[1]);
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->is_strict_float_intrinsic()) {
             // We'll just unstrictify it to do the analysis. This is
             // dubious. Constructing a bounds expression that uses non-strict
@@ -1676,9 +1694,14 @@
             // simplify above!) to something approximate, resulting in an
             // out-of-bounds read. See https://github.com/halide/Halide/issues/8646
             Expr e = unstrictify_float(op);
-            handle_expr_bounds(e);
+            handle_lowered_expr_bounds(e);
         } else if (op->call_type == Call::Halide) {
             bounds_of_func(op->name, op->value_index, op->type);
+        } else if (!const_bound &&
+                   op->call_type == Call::PureIntrinsic) {
+            // It was some other pure intrinsic that we don't lower. If the args
+            // are const we can just preserve it.
+            handle_const_arg_call();
         } else {
             // Just use the bounds of the type
             bounds_of_type(t);
diff --git a/src/Debug.cpp b/src/Debug.cpp
index 40f0c4c73097..71778a5579c8 100644
--- a/src/Debug.cpp
+++ b/src/Debug.cpp
@@ -91,6 +91,11 @@ class DebugRule {
 
 std::vector<DebugRule> parse_rules(const std::string &env) {
     std::vector<DebugRule> rules;
+    if (env.empty()) {
+        // Treat an unset env var as HL_DEBUG_CODEGEN=0
+        rules.resize(1);
+        return rules;
+    }
     for (const std::string &spec : split_string(env, ";")) {
         if (auto rule = DebugRule::parse(spec)) {
             rules.push_back(*rule);
diff --git a/test/correctness/CMakeLists.txt b/test/correctness/CMakeLists.txt
index 29324603aced..d67c9ffc267e 100644
--- a/test/correctness/CMakeLists.txt
+++ b/test/correctness/CMakeLists.txt
@@ -30,6 +30,7 @@ tests(GROUPS correctness
      bounds_of_func.cpp
      bounds_of_monotonic_math.cpp
      bounds_of_multiply.cpp
+     bounds_of_pure_intrinsics.cpp
      bounds_of_split.cpp
      bounds_query.cpp
      bounds_query_respects_specialize_fail.cpp
diff --git a/test/correctness/bounds_of_pure_intrinsics.cpp b/test/correctness/bounds_of_pure_intrinsics.cpp
new file mode 100644
index 000000000000..2dea93c16247
--- /dev/null
+++ b/test/correctness/bounds_of_pure_intrinsics.cpp
@@ -0,0 +1,31 @@
+#include "Halide.h"
+
+using namespace Halide;
+using namespace Halide::Internal;
+
+int main(int argc, char **argv) {
+
+    // There were scalability problems with taking bounds of nested pure
+    // intrinsics. This test hangs if those problems still exist, using the
+    // strict float intrinsics. https://github.com/halide/Halide/issues/8686
+
+    Param<float> p1, p2, p2_min, p2_max;
+    Scope<Interval> scope;
+    scope.push(p2.name(), Interval{p2_min, p2_max});
+
+    for (int limit = 1; limit < 500; limit++) {
+        Expr e1 = p1, e2 = p2;
+        for (int i = 0; i < limit; i++) {
+            e1 = e1 * p1 + (i + 1);
+            e2 = e2 * p2 + (i + 1);
+        }
+        Expr e = e1 + e2;
+        bounds_of_expr_in_scope(e, scope);
+        e = strictify_float(e);
+        bounds_of_expr_in_scope(e, scope);
+    }
+
+    printf("Success!\n");
+
+    return 0;
+}
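
Note (not part of the patch): below is a minimal standalone sketch of the behavior the new handle_const_arg_call / handle_lowered_expr_bounds paths aim for, namely that a pure call whose arguments are all constant should report a single-point interval rather than falling back to bounds-of-type. It reuses only the API already exercised by the new test (bounds_of_expr_in_scope, strictify_float, Scope<Interval>); the expectation that the single point survives for the strict-float form is my reading of the comments in the diff, not something the patch itself asserts.

#include "Halide.h"
#include <cstdio>

using namespace Halide;
using namespace Halide::Internal;

int main(int argc, char **argv) {
    // An expression built only from float constants, then converted to the
    // strict float intrinsic form, as in the test above.
    Expr a = 3.0f, b = 4.0f;
    Expr e = strictify_float(a + b);

    // No free variables, so an empty scope suffices.
    Scope<Interval> scope;
    Interval i = bounds_of_expr_in_scope(e, scope);

    // With constant args the bounds should collapse to a single point
    // (per handle_lowered_expr_bounds, ideally the original strict call),
    // not the full range of float32.
    if (!i.is_single_point()) {
        printf("Expected a single-point interval\n");
        return 1;
    }
    printf("Success!\n");
    return 0;
}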