Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transducer as an optimization: map, filter and flatten #33526

Merged
merged 11 commits into from
Dec 4, 2019
148 changes: 118 additions & 30 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,31 +36,116 @@ mul_prod(x::Real, y::Real)::Real = x * y

## foldl && mapfoldl

function mapfoldl_impl(f::F, op::OP, nt::NamedTuple{(:init,)}, itr, i...) where {F,OP}
init = nt.init
function mapfoldl_impl(f::F, op::OP, nt, itr) where {F,OP}
    # Express the mapping as a lazy Generator, then let `_xfadjoint` peel the
    # iterator transforms off and fold them into the reducing function.
    bottom = BottomRF(op)
    rf, source = _xfadjoint(bottom, Generator(f, itr))
    return foldl_impl(rf, nt, source)
end

function foldl_impl(op::OP, nt, itr) where {OP}
    # Use the caller's `init` when present; otherwise start from the sentinel.
    start = get(nt, :init, _InitialValue())
    acc = _foldl_impl(op, start, itr)
    if acc isa _InitialValue
        # The sentinel came back, so fall back to the empty-reduction value.
        return reduce_empty_iter(op, itr)
    end
    return acc
end

# Left fold of `op` over `itr` starting from `init`; `init` is returned unchanged
# when the first `iterate` call yields `nothing`.
# NOTE(review): this span comes from a rendered diff, so several statements appear
# twice (`iterate(itr, i...)` / `iterate(itr)`, `f(y[1])` / `y[1]`). Neither `f` nor
# `i` is a parameter of the signature below, so the second line of each pair looks
# like the post-change code — confirm against the merged file.
function _foldl_impl(op::OP, init, itr) where {OP}
# Unroll the while loop once; if init is known, the call to op may
# be evaluated at compile time
y = iterate(itr, i...)
y = iterate(itr)
y === nothing && return init
v = op(init, f(y[1]))
v = op(init, y[1])
while true
y = iterate(itr, y[2])
y === nothing && break
v = op(v, f(y[1]))
v = op(v, y[1])
end
return v
end

function mapfoldl_impl(f::F, op::OP, nt::NamedTuple{()}, itr) where {F,OP}
y = iterate(itr)
if y === nothing
return Base.mapreduce_empty_iter(f, op, itr, IteratorEltype(itr))
end
x, i = y
init = mapreduce_first(f, op, x)
return mapfoldl_impl(f, op, (init=init,), itr, i)
# Sentinel accumulator type: `foldl_impl` starts from `_InitialValue()` when no
# `init` is supplied and checks for it on the result.
struct _InitialValue end

"""
BottomRF(rf) -> rf′

"Bottom" reducing function. This is a thin wrapper around the `op` argument
passed to `foldl`-like functions; on the initial invocation it calls
[`reduce_first`](@ref) instead of `op`.
"""
struct BottomRF{T}
    rf::T  # the wrapped reducing function
end

# First step of a fold without `init`: seed the accumulator via `reduce_first`.
@inline function (bottom::BottomRF)(::_InitialValue, x)
    return reduce_first(bottom.rf, x)
end

# Every later step: forward to the wrapped reducing function.
@inline function (bottom::BottomRF)(acc, x)
    return bottom.rf(acc, x)
end

"""
MappingRF(f, rf) -> rf′

Create a mapping reducing function `rf′(acc, x) = rf(acc, f(x))`.
"""
struct MappingRF{F, T}
    f::F   # transform applied to each incoming element
    rf::T  # downstream reducing function
end

# Transform the element first, then hand it to the wrapped reducing function.
@inline function (mapping::MappingRF)(acc, x)
    mapped = mapping.f(x)
    return mapping.rf(acc, mapped)
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious about the use of @inline here; with such a minimal implementation, you'd think it wouldn't be needed. Yet I think I've found myself doing the same thing because, if I understand correctly, even though op.f might itself have @inline, having this simple function barrier can prevent total inlining (I imagine because the op.f gets inlined into this one-liner, but then itself doesn't get inlined due to the now-non-inlineable nature). Is that correct thinking?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's probably what happens.


"""
FilteringRF(f, rf) -> rf′

Create a filtering reducing function `rf′(acc, x) = f(x) ? rf(acc, x) : acc`.
"""
struct FilteringRF{F, T}
    f::F   # predicate deciding whether an element is reduced
    rf::T  # downstream reducing function
end

# Reduce the element only when the predicate accepts it; otherwise pass the
# accumulator through untouched.
@inline function (filtering::FilteringRF)(acc, x)
    if filtering.f(x)
        return filtering.rf(acc, x)
    else
        return acc
    end
end

"""
FlatteningRF(rf) -> rf′

Create a flattening reducing function that is roughly equivalent to
`rf′(acc, x) = foldl(rf, x; init=acc)`.
"""
struct FlatteningRF{T}
    rf::T  # reducing function applied to the elements of each inner collection
end

# Treat `x` as an inner iterator: fold it with the wrapped reducing function,
# continuing from the accumulator carried in by the outer fold.
@inline function (flattening::FlatteningRF)(acc, x)
    inner_rf, inner_itr = _xfadjoint(BottomRF(flattening.rf), x)
    return _foldl_impl(inner_rf, acc, inner_itr)
end

"""
_xfadjoint(op, itr) -> op′, itr′

Given a reducing function `op` and an iterator `itr`, return a pair
`(op′, itr′)` of similar types. If the iterator `itr` is transformed by an
iterator transform `ixf` whose adjoint transducer `xf` is known, `op′ = xf(op)`
and `itr′ = ixf⁻¹(itr)` is returned. Otherwise, `op` and `itr` are returned
as-is. For example, transducer `rf -> MappingRF(f, rf)` is the adjoint of
iterator transform `itr -> Generator(f, itr)`.

Nested iterator transforms are converted recursively. That is to say,
given `op` and

itr = (ixf₁ ∘ ixf₂ ∘ ... ∘ ixfₙ)(itr′)

what is returned is `itr′` and

op′ = (xfₙ ∘ ... ∘ xf₂ ∘ xf₁)(op)
"""
_xfadjoint(op, itr) = (op, itr)

function _xfadjoint(op, itr::Generator)
    # An identity mapping contributes nothing, so skip the MappingRF layer.
    itr.f === identity && return _xfadjoint(op, itr.iter)
    return _xfadjoint(MappingRF(itr.f, op), itr.iter)
end

function _xfadjoint(op, itr::Filter)
    return _xfadjoint(FilteringRF(itr.flt, op), itr.itr)
end

function _xfadjoint(op, itr::Flatten)
    return _xfadjoint(FlatteningRF(op), itr.it)
end

"""
mapfoldl(f, op, itr; [init])
Expand Down Expand Up @@ -91,22 +176,17 @@ foldl(op, itr; kw...) = mapfoldl(identity, op, itr; kw...)

## foldr & mapfoldr

mapfoldr_impl(f, op, nt::NamedTuple{(:init,)}, itr) =
mapfoldl_impl(f, (x,y) -> op(y,x), nt, Iterators.reverse(itr))
function mapfoldr_impl(f, op, nt, itr)
    # Swap the operator's arguments and run a left fold over the reversed
    # underlying iterator.
    flipped = BottomRF(FlipArgs(op))
    rf, source = _xfadjoint(flipped, Generator(f, itr))
    return foldl_impl(rf, nt, Iterators.reverse(source))
end

# we can't just call mapfoldl_impl with (x,y) -> op(y,x), because
# we need to use the type of op for mapreduce_empty_iter and mapreduce_first.
function mapfoldr_impl(f, op, nt::NamedTuple{()}, itr)
ritr = Iterators.reverse(itr)
y = iterate(ritr)
if y === nothing
return Base.mapreduce_empty_iter(f, op, itr, IteratorEltype(itr))
end
x, i = y
init = mapreduce_first(f, op, x)
return mapfoldl_impl(f, (x,y) -> op(y,x), (init=init,), ritr, i)
"""
    FlipArgs(f) -> g

Wrap a two-argument function so that `g(x, y) = f(y, x)`.
"""
struct FlipArgs{F}
    f::F
end

@inline function (flipped::FlipArgs)(x, y)
    return flipped.f(y, x)
end

"""
mapfoldr(f, op, itr; [init])

Expand Down Expand Up @@ -234,6 +314,11 @@ reduce_empty(::typeof(mul_prod), T) = reduce_empty(*, T)
reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned} = one(Int)
reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)

# Empty-reduction values for the wrapper reducing functions: each method strips
# one wrapper layer and defers to the operator it wraps.
reduce_empty(bottom::BottomRF, T) = reduce_empty(bottom.rf, T)
reduce_empty(mapping::MappingRF, T) = mapreduce_empty(mapping.f, mapping.rf, T)
reduce_empty(filtering::FilteringRF, T) = reduce_empty(filtering.rf, T)
reduce_empty(flipped::FlipArgs, T) = reduce_empty(flipped.f, T)

"""
Base.mapreduce_empty(f, op, T)

Expand All @@ -251,10 +336,13 @@ mapreduce_empty(::typeof(abs2), op, T) = abs2(reduce_empty(op, T))
mapreduce_empty(f::typeof(abs), ::typeof(max), T) = abs(zero(T))
mapreduce_empty(f::typeof(abs2), ::typeof(max), T) = abs2(zero(T))

mapreduce_empty_iter(f, op, itr, ::HasEltype) = mapreduce_empty(f, op, eltype(itr))
mapreduce_empty_iter(f, op::typeof(&), itr, ::EltypeUnknown) = true
mapreduce_empty_iter(f, op::typeof(|), itr, ::EltypeUnknown) = false
mapreduce_empty_iter(f, op, itr, ::EltypeUnknown) = _empty_reduce_error()
# Kept for backward compatibility: express the mapped empty reduction through
# `reduce_empty_iter` by wrapping `f` and `op` in a `MappingRF`.
function mapreduce_empty_iter(f, op, itr, ItrEltype)
    return reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
end

# Pick the method below according to the iterator's `IteratorEltype` trait.
@inline function reduce_empty_iter(op, itr)
    return reduce_empty_iter(op, itr, IteratorEltype(itr))
end

# Known element type: ask `reduce_empty` for the value.
@inline function reduce_empty_iter(op, itr, ::HasEltype)
    return reduce_empty(op, eltype(itr))
end

# Unknown element type: no value can be chosen, so raise the empty-reduce error.
function reduce_empty_iter(op, itr, ::EltypeUnknown)
    return _empty_reduce_error()
end

# handling of single-element iterators
"""
Expand Down
13 changes: 1 addition & 12 deletions base/tuple.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,18 +204,7 @@ function map(f, t1::Any16, t2::Any16, ts::Any16...)
(A...,)
end

# mapafoldl, based on afold in operators.jl
mapafoldl(F,op,a) = a
mapafoldl(F,op,a,b) = op(a,F(b))
mapafoldl(F,op,a,b,c...) = mapafoldl(F, op, op(a,F(b)), c...)
function mapafoldl(F,op,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,qs...)
y = op(op(op(op(op(op(op(op(op(op(op(op(op(op(op(a,F(b)),F(c)),F(d)),F(e)),F(f)),F(g)),F(h)),F(i)),F(j)),F(k)),F(l)),F(m)),F(n)),F(o)),F(p))
for x in qs; y = op(y,F(x)); end
y
end
mapfoldl_impl(f, op, nt::NamedTuple{(:init,)}, t::Tuple) = mapafoldl(f, op, nt.init, t...)
mapfoldl_impl(f, op, nt::NamedTuple{()}, t::Tuple) = mapafoldl(f, op, f(t[1]), tail(t)...)
mapfoldl_impl(f, op, nt::NamedTuple{()}, t::Tuple{}) = mapreduce_empty_iter(f, op, t, IteratorEltype(t))
# Tuples are folded by splatting their elements into `afoldl`.
function _foldl_impl(op, init, itr::Tuple)
    return afoldl(op, init, itr...)
end

# type-stable padding
fill_to_length(t::NTuple{N,Any}, val, ::Val{N}) where {N} = t
Expand Down