From 84874c91cac444036c2e62cfce6a4c711c6d5682 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 9 Feb 2023 23:19:26 -0500 Subject: [PATCH] add replace(io, str, patterns...) --- NEWS.md | 2 ++ base/strings/util.jl | 60 ++++++++++++++++++++++++++++++++++------- doc/src/base/strings.md | 2 +- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index e875921f827fd2..20553ddba771a6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,8 @@ New library features * The `initialized=true` keyword assignment for `sortperm!` and `partialsortperm!` is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]). * `binomial(x, k)` now supports non-integer `x` ([#48124]). +* `replace(string, pattern...)` now supports an optional `IO` argument to + write the output to a stream rather than returning a string ([#48625]). * A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]). Standard library changes diff --git a/base/strings/util.jl b/base/strings/util.jl index dabb84ae656398..3eb992e27d47ce 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -681,8 +681,8 @@ _free_pat_replacer(x) = nothing _pat_replacer(x::AbstractChar) = isequal(x) _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x) -function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N - count == 0 && return str +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) n = 1 e1 = nextind(str, lastindex(str)) # sizeof(str) @@ -697,11 +697,11 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I r isa Int && (r = r:r) # findnext / performance fix return r end - if all(>(e1), map(first, rs)) - foreach(_free_pat_replacer, patterns) - return str 
- end - out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return patterns, replaces, rs, all(>(e1), map(first, rs)) +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_finish(out::IO, str, count::Int, patterns::NTuple{N}, replaces::NTuple{N}, rs) where N while true p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ? r = rs[p] @@ -737,12 +737,34 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I end foreach(_free_pat_replacer, patterns) write(out, SubString(str, i)) - return String(take!(out)) + return out +end + +# Write `str` with the replacements applied to `out` and return `out`; when nothing is replaced, `str` is copied to `out` unchanged. Note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_io(out::IO, str, pat_repl::Pair...; count::Integer=typemax(Int)) + count == 0 && (write(out, str); return out) + patterns, replaces, rs, notfound = _replace_init(str, pat_repl, Int(count)) + if notfound + foreach(_free_pat_replacer, patterns) + write(out, str); return out + end + return _replace_finish(out, str, Int(count), patterns, replaces, rs) end +# Return a new string with the replacements applied, or `str` itself when nothing is replaced. Note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_str(str, pat_repl::Pair...; count::Integer=typemax(Int)) + count == 0 && return str + patterns, replaces, rs, notfound = _replace_init(str, pat_repl, Int(count)) + if notfound + foreach(_free_pat_replacer, patterns) + return str + end + out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return String(take!(_replace_finish(out, str, Int(count), patterns, replaces, rs))) +end """ - replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) + replace([out::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences. 
@@ -755,6 +777,11 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then references in `r` are replaced with the corresponding matched text. To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`). +The return value is a new string with the replacements. If the `out::IO` argument +is supplied, the transformed string is instead written to `out` (returning `out`). +(For example, this can be used in conjunction with [`IOBuffer`](@ref) to re-use +a pre-allocated buffer array in-place.) + Multiple patterns can be specified, and they will be applied left-to-right simultaneously, so only one pattern will be applied to any character, and the patterns will only be applied to the input text, not the replacements. @@ -762,6 +789,9 @@ patterns will only be applied to the input text, not the replacements. !!! compat "Julia 1.7" Support for multiple patterns requires version 1.7. +!!! compat "Julia 1.10" + The `out::IO` argument requires version 1.10. + # Examples ```jldoctest julia> replace("Python is a programming language.", "Python" => "Julia") @@ -780,8 +810,18 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a") "bca" ``` """ +replace(out::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = + _replace_io(out, String(s), pat_f..., count=count) + replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - replace(String(s), pat_f..., count=count) + _replace_str(String(s), pat_f..., count=count) + +# no copy needed for SubString{String} +replace(out::IO, s::SubString{String}, pat_f::Pair...; count=typemax(Int)) = + _replace_io(out, s, pat_f..., count=count) +replace(s::SubString{String}, pat_f::Pair...; count=typemax(Int)) = + _replace_str(s, pat_f..., count=count) + # TODO: allow transform as the first argument to replace? 
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 263c0019788c33..226e8aa92a9893 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -51,7 +51,7 @@ Base.findlast(::AbstractChar, ::AbstractString) Base.findprev(::AbstractString, ::AbstractString, ::Integer) Base.occursin Base.reverse(::Union{String,SubString{String}}) -Base.replace(s::AbstractString, ::Pair...) +Base.replace(::IO, s::AbstractString, ::Pair...) Base.eachsplit Base.split Base.rsplit