diff --git a/NEWS.md b/NEWS.md index 3f12583f76c662..fb6d79ec788363 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,11 +8,11 @@ New language features difference between `public` and `export` is that `public` names do not become available when `using` a package/module. ([#50105]) * `ScopedValue` implement dynamic scope with inheritance across tasks ([#50958]). -* A new `AbstractString` type, `TaggedString`, is introduced that allows for - (arbitrary) regional annotations to be attached to an underlying string. This is - very useful for allowing styling information to be encoded separately, and is - used extensively in the new `StyledStrings` standard library. There is also a - new `TaggedChar` type, that is the equivalent new `AbstractChar` type. +* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for + regional annotations to be attached to an underlying string. This type is + particularly useful for holding styling information, and is used extensively + in the new `StyledStrings` standard library. There is also a new `AnnotatedChar` + type, that is the equivalent new `AbstractChar` type. Language changes ---------------- @@ -60,11 +60,12 @@ Standard library changes * A new standard library for handling styling in a more comprehensive and structured way. * The new `Faces` struct serves as a container for text styling information - (think typeface), and comes with a framework to provide a convenient, - extensible (via `addface!`), and customisable (with a user's `Faces.toml` and `loadfaces!`) approach to + (think typeface, as well as color and decoration), and comes with a framework + to provide a convenient, extensible (via `addface!`), and customisable (with a + user's `Faces.toml` and `loadfaces!`) approach to styled content. * The new `@styled_str` string macro provides a convenient way of creating a - `TaggedString` with various faces or other attributes applied. + `AnnotatedString` with various faces or other attributes applied. 
#### Package Manager diff --git a/base/exports.jl b/base/exports.jl index 55ad69196bac3e..b6f7ea0d6ad356 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1089,12 +1089,12 @@ public Generator, ImmutableDict, OneTo, - TaggedString, - TaggedChar, + AnnotatedString, + AnnotatedChar, UUID, -# Tagged strings - taggedstring, +# Annotated strings + annotatedstring, annotate!, annotations, diff --git a/base/regex.jl b/base/regex.jl index 5316c98492ede7..78eefa1741b0c9 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -427,35 +427,35 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, return result end -function _taggedmatch(m::RegexMatch{S}, str::TaggedString{S}) where {S<:AbstractString} - RegexMatch{TaggedString{S}}( - (@inbounds SubString{TaggedString{S}}( +function _annotatedmatch(m::RegexMatch{S}, str::AnnotatedString{S}) where {S<:AbstractString} + RegexMatch{AnnotatedString{S}}( + (@inbounds SubString{AnnotatedString{S}}( str, m.match.offset, m.match.ncodeunits, Val(:noshift))), - Union{Nothing,SubString{TaggedString{S}}}[ + Union{Nothing,SubString{AnnotatedString{S}}}[ if !isnothing(cap) - (@inbounds SubString{TaggedString{S}}( + (@inbounds SubString{AnnotatedString{S}}( str, cap.offset, cap.ncodeunits, Val(:noshift))) end for cap in m.captures], m.offset, m.offsets, m.regex) end -function match(re::Regex, str::TaggedString) +function match(re::Regex, str::AnnotatedString) m = match(re, str.string) if !isnothing(m) - _taggedmatch(m, str) + _annotatedmatch(m, str) end end -function match(re::Regex, str::TaggedString, idx::Integer, add_opts::UInt32=UInt32(0)) +function match(re::Regex, str::AnnotatedString, idx::Integer, add_opts::UInt32=UInt32(0)) m = match(re, str.string, idx, add_opts) if !isnothing(m) - _taggedmatch(m, str) + _annotatedmatch(m, str) end end match(r::Regex, s::AbstractString) = match(r, s, firstindex(s)) match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError( - "regex matching is only available for 
the String and TaggedString types; use String(s) to convert" + "regex matching is only available for the String and AnnotatedString types; use String(s) to convert" )) findnext(re::Regex, str::Union{String,SubString}, idx::Integer) = _findnext_re(re, str, idx, C_NULL) @@ -708,8 +708,8 @@ struct RegexMatchIterator{S <: AbstractString} RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) = new{String}(regex, String(string), ovr) - RegexMatchIterator(regex::Regex, string::TaggedString, ovr::Bool=false) = - new{TaggedString{String}}(regex, TaggedString(String(string.string), string.annotations), ovr) + RegexMatchIterator(regex::Regex, string::AnnotatedString, ovr::Bool=false) = + new{AnnotatedString{String}}(regex, AnnotatedString(String(string.string), string.annotations), ovr) end compile(itr::RegexMatchIterator) = (compile(itr.regex); itr) eltype(::Type{<:RegexMatchIterator}) = RegexMatch diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl new file mode 100644 index 00000000000000..2df0ece80da1f1 --- /dev/null +++ b/base/strings/annotated.jl @@ -0,0 +1,388 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + AnnotatedString{S <: AbstractString} <: AbstractString + +A string with metadata, in the form of annotated regions. + +More specifically, this is a simple wrapper around any other +[`AbstractString`](@ref) that allows for regions of the wrapped string to be +annotated with labeled values. + +```text + C + ┌──────┸─────────┐ + "this is an example annotated string" + └──┰────────┼─────┘ │ + A └─────┰─────────┘ + B +``` + +The above diagram represents a `AnnotatedString` where three ranges have been +annotated (labeled `A`, `B`, and `C`). Each annotation holds a label (`Symbol`) +and a value (`Any`), paired together as a `Pair{Symbol, <:Any}`. + +Labels do not need to be unique, the same region can hold multiple annotations +with the same label. 
+ +See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref), +[`annotations`](@ref), and [`annotate!`](@ref). + +!!! warning + While the constructors are part of the Base public API, the fields + of `AnnotatedString` are not. This is to allow for potential future + changes in the implementation of this type. Instead use the + [`annotations`](@ref), and [`annotate!`](@ref) getter/setter + functions. + +# Constructors + +```julia +AnnotatedString(s::S<:AbstractString) -> AnnotatedString{S} +AnnotatedString(s::S<:AbstractString, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, <:Any}}}) +``` + +An AnnotatedString can also be created with [`annotatedstring`](@ref), which acts much +like [`string`](@ref) but preserves any annotations present in the arguments. + +# Example + +```julia-repl +julia> AnnotatedString("this is an example annotated string", + [(1:18, :A => 1), (12:28, :B => 2), (18:35, :C => 3)]) +"this is an example annotated string" +``` +""" +struct AnnotatedString{S <: AbstractString} <: AbstractString + string::S + annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}} +end + +""" + AnnotatedChar{S <: AbstractChar} <: AbstractChar + +A Char with annotations. + +More specifically, this is a simple wrapper around any other +[`AbstractChar`](@ref), which holds a list of arbitrary labeled annotations +(`Pair{Symbol, <:Any}`) with the wrapped character. + +See also: [`AnnotatedString`](@ref), [`annotatedstring`](@ref), `annotations`, +and `annotate!`. + +!!! warning + While the constructors are part of the Base public API, the fields + of `AnnotatedChar` are not. This is to allow for potential future + changes in the implementation of this type. Instead use the + [`annotations`](@ref), and [`annotate!`](@ref) getter/setter + functions.
+ +# Constructors + +```julia +AnnotatedChar(s::S) -> AnnotatedChar{S} +AnnotatedChar(s::S, annotations::Vector{Pair{Symbol, <:Any}}) +``` + +# Examples + +```julia-repl +julia> AnnotatedChar('j', :label => 1) +'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) +``` +""" +struct AnnotatedChar{C <: AbstractChar} <: AbstractChar + char::C + annotations::Vector{Pair{Symbol, Any}} +end + +## Constructors ## + +# When called with overly-specialised arguments + +AnnotatedString(s::AbstractString, annots::Vector{<:Tuple{UnitRange{Int}, <:Pair{Symbol, <:Any}}}) = + AnnotatedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}(annots)) + +AnnotatedChar(c::AbstractChar, annots::Vector{<:Pair{Symbol, <:Any}}) = + AnnotatedChar(c, Vector{Pair{Symbol, Any}}(annots)) + +# Constructors to avoid recursive wrapping + +AnnotatedString(s::AnnotatedString, annots::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}) = + AnnotatedString(s.string, vcat(s.annotations, annots)) + +AnnotatedChar(c::AnnotatedChar, annots::Vector{Pair{Symbol, Any}}) = + AnnotatedChar(c.char, vcat(c.annotations, annots)) + +String(s::AnnotatedString{String}) = s.string # To avoid pointless overhead + +## Conversion/promotion ## + +convert(::Type{AnnotatedString}, s::AnnotatedString) = s +convert(::Type{AnnotatedString{S}}, s::S) where {S <: AbstractString} = + AnnotatedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()) +convert(::Type{AnnotatedString}, s::S) where {S <: AbstractString} = + convert(AnnotatedString{S}, s) +AnnotatedString(s::S) where {S <: AbstractString} = convert(AnnotatedString{S}, s) + +convert(::Type{AnnotatedChar}, c::AnnotatedChar) = c +convert(::Type{AnnotatedChar{C}}, c::C) where { C <: AbstractChar } = + AnnotatedChar{C}(c, Vector{Pair{Symbol, Any}}()) +convert(::Type{AnnotatedChar}, c::C) where { C <: AbstractChar } = + convert(AnnotatedChar{C}, c) + +AnnotatedChar(c::AbstractChar) = convert(AnnotatedChar, c) +AnnotatedChar(c::UInt32) = convert(AnnotatedChar,
Char(c)) +AnnotatedChar{C}(c::UInt32) where {C <: AbstractChar} = convert(AnnotatedChar, C(c)) + +promote_rule(::Type{<:AnnotatedString}, ::Type{<:AbstractString}) = AnnotatedString + +## AbstractString interface ## + +ncodeunits(s::AnnotatedString) = ncodeunits(s.string) +codeunits(s::AnnotatedString) = codeunits(s.string) +codeunit(s::AnnotatedString) = codeunit(s.string) +codeunit(s::AnnotatedString, i::Integer) = codeunit(s.string, i) +isvalid(s::AnnotatedString, i::Integer) = isvalid(s.string, i) +@propagate_inbounds iterate(s::AnnotatedString, i::Integer=firstindex(s)) = + if i <= lastindex(s.string); (s[i], nextind(s, i)) end +eltype(::Type{<:AnnotatedString{S}}) where {S} = AnnotatedChar{eltype(S)} +firstindex(s::AnnotatedString) = firstindex(s.string) +lastindex(s::AnnotatedString) = lastindex(s.string) + +function getindex(s::AnnotatedString, i::Integer) + @boundscheck checkbounds(s, i) + @inbounds if isvalid(s, i) + AnnotatedChar(s.string[i], annotations(s, i)) + else + string_index_err(s, i) + end +end + +## AbstractChar interface ## + +ncodeunits(c::AnnotatedChar) = ncodeunits(c.char) +codepoint(c::AnnotatedChar) = codepoint(c.char) + +# Avoid the iteration fallback with comparison +cmp(a::AnnotatedString, b::AbstractString) = cmp(a.string, b) +cmp(a::AbstractString, b::AnnotatedString) = cmp(a, b.string) +# To avoid method ambiguity +cmp(a::AnnotatedString, b::AnnotatedString) = cmp(a.string, b.string) + +==(a::AnnotatedString, b::AnnotatedString) = + a.string == b.string && a.annotations == b.annotations + +==(a::AnnotatedString, b::AbstractString) = isempty(a.annotations) && a.string == b +==(a::AbstractString, b::AnnotatedString) = isempty(b.annotations) && a == b.string + +""" + annotatedstring(values...) + +Create a `AnnotatedString` from any number of `values` using their +[`print`](@ref)ed representation. 
+ +This acts like [`string`](@ref), but takes care to preserve any annotations +present (in the form of [`AnnotatedString`](@ref) or [`AnnotatedChar`](@ref) values). + +See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref). + +## Examples + +```julia-repl +julia> annotatedstring("now a AnnotatedString") +"now a AnnotatedString" + +julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label => 1)]), ", and unannotated") +"annotated, and unannotated" +``` +""" +function annotatedstring(xs...) + isempty(xs) && return AnnotatedString("") + size = mapreduce(_str_sizehint, +, xs) + s = IOContext(IOBuffer(sizehint=size), :color => true) + annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() + for x in xs + if x isa AnnotatedString + for (region, annot) in x.annotations + push!(annotations, (s.io.size .+ (region), annot)) + end + print(s, x.string) + elseif x isa SubString{<:AnnotatedString} + for (region, annot) in x.string.annotations + start, stop = first(region), last(region) + if start <= x.offset + x.ncodeunits && stop > x.offset + rstart = s.io.size + max(0, start - x.offset) + 1 + rstop = s.io.size + min(stop, x.offset + x.ncodeunits) - x.offset + push!(annotations, (rstart:rstop, annot)) + end + end + print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift))) + elseif x isa AnnotatedChar + for annot in x.annotations + push!(annotations, (1+s.io.size:1+s.io.size, annot)) + end + print(s, x.char) + else + print(s, x) + end + end + str = String(resize!(s.io.data, s.io.size)) + AnnotatedString(str, annotations) +end + +annotatedstring(s::AnnotatedString) = s +annotatedstring(c::AnnotatedChar) = + AnnotatedString(string(c.char), [(1:ncodeunits(c), annot) for annot in c.annotations]) + +AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s) + +""" + annotatedstring_optimize!(str::AnnotatedString) + +Merge contiguous identical annotations in `str`. 
+""" +function annotatedstring_optimize!(s::AnnotatedString) + last_seen = Dict{Pair{Symbol, Any}, Int}() + i = 1 + while i <= length(s.annotations) + region, keyval = s.annotations[i] + prev = get(last_seen, keyval, 0) + if prev > 0 + lregion, _ = s.annotations[prev] + if last(lregion) + 1 == first(region) + s.annotations[prev] = + setindex(s.annotations[prev], + first(lregion):last(region), + 1) + deleteat!(s.annotations, i) + else + delete!(last_seen, keyval) + end + else + last_seen[keyval] = i + i += 1 + end + end + s +end + +function repeat(str::AnnotatedString, r::Integer) + r == 0 && return one(AnnotatedString) + r == 1 && return str + unannot = repeat(str.string, r) + annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() + len = ncodeunits(str) + fullregion = firstindex(str):lastindex(str) + for (region, annot) in str.annotations + if region == fullregion + push!(annotations, (firstindex(unannot):lastindex(unannot), annot)) + end + end + for offset in 0:len:(r-1)*len + for (region, annot) in str.annotations + if region != fullregion + push!(annotations, (region .+ offset, annot)) + end + end + end + AnnotatedString(unannot, annotations) |> annotatedstring_optimize! +end + +repeat(str::SubString{<:AnnotatedString}, r::Integer) = + repeat(AnnotatedString(str), r) + +function repeat(c::AnnotatedChar, r::Integer) + str = repeat(c.char, r) + fullregion = firstindex(str):lastindex(str) + AnnotatedString(str, [(fullregion, annot) for annot in c.annotations]) +end + +function reverse(s::AnnotatedString) + lastind = lastindex(s) + AnnotatedString(reverse(s.string), + [(UnitRange(1 + lastind - last(region), + 1 + lastind - first(region)), + annot) + for (region, annot) in s.annotations]) +end + +# TODO optimise? 
+reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s)) + +# TODO implement `replace(::AnnotatedString, ...)` + +## End AbstractString interface ## + +""" + annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol => value) + annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol => value) + +Annotate a `range` of `str` (or the entire string) with a labeled value (`label` => `value`). +To remove existing `label` annotations, use a value of `nothing`. +""" +function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) + label, val = labelval + indices = searchsorted(s.annotations, (range,), by=first) + if val === nothing + labelindex = filter(i -> first(s.annotations[i][2]) === label, indices) + for index in Iterators.reverse(labelindex) + deleteat!(s.annotations, index) + end + else + splice!(s.annotations, indices, [(range, Pair{Symbol, Any}(label, val))]) + end + s +end + +annotate!(ss::AnnotatedString, @nospecialize(labelval::Pair{Symbol, <:Any})) = + annotate!(ss, firstindex(ss):lastindex(ss), labelval) + +annotate!(s::SubString{<:AnnotatedString}, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (annotate!(s.string, s.offset .+ (range), labelval); s) + +annotate!(s::SubString{<:AnnotatedString}, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (annotate!(s.string, s.offset .+ (1:s.ncodeunits), labelval); s) + +""" + annotate!(char::AnnotatedChar, label::Symbol => value) + +Annotate `char` with the pair `label => value`. +""" +annotate!(c::AnnotatedChar, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (push!(c.annotations, labelval); c) + +""" + annotations(str::AnnotatedString, [position::Union{Integer, UnitRange}]) + annotations(str::SubString{AnnotatedString}, [position::Union{Integer, UnitRange}]) + +Get all annotations that apply to `str`. 
Should `position` be provided, only +annotations that overlap with `position` will be returned. + +See also: `annotate!`. +""" +annotations(s::AnnotatedString) = s.annotations + +annotations(s::SubString{<:AnnotatedString}) = + annotations(s.string, s.offset+1:s.offset+s.ncodeunits) + +function annotations(s::AnnotatedString, pos::UnitRange{<:Integer}) + # TODO optimise + annots = filter(label -> !isempty(intersect(pos, first(label))), + s.annotations) + last.(annots) +end + +annotations(s::AnnotatedString, pos::Integer) = annotations(s, pos:pos) + +annotations(s::SubString{<:AnnotatedString}, pos::Integer) = + annotations(s.string, s.offset + pos) +annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) = + annotations(s.string, first(pos)+s.offset:last(pos)+s.offset) + +""" + annotations(chr::AnnotatedChar) + +Get all annotations of `chr`. +""" +annotations(c::AnnotatedChar) = c.annotations diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 640e715bee3fc5..330cff1cf8f00f 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -242,8 +242,8 @@ end *(s::Union{AbstractString, AbstractChar}, t::Union{AbstractString, AbstractChar}...) -> AbstractString Concatenate strings and/or characters, producing a [`String`](@ref) or -[`TaggedString`](@ref) (as appropriate). This is equivalent to calling the -[`string`](@ref) or [`taggedstring`](@ref) function on the arguments. Concatenation of built-in string +[`AnnotatedString`](@ref) (as appropriate). This is equivalent to calling the +[`string`](@ref) or [`annotatedstring`](@ref) function on the arguments. Concatenation of built-in string types always produces a value of type `String` but other string types may choose to return a string of a different type as appropriate. @@ -257,10 +257,10 @@ julia> 'j' * "ulia" ``` """ function (*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...)
- istagged = s1 isa TaggedString || s1 isa TaggedChar || - any(s -> s isa TaggedString || s isa TaggedChar, ss) - if istagged - taggedstring(s1, ss...) + isannotated = s1 isa AnnotatedString || s1 isa AnnotatedChar || + any(s -> s isa AnnotatedString || s isa AnnotatedChar, ss) + if isannotated + annotatedstring(s1, ss...) else string(s1, ss...) end @@ -318,7 +318,7 @@ end ==(a::AbstractString, b::AbstractString) -> Bool Test whether two strings are equal character by character (technically, Unicode -code point by code point). Should either string be a [`TaggedString`](@ref) the +code point by code point). Should either string be a [`AnnotatedString`](@ref) the string properties must match too. # Examples diff --git a/base/strings/io.jl b/base/strings/io.jl index 7f762b75473ff7..c45d0ac84640e6 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -353,31 +353,31 @@ function join(io::IO, iterator, delim="") end end -# TODO: If/when we have `TaggedIO`, we can revisit this and +# TODO: If/when we have `AnnotatedIO`, we can revisit this and # implement it more nicely. -function join_tagged(iterator, delim="", last=delim) +function join_annotated(iterator, delim="", last=delim) xs = zip(iterator, Iterators.repeated(delim)) |> Iterators.flatten |> collect xs = xs[1:end-1] if length(xs) > 1 xs[end-1] = last end - taggedstring(xs...)::TaggedString{String} + annotatedstring(xs...)::AnnotatedString{String} end -function _join_maybe_tagged(args...) +function _join_maybe_annotated(args...) if any(function (arg) t = eltype(arg) - !(t == Union{}) && (t <: TaggedString || t <: TaggedChar) + !(t == Union{}) && (t <: AnnotatedString || t <: AnnotatedChar) end, args) - join_tagged(args...) + join_annotated(args...) else sprint(join, args...) 
end end -join(iterator) = _join_maybe_tagged(iterator) -join(iterator, delim) = _join_maybe_tagged(iterator, delim) -join(iterator, delim, last) = _join_maybe_tagged(iterator, delim, last) +join(iterator) = _join_maybe_annotated(iterator) +join(iterator, delim) = _join_maybe_annotated(iterator, delim) +join(iterator, delim, last) = _join_maybe_annotated(iterator, delim, last) ## string escaping & unescaping ## @@ -787,8 +787,8 @@ function String(chars::AbstractVector{<:AbstractChar}) end end -function TaggedString(chars::AbstractVector{C}) where {C<:AbstractChar} - str = if C <: TaggedChar +function AnnotatedString(chars::AbstractVector{C}) where {C<:AbstractChar} + str = if C <: AnnotatedChar String(getfield.(chars, :char)) else sprint(sizehint=length(chars)) do io @@ -800,12 +800,12 @@ function TaggedString(chars::AbstractVector{C}) where {C<:AbstractChar} props = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] point = 1 for c in chars - if c isa TaggedChar - for prop in c.properties + if c isa AnnotatedChar + for prop in c.annotations push!(props, (point:point, prop)) end end point += ncodeunits(c) end - TaggedString(str, props) + AnnotatedString(str, props) end diff --git a/base/strings/strings.jl b/base/strings/strings.jl index 4d14cfbcf98914..8dae311f475b49 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -include("strings/tagged.jl") +include("strings/annotated.jl") include("strings/search.jl") include("strings/unicode.jl") diff --git a/base/strings/tagged.jl b/base/strings/tagged.jl deleted file mode 100644 index 8435361a814503..00000000000000 --- a/base/strings/tagged.jl +++ /dev/null @@ -1,388 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - TaggedString{S <: AbstractString} <: AbstractString - -A string with metadata, in the form of annotated regions.
- -More specifically, this is a simple wrapper around any other -[`AbstractString`](@ref) that allows for regions of the wrapped string to be -annotated with tagged values. - -```text - C - ┌───┸─────────┐ - "this is an example tagged string" - └──┰────────┼─────┘ │ - A └───┰────────┘ - B -``` - -The above diagram represents a `TaggedString` where three ranges have been -annotated (labeled `A`, `B`, and `C`). Each annotation must take the form of a -`Pair{Symbol, <:Any}`, where a `Symbol` "tag" is used to label `Any` "value". - -Tags do not need to be unique, the same region can be annotated with the same -tag multiple times. - -See also [`TaggedChar`](@ref), [`taggedstring`](@ref), [`annotations`](@ref), and -[`annotate!`](@ref). - -!!! warning - While the constructors are part of the Base public API, the fields - of `TaggedString` are not. This is to allow for potential future - changes in the implementation of this type. Instead use the - [`annotations`](@ref), and [`annotate!`](@ref) getter/setter - functions. - -# Constructors - -```julia -TaggedString(s::S<:AbstractString) -> TaggedString{S} -TaggedString(s::S<:AbstractString, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, <:Any}}}) -``` - -A TaggedString can also be created with [`taggedstring`](@ref), which acts much -like [`string`](@ref) but preserves any tags present in the arguments. - -# Example - -```julia-repl -julia> TaggedString("this is an example tagged string", - [(1:18, :A => 1), (12:25, :B => 2), (18:32, :C => 3)]) -"this is an example tagged string" -``` -""" -struct TaggedString{S <: AbstractString} <: AbstractString - string::S - annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}} -end - -""" - TaggedChar{S <: AbstractChar} <: AbstractChar - -A Char annotated with tagged values. - -More specifically, this is a simple wrapper around any other -[`AbstractChar`](@ref), which holds a list of arbitrary tagged values -(`Pair{Symbol, <:Any}`) with the wrapped character. 
- -See also: [`TaggedString`](@ref), [`taggedstring`](@ref), `annotations`, -and `annotate!`. - -!!! warning - While the constructors are part of the Base public API, the fields - of `TaggedChar` are not. This it to allow for potential future - changes in the implementation of this type. Instead use the - [`annotations`](@ref), and [`annotate!`](@ref) getter/setter - functions. - -# Constructors - -```julia -TaggedChar(s::S) -> TaggedChar{S} -TaggedChar(s::S, annotations::Vector{Pair{Symbol, <:Any}}) -``` - -# Examples - -```julia-repl -julia> TaggedChar('j', :tag => 1) -'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) -``` -""" -struct TaggedChar{C <: AbstractChar} <: AbstractChar - char::C - annotations::Vector{Pair{Symbol, Any}} -end - -## Constructors ## - -# When called with overly-specialised arguments - -TaggedString(s::AbstractString, annots::Vector{<:Tuple{UnitRange{Int}, <:Pair{Symbol, <:Any}}}) = - TaggedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}(annots)) - -TaggedChar(c::AbstractChar, annots::Vector{<:Pair{Symbol, <:Any}}) = - TaggedChar(c, Vector{Pair{Symbol, Any}}(annots)) - -# Constructors to avoid recursive wrapping - -TaggedString(s::TaggedString, annots::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}) = - TaggedString(s.string, vcat(s.annotations, annots)) - -TaggedChar(c::TaggedChar, annots::Vector{Pair{Symbol, Any}}) = - TaggedChar(c.char, vcat(s.annotations, annots)) - -String(s::TaggedString{String}) = s.string # To avoid pointless overhead - -## Conversion/promotion ## - -convert(::Type{TaggedString}, s::TaggedString) = s -convert(::Type{TaggedString{S}}, s::S) where {S <: AbstractString} = - TaggedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()) -convert(::Type{TaggedString}, s::S) where {S <: AbstractString} = - convert(TaggedString{S}, s) -TaggedString(s::S) where {S <: AbstractString} = convert(TaggedString{S}, s) - -convert(::Type{TaggedChar}, c::TaggedChar) = c -convert(::Type{TaggedChar{C}}, 
c::C) where { C <: AbstractChar } = - TaggedChar{C}(c, Vector{Pair{Symbol, Any}}()) -convert(::Type{TaggedChar}, c::C) where { C <: AbstractChar } = - convert(TaggedChar{C}, c) - -TaggedChar(c::AbstractChar) = convert(TaggedChar, c) -TaggedChar(c::UInt32) = convert(TaggedChar, Char(c)) -TaggedChar{C}(c::UInt32) where {C <: AbstractChar} = convert(TaggedChar, C(c)) - -promote_rule(::Type{<:TaggedString}, ::Type{<:AbstractString}) = TaggedString - -## AbstractString interface ## - -ncodeunits(s::TaggedString) = ncodeunits(s.string) -codeunits(s::TaggedString) = codeunits(s.string) -codeunit(s::TaggedString) = codeunit(s.string) -codeunit(s::TaggedString, i::Integer) = codeunit(s.string, i) -isvalid(s::TaggedString, i::Integer) = isvalid(s.string, i) -@propagate_inbounds iterate(s::TaggedString, i::Integer=firstindex(s)) = - if i <= lastindex(s.string); (s[i], nextind(s, i)) end -eltype(::Type{<:TaggedString{S}}) where {S} = TaggedChar{eltype(S)} -firstindex(s::TaggedString) = firstindex(s.string) -lastindex(s::TaggedString) = lastindex(s.string) - -function getindex(s::TaggedString, i::Integer) - @boundscheck checkbounds(s, i) - @inbounds if isvalid(s, i) - TaggedChar(s.string[i], annotations(s, i)) - else - string_index_err(s, i) - end -end - -## AbstractChar interface ## - -ncodeunits(c::TaggedChar) = ncodeunits(c.char) -codepoint(c::TaggedChar) = codepoint(c.char) - -# Avoid the iteration fallback with comparison -cmp(a::TaggedString, b::AbstractString) = cmp(a.string, b) -cmp(a::AbstractString, b::TaggedString) = cmp(a, b.string) -# To avoid method ambiguity -cmp(a::TaggedString, b::TaggedString) = cmp(a.string, b.string) - -==(a::TaggedString, b::TaggedString) = - a.string == b.string && a.annotations == b.annotations - -==(a::TaggedString, b::AbstractString) = isempty(a.annotations) && a.string == b -==(a::AbstractString, b::TaggedString) = isempty(b.annotations) && a == b.string - -""" - taggedstring(values...) 
- -Create a `TaggedString` from any number of `values` using their -[`print`](@ref)ed representation. - -This acts like [`string`](@ref), but takes care to preserve any annotations -present (in the form of [`TaggedString`](@ref) or [`TaggedChar`](@ref) values). - -See also [`TaggedString`](@ref) and [`TaggedChar`](@ref). - -## Examples - -``` -julia> taggedstring("now a TaggedString") -"now a TaggedString" - -julia> taggedstring(TaggedString("tagged", [(1:6, :tag => 1)]), ", and untagged") -"tagged, and untagged" -``` -""" -function taggedstring(xs...) - isempty(xs) && return TaggedString("") - size = mapreduce(_str_sizehint, +, xs) - s = IOContext(IOBuffer(sizehint=size), :color => true) - annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() - for x in xs - if x isa TaggedString - for (region, annot) in x.annotations - push!(annotations, (s.io.size .+ (region), annot)) - end - print(s, x.string) - elseif x isa SubString{<:TaggedString} - for (region, annot) in x.string.annotations - start, stop = first(region), last(region) - if start <= x.offset + x.ncodeunits && stop > x.offset - rstart = s.io.size + max(0, start - x.offset) + 1 - rstop = s.io.size + min(stop, x.offset + x.ncodeunits) - x.offset - push!(annotations, (rstart:rstop, annot)) - end - end - print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift))) - elseif x isa TaggedChar - for annot in x.annotations - push!(annotations, (1+s.io.size:1+s.io.size, annot)) - end - print(s, x.char) - else - print(s, x) - end - end - str = String(resize!(s.io.data, s.io.size)) - TaggedString(str, annotations) -end - -taggedstring(s::TaggedString) = s -taggedstring(c::TaggedChar) = - TaggedString(string(c.char), [(1:ncodeunits(c), annot) for annot in c.annotations]) - -TaggedString(s::SubString{<:TaggedString}) = taggedstring(s) - -""" - taggedstring_optimize!(str::TaggedString) - -Merge contiguous identical tags in `str`. 
-""" -function taggedstring_optimize!(s::TaggedString) - last_seen = Dict{Pair{Symbol, Any}, Int}() - i = 1 - while i <= length(s.annotations) - region, keyval = s.annotations[i] - prev = get(last_seen, keyval, 0) - if prev > 0 - lregion, _ = s.annotations[prev] - if last(lregion) + 1 == first(region) - s.annotations[prev] = - setindex(s.annotations[prev], - first(lregion):last(region), - 1) - deleteat!(s.annotations, i) - else - delete!(last_seen, keyval) - end - else - last_seen[keyval] = i - i += 1 - end - end - s -end - -function repeat(str::TaggedString, r::Integer) - r == 0 && return one(TaggedString) - r == 1 && return str - untagged = repeat(str.string, r) - annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() - len = ncodeunits(str) - fullregion = firstindex(str):lastindex(str) - for (region, annot) in str.annotations - if region == fullregion - push!(annotations, (firstindex(untagged):lastindex(untagged), annot)) - end - end - for offset in 0:len:(r-1)*len - for (region, annot) in str.annotations - if region != fullregion - push!(annotations, (region .+ offset, annot)) - end - end - end - TaggedString(untagged, annotations) |> taggedstring_optimize! -end - -repeat(str::SubString{<:TaggedString}, r::Integer) = - repeat(TaggedString(str), r) - -function repeat(c::TaggedChar, r::Integer) - str = repeat(c.char, r) - fullregion = firstindex(str):lastindex(str) - TaggedString(str, [(fullregion, annot) for annot in c.annotations]) -end - -function reverse(s::TaggedString) - lastind = lastindex(s) - TaggedString(reverse(s.string), - [(UnitRange(1 + lastind - last(region), - 1 + lastind - first(region)), - annot) - for (region, annot) in s.annotations]) -end - -# TODO optimise? 
-reverse(s::SubString{<:TaggedString}) = reverse(TaggedString(s)) - -# TODO implement `replace(::TaggedString, ...)` - -## End AbstractString interface ## - -""" - annotate!(str::TaggedString, [range::UnitRange{Int}], tag::Symbol => value) - annotate!(str::SubString{TaggedString}, [range::UnitRange{Int}], tag::Symbol => value) - -Annotate a `range` of `str` (or the entire string) with a tagged value (`tag` => -`value`). To remove existing `tag` annotations, use a value of `nothing`. -""" -function annotate!(s::TaggedString, range::UnitRange{Int}, @nospecialize(tagval::Pair{Symbol, <:Any})) - tag, val = tagval - indices = searchsorted(s.annotations, (range,), by=first) - if val === nothing - tagindex = filter(i -> first(s.annotations[i][2]) === tag, indices) - for index in Iterators.reverse(tagindex) - deleteat!(s.annotations, index) - end - else - splice!(s.annotations, indices, [(range, Pair{Symbol, Any}(tag, val))]) - end - s -end - -annotate!(ss::TaggedString, @nospecialize(tagval::Pair{Symbol, <:Any})) = - annotate!(ss, firstindex(ss):lastindex(ss), tagval) - -annotate!(s::SubString{<:TaggedString}, range::UnitRange{Int}, @nospecialize(tagval::Pair{Symbol, <:Any})) = - (annotate!(s.string, s.offset .+ (range), tagval); s) - -annotate!(s::SubString{<:TaggedString}, @nospecialize(tagval::Pair{Symbol, <:Any})) = - (annotate!(s.string, s.offset .+ (1:s.ncodeunits), tagval); s) - -""" - annotate!(char::TaggedChar, tag::Symbol => value) - -Annotate `char` with the pair `tag => value`. -""" -annotate!(c::TaggedChar, @nospecialize(tagval::Pair{Symbol, <:Any})) = - (push!(c.annotations, tagval); c) - -""" - annotations(str::TaggedString, [position::Union{Integer, UnitRange}]) - annotations(str::SubString{TaggedString}, [position::Union{Integer, UnitRange}]) - -Get all annotations that apply to `str`. Should `position` be provided, only -annotations that overlap with `position` will be returned. - -See also: `annotate!`. 
-""" -annotations(s::TaggedString) = s.annotations - -annotations(s::SubString{<:TaggedString}) = - annotations(s, s.offset+1:s.offset+s.ncodeunits) - -function annotations(s::TaggedString, pos::UnitRange{<:Integer}) - # TODO optimise - annots = filter(tag -> !isempty(intersect(pos, first(tag))), - s.annotations) - last.(annots) -end - -annotations(s::TaggedString, pos::Integer) = annotations(s, pos:pos) - -annotations(s::SubString{<:TaggedString}, pos::Integer) = - annotations(s.string, s.offset + pos) -annotations(s::SubString{<:TaggedString}, pos::UnitRange{<:Integer}) = - annotations(s.string, first(pos)+s.offset:last(pos)+s.offset) - -""" - annotations(chr::TaggedChar) - -Get all annotations of `chr`. -""" -annotations(c::TaggedChar) = c.annotations diff --git a/base/strings/util.jl b/base/strings/util.jl index 226c0e3e3cc8e6..fae40cb568842b 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -459,8 +459,8 @@ function lpad( n::Integer, p::Union{AbstractChar,AbstractString}=' ', ) - stringfn = if any(isa.((s, p), Union{TaggedString, TaggedChar, SubString{<:TaggedString}})) - taggedstring else string end + stringfn = if any(isa.((s, p), Union{AnnotatedString, AnnotatedChar, SubString{<:AnnotatedString}})) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int m ≤ 0 && return stringfn(s) @@ -491,8 +491,8 @@ function rpad( n::Integer, p::Union{AbstractChar,AbstractString}=' ', ) - stringfn = if any(isa.((s, p), Union{TaggedString, TaggedChar, SubString{<:TaggedString}})) - taggedstring else string end + stringfn = if any(isa.((s, p), Union{AnnotatedString, AnnotatedChar, SubString{<:AnnotatedString}})) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int m ≤ 0 && return stringfn(s) diff --git a/deps/checksums/StyledStrings-c63466372f78aae651c8a6cfd8f590de906658be.tar.gz/md5 b/deps/checksums/StyledStrings-c63466372f78aae651c8a6cfd8f590de906658be.tar.gz/md5 deleted file mode 100644 
index f7d40fc4738ada..00000000000000 --- a/deps/checksums/StyledStrings-c63466372f78aae651c8a6cfd8f590de906658be.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -c297fb852bddeb369cccf25df0af4c46 diff --git a/deps/checksums/StyledStrings-c63466372f78aae651c8a6cfd8f590de906658be.tar.gz/sha512 b/deps/checksums/StyledStrings-c63466372f78aae651c8a6cfd8f590de906658be.tar.gz/sha512 deleted file mode 100644 index cd5f9943536986..00000000000000 --- a/deps/checksums/StyledStrings-c63466372f78aae651c8a6cfd8f590de906658be.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -99f0869f8ef818cb182731101b6fce132cfbb685c6777abc3b951de16ce710d36b6957791b88c3759a198f6709d462e537198f9e80072a707289b03995acee19 diff --git a/deps/checksums/StyledStrings-e2b48e22b2329d0979ec7402693118f36b63cd03.tar.gz/md5 b/deps/checksums/StyledStrings-e2b48e22b2329d0979ec7402693118f36b63cd03.tar.gz/md5 new file mode 100644 index 00000000000000..ec3252dd9411c6 --- /dev/null +++ b/deps/checksums/StyledStrings-e2b48e22b2329d0979ec7402693118f36b63cd03.tar.gz/md5 @@ -0,0 +1 @@ +258c2072d7316804a2cf79e1dd588b0f diff --git a/deps/checksums/StyledStrings-e2b48e22b2329d0979ec7402693118f36b63cd03.tar.gz/sha512 b/deps/checksums/StyledStrings-e2b48e22b2329d0979ec7402693118f36b63cd03.tar.gz/sha512 new file mode 100644 index 00000000000000..fcf85f7c44e0a0 --- /dev/null +++ b/deps/checksums/StyledStrings-e2b48e22b2329d0979ec7402693118f36b63cd03.tar.gz/sha512 @@ -0,0 +1 @@ +db378c2a5f31181232b328fa3f595237c922d5d4cf1a5f446ffe9c283d0947549cb4a62fac23b37ddbc50544321e0f2290396a00294d78353f731691793a49b2 diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 44ec0fe275b97e..2504f3dbd583ad 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -17,9 +17,9 @@ Core.String(::AbstractString) Base.SubString Base.LazyString Base.@lazy_str -Base.TaggedString -Base.TaggedChar -Base.taggedstring +Base.AnnotatedString +Base.AnnotatedChar +Base.annotatedstring Base.annotations Base.annotate! 
Base.transcode diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index d29358d2a56ff3..ec146125024b84 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -1205,28 +1205,28 @@ However, the next backslash character escapes the backslash that follows it, and last backslash escapes a quote, since these backslashes appear before a quote. -## [Tagged Strings](@id man-tagged-strings) +## [Annotated Strings](@id man-annotated-strings) It is sometimes useful to be able to hold metadata relating to regions of a -string. A [`TaggedString`](@ref Base.TaggedString) wraps another string and -allows for regions of it to be annotated with tagged values (`:tag => value`). +string. An [`AnnotatedString`](@ref Base.AnnotatedString) wraps another string and +allows for regions of it to be annotated with labelled values (`:label => value`). All generic string operations are applied to the underlying string. However, when possible, styling information is preserved. This means you can manipulate a -[`TaggedString`](@ref Base.TaggedString) —taking substrings, padding them, +[`AnnotatedString`](@ref Base.AnnotatedString) —taking substrings, padding them, concatenating them with other strings— and the metadata annotations will "come along for the ride". This string type is fundamental to the [StyledStrings stdlib](@ref -stdlib-styledstrings), which uses `:face`-tagged annotations to hold styling +stdlib-styledstrings), which uses `:face`-labelled annotations to hold styling information. -When concatenating a [`TaggedString`](@ref Base.TaggedString), take care to use -[`taggedstring`](@ref Base.taggedstring) instead of [`string`](@ref) if you want -to keep the tagged string annotations. +When concatenating an [`AnnotatedString`](@ref Base.AnnotatedString), take care to use +[`annotatedstring`](@ref Base.annotatedstring) instead of [`string`](@ref) if you want +to keep the string annotations.
```jldoctest -julia> str = Base.TaggedString("hello there", - [(1:5, :word => :greeting), (7:11, :tag => 1)]) +julia> str = Base.AnnotatedString("hello there", + [(1:5, :word => :greeting), (7:11, :label => 1)]) "hello there" julia> length(str) @@ -1236,18 +1236,18 @@ julia> lpad(str, 14) " hello there" julia> typeof(lpad(str, 7)) -Base.TaggedString{String} +Base.AnnotatedString{String} -julia> str2 = Base.TaggedString(" julia", [(2:6, :face => :magenta)]) +julia> str2 = Base.AnnotatedString(" julia", [(2:6, :face => :magenta)]) " julia" -julia> Base.taggedstring(str, str2) +julia> Base.annotatedstring(str, str2) "hello there julia" -julia> str * str2 == Base.taggedstring(str, str2) # *-concatination still works +julia> str * str2 == Base.annotatedstring(str, str2) # *-concatenation still works true ``` -The annotations of a [`TaggedString`](@ref Base.TaggedString) can be accessed +The annotations of an [`AnnotatedString`](@ref Base.AnnotatedString) can be accessed and modified via the [`annotations`](@ref Base.annotations) and [`annotate!`](@ref Base.annotate!) functions.
diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version index e7dad1ffa00e39..987d5c4ac2e924 100644 --- a/stdlib/StyledStrings.version +++ b/stdlib/StyledStrings.version @@ -1,4 +1,4 @@ STYLEDSTRINGS_BRANCH = main -STYLEDSTRINGS_SHA1 = c63466372f78aae651c8a6cfd8f590de906658be +STYLEDSTRINGS_SHA1 = e2b48e22b2329d0979ec7402693118f36b63cd03 STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1 diff --git a/test/choosetests.jl b/test/choosetests.jl index 8211d59c40bcae..6ba0c8ea145b8e 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -151,7 +151,7 @@ function choosetests(choices = []) filtertests!(tests, "unicode", ["unicode/utf8"]) filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util", - "strings/io", "strings/types", "strings/tagged"]) + "strings/io", "strings/types", "strings/annotated"]) # do subarray before sparse but after linalg filtertests!(tests, "subarray") filtertests!(tests, "compiler", [ diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl new file mode 100644 index 00000000000000..324c1ccb495f68 --- /dev/null +++ b/test/strings/annotated.jl @@ -0,0 +1,99 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +@testset "AnnotatedString" begin + str = Base.AnnotatedString("some string") + @test str == Base.AnnotatedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]) + @test length(str) == 11 + @test ncodeunits(str) == 11 + @test eltype(str) == Base.AnnotatedChar{eltype(str.string)} + @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[]) + @test str[1:4] isa SubString{typeof(str)} + @test str[1:4] == Base.AnnotatedString("some") + @test "a" * str == Base.AnnotatedString("asome string") + @test str * "a" == Base.AnnotatedString("some stringa") + @test str * str == Base.AnnotatedString("some stringsome string") + Base.annotate!(str, 1:4, :thing => 0x01) + Base.annotate!(str, 5:11, :other => 0x02) + Base.annotate!(str, 1:11, :all => 0x03) + @test str[3:4] == SubString(str, 3, 4) + @test Base.AnnotatedString(str[3:4]) == + Base.AnnotatedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)]) + @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) + @test str != Base.AnnotatedString("some string") + @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (5:5, :other => 0x02), (11:11, :all => 0x03)]) + @test str != Base.AnnotatedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (5:11, :other => 0x12)]) + @test str != Base.AnnotatedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) + let allstrings = + ['a', Base.AnnotatedChar('a'), Base.AnnotatedChar('a', [:aaa => 0x04]), + "a string", Base.AnnotatedString("a string"), + Base.AnnotatedString("a string", [(1:2, :hmm => '%')])] + for str1 in repeat(allstrings, 2) + for str2 in repeat(allstrings, 2) + @test String(str1 * str2) == + String(string(str1, str2)) == + String(string(str1)) * String(string(str2)) + @test Base.annotatedstring(str1 * str2) == + Base.annotatedstring(str1, str2) == + 
Base.annotatedstring(str1) * Base.annotatedstring(str2) + end + end + end + # @test collect(Base.eachstyle(str)) == + # [("some", [:thing => 0x01, :all => 0x03]), + # (" string", [:all => 0x03, :other => 0x02])] + @test ==(Base.annotatedstring_optimize!( + Base.AnnotatedString("abc", [(1:1, :val => 1), + (2:2, :val => 2), + (2:2, :val => 1), + (3:3, :val => 2)])), + Base.AnnotatedString("abc", [(1:2, :val => 1), + (2:3, :val => 2)])) +end + +@testset "AnnotatedChar" begin + chr = Base.AnnotatedChar('c') + @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[]) + str = Base.AnnotatedString("hmm", [(1:1, :attr => "h0h0"), + (1:2, :attr => "h0m1"), + (2:3, :attr => "m1m2")]) + @test str[1] == Base.AnnotatedChar('h', Pair{Symbol, Any}[:attr => "h0h0"]) + @test str[2] == Base.AnnotatedChar('m', Pair{Symbol, Any}[:attr => "h0m1", :attr => "m1m2"]) + @test str[3] == Base.AnnotatedChar('m', Pair{Symbol, Any}[:attr => "m1m2"]) +end + +@testset "Styling preservation" begin + str = Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) + @test match(r".e", str).match == str[3:4] + @test match(r"(.e)", str).captures == [str[3:4]] + let m0 = match(r"(.)e", str) + m1 = first(eachmatch(r"(.)e", str)) + for f in fieldnames(RegexMatch) + @test getfield(m0, f) == getfield(m1, f) + end + end + @test lpad(str, 12) == + Base.AnnotatedString(" some string", [(2:5, :thing => 0x01), + (2:12, :all => 0x03), + (6:12, :other => 0x02)]) + @test rpad(str, 12) == + Base.AnnotatedString("some string ", [(1:4, :thing => 0x01), + (1:11, :all => 0x03), + (5:11, :other => 0x02)]) + str1 = Base.AnnotatedString("test", [(1:4, :label => 5)]) + str2 = Base.AnnotatedString("case", [(2:3, :label => "oomph")]) + @test join([str1, str1], Base.AnnotatedString(" ")) == + Base.AnnotatedString("test test", + [(1:4, :label => 5), + (6:9, :label => 5)]) + @test join([str1, str1], Base.AnnotatedString(" ", [(1:1, :label => 2)])) == + 
Base.AnnotatedString("test test", + [(1:4, :label => 5), + (5:5, :label => 2), + (6:9, :label => 5)]) + @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label => 5)]) + @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label => "oomph"), + (6:7, :label => "oomph")]) + @test repeat(str1[1], 3) == Base.AnnotatedString("ttt", [(1:3, :label => 5)]) + @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)]) + @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")]) +end diff --git a/test/strings/tagged.jl b/test/strings/tagged.jl deleted file mode 100644 index 229fc53ef9e77a..00000000000000 --- a/test/strings/tagged.jl +++ /dev/null @@ -1,99 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -@testset "TaggedString" begin - str = Base.TaggedString("some string") - @test str == Base.TaggedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]) - @test length(str) == 11 - @test ncodeunits(str) == 11 - @test eltype(str) == Base.TaggedChar{eltype(str.string)} - @test first(str) == Base.TaggedChar(first(str.string), Pair{Symbol, Any}[]) - @test str[1:4] isa SubString{typeof(str)} - @test str[1:4] == Base.TaggedString("some") - @test "a" * str == Base.TaggedString("asome string") - @test str * "a" == Base.TaggedString("some stringa") - @test str * str == Base.TaggedString("some stringsome string") - Base.annotate!(str, 1:4, :thing => 0x01) - Base.annotate!(str, 5:11, :other => 0x02) - Base.annotate!(str, 1:11, :all => 0x03) - @test str[3:4] == SubString(str, 3, 4) - @test Base.TaggedString(str[3:4]) == - Base.TaggedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)]) - @test str == Base.TaggedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) - @test str != Base.TaggedString("some string") - @test str != Base.TaggedString("some string", [(1:1, :thing => 0x01), (5:5, :other => 0x02), (11:11, :all => 0x03)]) - 
@test str != Base.TaggedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (5:11, :other => 0x12)]) - @test str != Base.TaggedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) - let allstrings = - ['a', Base.TaggedChar('a'), Base.TaggedChar('a', [:aaa => 0x04]), - "a string", Base.TaggedString("a string"), - Base.TaggedString("a string", [(1:2, :hmm => '%')])] - for str1 in repeat(allstrings, 2) - for str2 in repeat(allstrings, 2) - @test String(str1 * str2) == - String(string(str1, str2)) == - String(string(str1)) * String(string(str2)) - @test Base.taggedstring(str1 * str2) == - Base.taggedstring(str1, str2) == - Base.taggedstring(str1) * Base.taggedstring(str2) - end - end - end - # @test collect(Base.eachstyle(str)) == - # [("some", [:thing => 0x01, :all => 0x03]), - # (" string", [:all => 0x03, :other => 0x02])] - @test ==(Base.taggedstring_optimize!( - Base.TaggedString("abc", [(1:1, :val => 1), - (2:2, :val => 2), - (2:2, :val => 1), - (3:3, :val => 2)])), - Base.TaggedString("abc", [(1:2, :val => 1), - (2:3, :val => 2)])) -end - -@testset "TaggedChar" begin - chr = Base.TaggedChar('c') - @test chr == Base.TaggedChar(chr.char, Pair{Symbol, Any}[]) - str = Base.TaggedString("hmm", [(1:1, :attr => "h0h0"), - (1:2, :attr => "h0m1"), - (2:3, :attr => "m1m2")]) - @test str[1] == Base.TaggedChar('h', Pair{Symbol, Any}[:attr => "h0h0"]) - @test str[2] == Base.TaggedChar('m', Pair{Symbol, Any}[:attr => "h0m1", :attr => "m1m2"]) - @test str[3] == Base.TaggedChar('m', Pair{Symbol, Any}[:attr => "m1m2"]) -end - -@testset "Styling preservation" begin - str = Base.TaggedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) - @test match(r".e", str).match == str[3:4] - @test match(r"(.e)", str).captures == [str[3:4]] - let m0 = match(r"(.)e", str) - m1 = first(eachmatch(r"(.)e", str)) - for f in fieldnames(RegexMatch) - @test getfield(m0, f) == getfield(m1, f) - end - end - 
@test lpad(str, 12) == - Base.TaggedString(" some string", [(2:5, :thing => 0x01), - (2:12, :all => 0x03), - (6:12, :other => 0x02)]) - @test rpad(str, 12) == - Base.TaggedString("some string ", [(1:4, :thing => 0x01), - (1:11, :all => 0x03), - (5:11, :other => 0x02)]) - str1 = Base.TaggedString("test", [(1:4, :tag => 5)]) - str2 = Base.TaggedString("case", [(2:3, :tag => "oomph")]) - @test join([str1, str1], Base.TaggedString(" ")) == - Base.TaggedString("test test", - [(1:4, :tag => 5), - (6:9, :tag => 5)]) - @test join([str1, str1], Base.TaggedString(" ", [(1:1, :tag => 2)])) == - Base.TaggedString("test test", - [(1:4, :tag => 5), - (5:5, :tag => 2), - (6:9, :tag => 5)]) - @test repeat(str1, 2) == Base.TaggedString("testtest", [(1:8, :tag => 5)]) - @test repeat(str2, 2) == Base.TaggedString("casecase", [(2:3, :tag => "oomph"), - (6:7, :tag => "oomph")]) - @test repeat(str1[1], 3) == Base.TaggedString("ttt", [(1:3, :tag => 5)]) - @test reverse(str1) == Base.TaggedString("tset", [(1:4, :tag => 5)]) - @test reverse(str2) == Base.TaggedString("esac", [(2:3, :tag => "oomph")]) -end