diff --git a/base/markdown/Common/Common.jl b/base/markdown/Common/Common.jl
index 0152fd04b6cf3..de021f7be1f2e 100644
--- a/base/markdown/Common/Common.jl
+++ b/base/markdown/Common/Common.jl
@@ -7,4 +7,5 @@ include("inline.jl")
                 paragraph,
 
                 linebreak, escapes, inline_code,
-                asterisk_bold, asterisk_italic, image, footnote_link, link]
+                asterisk_bold, asterisk_italic, image, footnote_link, link, autolink]
+
diff --git a/base/markdown/Common/inline.jl b/base/markdown/Common/inline.jl
index 8e48dc9256796..46f94264d4761 100644
--- a/base/markdown/Common/inline.jl
+++ b/base/markdown/Common/inline.jl
@@ -106,6 +106,59 @@ function footnote_link(stream::IO, md::MD)
     end
 end
 
+# Inline parser for autolinks written as `<scheme://...>` or `<mailto:address>`.
+# Returns `Link(url, url)` when the text between '<' and '>' passes `_is_link`
+# or `_is_mailto`; every other path returns `nothing`.
+@trigger '<' ->
+function autolink(stream::IO, md::MD)
+    withstream(stream) do
+        startswith(stream, '<') || return
+        url = readuntil(stream, '>')
+        # `nothing` here presumably means no closing '>' was found -- confirm
+        # against the Markdown-internal `readuntil`.
+        url ≡ nothing && return
+        (_is_link(url) || _is_mailto(url)) && return Link(url, url)
+        return
+    end
+end
+
+# This list is taken from the commonmark spec
+# http://spec.commonmark.org/0.19/#absolute-uri
+# Entries must be separated by whitespace only: `split` is called without a
+# delimiter, so a comma between two names would fuse them into one bogus entry.
+# NOTE(review): `javascript` is on the spec's list, so `<javascript://...>` is
+# accepted as a link -- confirm that is acceptable for untrusted input.
+const _allowable_schemes = Set(split("coap doi javascript aaa aaas about acap cap cid crid data dav dict dns file ftp geo go gopher h323 http https iax icap im imap info ipp iris iris.beep iris.xpc iris.xpcs iris.lwz ldap mailto mid msrp msrps mtqp mupdate news nfs ni nih nntp opaquelocktoken pop pres rtsp service session shttp sieve sip sips sms snmp soap.beep soap.beeps tag tel telnet tftp thismessage tn3270 tip tv urn vemmi ws wss xcon xcon-userid xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s
+adiumxtra afp afs aim apt attachment aw beshare bitcoin bolo callto chrome chrome-extension com-eventbrite-attendee content cvs dlna-playsingle dlna-playcontainer dtn dvb ed2k facetime feed finger fish gg git gizmoproject gtalk hcp icon ipn irc irc6 ircs itms jar jms keyparc lastfm ldaps magnet maps market message mms ms-help msnim mumble mvn notes oid palm paparazzi platform proxy psyc query res resource rmi rsync rtmp secondlife sftp sgn skype smb soldat spotify ssh steam svn teamspeak
+things udp unreal ut2004 ventrilo view-source webcal wtai wyciwyg xfire xri ymsgr"))
+
+# Return `true` when `s` has the form `scheme://rest`, where `rest` contains no
+# whitespace and `scheme`, lowercased, is a member of `_allowable_schemes`.
+function _is_link(s::AbstractString)
+    '<' in s && return false
+
+    # The scheme capture is lazy so it stops at the first "://"; a greedy capture
+    # runs to the last one and rejects URLs that embed another "://" later on.
+    m = match(r"^(.*?)://(\S+?)(:\S*)?$", s)
+    m ≡ nothing && return false
+    scheme = lowercase(m.captures[1])
+    return scheme in _allowable_schemes
+end
+
+# non-normative regex from the HTML5 spec
+const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+
+# Return `true` when `s` is "mailto:" (in any letter case) followed by an address
+# that matches `_email_regex`.
+function _is_mailto(s::AbstractString)
+    # Match the prefix with a regex instead of slicing by index: a slice has to
+    # agree in length with the 7-character prefix and can land inside a
+    # multi-byte character.
+    m = match(r"^mailto:(.*)$"i, s)
+    m ≡ nothing && return false
+    return ismatch(_email_regex, m.captures[1])
+end
+
 # –––––––––––
 # Punctuation
 # –––––––––––
diff --git a/base/markdown/GitHub/GitHub.jl b/base/markdown/GitHub/GitHub.jl
index 591634d8e50b5..3692a8c515971 100644
--- a/base/markdown/GitHub/GitHub.jl
+++ b/base/markdown/GitHub/GitHub.jl
@@ -62,4 +62,5 @@ end
                 github_table, github_paragraph,
 
                 linebreak, escapes, en_dash, inline_code, asterisk_bold,
-                asterisk_italic, image, footnote_link, link]
+                asterisk_italic, image, footnote_link, link, autolink]
+
diff --git a/base/markdown/Julia/Julia.jl b/base/markdown/Julia/Julia.jl
index 2297b8339899c..ab18b8d92eeaa 100644
--- a/base/markdown/Julia/Julia.jl
+++ b/base/markdown/Julia/Julia.jl
@@ -11,4 +11,5 @@ include("interp.jl")
                blockquote, admonition, footnote, github_table, horizontalrule, setextheader, paragraph,
 
                linebreak, escapes, tex, interp, en_dash, inline_code,
-               asterisk_bold, asterisk_italic, image, footnote_link, link]
+               asterisk_bold, asterisk_italic, image, footnote_link, link, autolink]
+
diff --git a/base/markdown/render/html.jl b/base/markdown/render/html.jl
index a0404bf8f845e..7e2f256f4aa5c 100644
--- a/base/markdown/render/html.jl
+++ b/base/markdown/render/html.jl
@@ -26,7 +26,7 @@ const _htmlescape_chars = Dict('<'=>"<", 
'>'=>">", '"'=>""", '&'=>"&", # ' '=>" ", ) -for ch in "'`!@\$\%()=+{}[]" +for ch in "'`!\$\%()=+{}[]" _htmlescape_chars[ch] = "&#$(Int(ch));" end diff --git a/test/markdown.jl b/test/markdown.jl index 391e35e8e042d..59782b40d28b4 100644 --- a/test/markdown.jl +++ b/test/markdown.jl @@ -250,6 +250,11 @@ end @test md"* World" |> html == "\n" @test md"# title *blah*" |> html == "

title blah

\n" @test md"## title *blah*" |> html == "

title blah

\n" +@test md"" |> html == """

https://julialang.org

\n""" +@test md"" |> html == """

mailto://a@example.com

\n""" +@test md"" |> html == "

<https://julialang.org/not a link>

\n" +@test md"""""" |> html == "

<https://julialang.org/nota link>

\n" @test md"""Hello ---