From 94c81f91b1e14c971433ede8a75714f58174332f Mon Sep 17 00:00:00 2001
From: Andy Hayden
Date: Thu, 17 Sep 2015 12:40:15 -0700
Subject: [PATCH] Parse autolinks for urls and emails. Remove `@` from escaped
 html.

---
 base/markdown/Common/Common.jl |  2 +-
 base/markdown/Common/inline.jl | 62 ++++++++++++++++++++++++++++++++++
 base/markdown/GitHub/GitHub.jl |  2 +-
 base/markdown/Julia/Julia.jl   |  2 +-
 base/markdown/render/html.jl   |  2 +-
 test/markdown.jl               |  3 +++
 6 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/base/markdown/Common/Common.jl b/base/markdown/Common/Common.jl
index 6b5807775f3470..766d0bad5fb8cd 100644
--- a/base/markdown/Common/Common.jl
+++ b/base/markdown/Common/Common.jl
@@ -7,4 +7,4 @@ include("inline.jl")
                 paragraph,
 
                 linebreak, escapes, inline_code,
-                asterisk_bold, asterisk_italic, image, link]
+                asterisk_bold, asterisk_italic, image, link, autolink]
diff --git a/base/markdown/Common/inline.jl b/base/markdown/Common/inline.jl
index 9e10f7a28ac645..6843273f8e50a1 100644
--- a/base/markdown/Common/inline.jl
+++ b/base/markdown/Common/inline.jl
@@ -76,6 +76,68 @@ function link(stream::IO, md::MD)
     end
 end
 
+# Parse an autolink of the form `<scheme://...>` or `<mailto:user@host>`.
+# The do-block returns a `Link` whose text and target are both the bracketed
+# contents when they pass `_is_link` or `_is_mailto`, and nothing otherwise.
+@trigger '<' ->
+function autolink(stream::IO, md::MD)
+    withstream(stream) do
+        startswith(stream, '<') || return
+        url = readuntil(stream, '>')
+        # NOTE(review): presumably `readuntil` reports a missing '>' by
+        # returning `nothing` -- confirm against its definition. The guard is
+        # harmless if it always returns a string.
+        url === nothing && return
+        _is_link(url) && return Link(url, url)
+        _is_mailto(url) && return Link(url, url)
+        return
+    end
+end
+
+# Schemes accepted by `_is_link`, taken from the CommonMark spec:
+# http://spec.commonmark.org/0.19/#absolute-uri
+# Separate entries with whitespace only: `split` is called without a delimiter,
+# so a comma would fuse two schemes into a single entry that never matches.
+const _allowable_schemes = Set(split("coap doi javascript aaa aaas about acap cap cid crid
+data dav dict dns file ftp geo go gopher h323 http https iax icap im imap info ipp iris
+iris.beep iris.xpc iris.xpcs iris.lwz ldap mailto mid msrp msrps mtqp mupdate news nfs ni
+nih nntp opaquelocktoken pop pres rtsp service session shttp sieve sip sips sms snmp
+soap.beep soap.beeps tag tel telnet tftp thismessage tn3270 tip tv urn vemmi ws wss xcon
+xcon-userid xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s
+adiumxtra afp afs aim apt attachment aw beshare bitcoin bolo callto chrome
+chrome-extension com-eventbrite-attendee content cvs dlna-playsingle dlna-playcontainer
+dtn dvb ed2k facetime feed finger fish gg git gizmoproject gtalk hcp icon ipn irc irc6
+ircs itms jar jms keyparc lastfm ldaps magnet maps market message mms ms-help msnim
+mumble mvn notes oid palm paparazzi platform proxy psyc query res resource rmi rsync
+rtmp secondlife sftp sgn skype smb soldat spotify ssh steam svn teamspeak
+things udp unreal ut2004 ventrilo view-source webcal wtai wyciwyg xfire xri ymsgr"))
+
+# Return true when `s` looks like `scheme://rest` and the scheme, compared
+# case-insensitively, is one of `_allowable_schemes`. Strings containing '<'
+# are rejected.
+function _is_link(s::AbstractString)
+    '<' in s && return false
+
+    m = match(r"^(.*)://(\S+?)(:\S*)?$", s)
+    m === nothing && return false
+    scheme = lowercase(m.captures[1])
+    return scheme in _allowable_schemes
+end
+
+# non-normative regex from the HTML5 spec
+const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+
+# Return true when `s` is "mailto:" (in any letter case) followed by an address
+# that matches `_email_regex`.
+function _is_mailto(s::AbstractString)
+    # Strip the prefix with a regex instead of slicing by index: the prefix
+    # length and the start of the address cannot drift apart, and no index can
+    # land inside a multi-byte character.
+    m = match(r"^mailto:(.*)$"i, s)
+    m === nothing && return false
+    return ismatch(_email_regex, m.captures[1])
+end
+
 # –––––––––––
 # Punctuation
 # –––––––––––
diff --git a/base/markdown/GitHub/GitHub.jl b/base/markdown/GitHub/GitHub.jl
index c0ed6b9b036f80..321d808094cbf9 100644
--- a/base/markdown/GitHub/GitHub.jl
+++ b/base/markdown/GitHub/GitHub.jl
@@ -58,4 +58,4 @@ end
                 github_table, github_paragraph,
 
                 linebreak, escapes, en_dash, inline_code, asterisk_bold,
-                asterisk_italic, image, link]
+                asterisk_italic, image, link, autolink]
diff --git a/base/markdown/Julia/Julia.jl b/base/markdown/Julia/Julia.jl
index feaa28db916fab..f87fb191b29961 100644
--- a/base/markdown/Julia/Julia.jl
+++ b/base/markdown/Julia/Julia.jl
@@ -11,4 +11,4 @@ include("interp.jl")
                blockquote, github_table, horizontalrule, setextheader, paragraph,
 
                linebreak, escapes, tex, interp, en_dash, inline_code,
-               asterisk_bold, asterisk_italic, image, link]
+               asterisk_bold, asterisk_italic, image, link, autolink]
diff --git a/base/markdown/render/html.jl b/base/markdown/render/html.jl
index 51eea3f970f35b..855da94b3f9b09 100644
--- a/base/markdown/render/html.jl
+++ b/base/markdown/render/html.jl
@@ -26,7 +26,7 @@ const _htmlescape_chars = Dict('<'=>"&lt;", '>'=>"&gt;",
                                '"'=>"&quot;", '&'=>"&amp;",
                                # ' '=>"&nbsp;",
                                )
-for ch in "'`!@\$\%()=+{}[]"
+for ch in "'`!\$\%()=+{}[]"
     _htmlescape_chars[ch] = "&#$(Int(ch));"
 end
 
diff --git a/test/markdown.jl b/test/markdown.jl
index b02755dbc86b59..c20f53662a7208 100644
--- a/test/markdown.jl
+++ b/test/markdown.jl
@@ -88,6 +88,9 @@ World""" |> plain == "Hello\n\n–––\n\nWorld\n"
 @test md"* World" |> html == "<ul>\n<li>World</li>\n</ul>\n"
 @test md"# title *blah*" |> html == "<h1>title <em>blah</em></h1>\n"
 @test md"## title *blah*" |> html == "<h2>title <em>blah</em></h2>\n"
+@test md"<https://julialang.org>" |> html == """<p><a href="https://julialang.org">https://julialang.org</a></p>\n"""
+@test md"<mailto://a@example.com>" |> html == """<p><a href="mailto://a@example.com">mailto://a@example.com</a></p>\n"""
+@test md"<mailto:a@example.com>" |> html == """<p><a href="mailto:a@example.com">mailto:a@example.com</a></p>\n"""
 @test md"""Hello
 
 ---