From 9c4c744441f147000f7b78eacc98c11b3ea90c57 Mon Sep 17 00:00:00 2001
From: Andy Hayden
Date: Thu, 17 Sep 2015 12:40:15 -0700
Subject: [PATCH] Parse autolinks for urls and emails. Remove `@` from escaped
 html. Use AbstractString rather than String.

---
Review notes (text between "---" and the diffstat is ignored by `git am`):

 * _is_mailto: "mailto:" is seven characters, so the previous six-character
   prefix test (`s[1:6]`) could never succeed and `s[6:end]` would have kept
   part of the prefix in the address. It now checks `s[1:7]` and validates
   `s[8:end]`. The `<mailto://a@example.com>` test below is accepted by
   _is_link (it contains "://"), which is why it never exercised this path.
 * _allowable_schemes: the list is split on whitespace only, so comma-joined
   entries such as "snmp,soap.beep" became single bogus schemes. The commas
   are replaced by spaces.
 * _is_link: the match result is compared against `nothing` with `===`.
 * No line counts changed, so the hunk headers and the diffstat still hold.
   The blob ids on the `index` lines are those of the original commit.
 * NOTE(review): this copy of the patch had lost its line breaks, its
   indentation and every `<...>` / `&...;` sequence. Indentation, the HTML
   entities in render/html.jl and the HTML in the test expectations were
   reconstructed - confirm them against the upstream commit before applying.

 base/markdown/Common/Common.jl |  3 ++-
 base/markdown/Common/inline.jl | 37 ++++++++++++++++++++++++++++++++++
 base/markdown/GitHub/GitHub.jl |  3 ++-
 base/markdown/Julia/Julia.jl   |  3 ++-
 base/markdown/render/html.jl   |  2 +-
 test/markdown.jl               |  2 ++
 6 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/base/markdown/Common/Common.jl b/base/markdown/Common/Common.jl
index 0152fd04b6cf38..de021f7be1f2e3 100644
--- a/base/markdown/Common/Common.jl
+++ b/base/markdown/Common/Common.jl
@@ -7,4 +7,5 @@ include("inline.jl")
                 paragraph,
 
                 linebreak, escapes, inline_code,
-                asterisk_bold, asterisk_italic, image, footnote_link, link]
+                asterisk_bold, asterisk_italic, image, footnote_link, link, autolink]
+
diff --git a/base/markdown/Common/inline.jl b/base/markdown/Common/inline.jl
index 8e48dc92567960..203cc667fa8d6a 100644
--- a/base/markdown/Common/inline.jl
+++ b/base/markdown/Common/inline.jl
@@ -106,6 +106,43 @@ function footnote_link(stream::IO, md::MD)
     end
 end
 
+@trigger '<' ->
+function autolink(stream::IO, md::MD)
+    withstream(stream) do
+        startswith(stream, '<') || return
+        url = readuntil(stream, '>')
+        url ≡ nothing && return
+        _is_link(url) && return Link(url, url)
+        _is_mailto(url) && return Link(url, url)
+        return
+    end
+end
+
+# This list is taken from the commonmark spec
+# http://spec.commonmark.org/0.19/#absolute-uri
+const _allowable_schemes = Set(split("coap doi javascript aaa aaas about acap cap cid crid data dav dict dns file ftp geo go gopher h323 http https iax icap im imap info ipp iris iris.beep iris.xpc iris.xpcs iris.lwz ldap mailto mid msrp msrps mtqp mupdate news nfs ni nih nntp opaquelocktoken pop pres rtsp service session shttp sieve sip sips sms snmp soap.beep soap.beeps tag tel telnet tftp thismessage tn3270 tip tv urn vemmi ws wss xcon xcon-userid xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s
+adiumxtra afp afs aim apt attachment aw beshare bitcoin bolo callto chrome chrome-extension com-eventbrite-attendee content cvs dlna-playsingle dlna-playcontainer dtn dvb ed2k facetime feed finger fish gg git gizmoproject gtalk hcp icon ipn irc irc6 ircs itms jar jms keyparc lastfm ldaps magnet maps market message mms ms-help msnim mumble mvn notes oid palm paparazzi platform proxy psyc query res resource rmi rsync rtmp secondlife sftp sgn skype smb soldat spotify ssh steam svn teamspeak
+things udp unreal ut2004 ventrilo view-source webcal wtai wyciwyg xfire xri ymsgr"))
+
+function _is_link(s::AbstractString)
+    '<' in s && return false
+
+    m = match(r"^(.*)://(\S+?)(:\S*)?$", s)
+    m === nothing && return false
+    scheme = lowercase(m.captures[1])
+    return scheme in _allowable_schemes
+end
+
+# non-normative regex from the HTML5 spec
+const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+
+function _is_mailto(s::AbstractString)
+    length(s) < 7 && return false
+    # "mailto:" is 7 ASCII characters: compare s[1:7], the address starts at index 8
+    lowercase(s[1:7]) == "mailto:" || return false
+    return ismatch(_email_regex, s[8:end])
+end
+
 # –––––––––––
 # Punctuation
 # –––––––––––
diff --git a/base/markdown/GitHub/GitHub.jl b/base/markdown/GitHub/GitHub.jl
index 591634d8e50b58..3692a8c5159710 100644
--- a/base/markdown/GitHub/GitHub.jl
+++ b/base/markdown/GitHub/GitHub.jl
@@ -62,4 +62,5 @@ end
                 github_table, github_paragraph,
 
                 linebreak, escapes, en_dash, inline_code, asterisk_bold,
-                asterisk_italic, image, footnote_link, link]
+                asterisk_italic, image, footnote_link, link, autolink]
+
diff --git a/base/markdown/Julia/Julia.jl b/base/markdown/Julia/Julia.jl
index 2297b8339899c2..ab18b8d92eeaac 100644
--- a/base/markdown/Julia/Julia.jl
+++ b/base/markdown/Julia/Julia.jl
@@ -11,4 +11,5 @@ include("interp.jl")
                blockquote, admonition, footnote, github_table, horizontalrule, setextheader, paragraph,
 
                linebreak, escapes, tex, interp, en_dash, inline_code,
-               asterisk_bold, asterisk_italic, image, footnote_link, link]
+               asterisk_bold, asterisk_italic, image, footnote_link, link, autolink]
+
diff --git a/base/markdown/render/html.jl b/base/markdown/render/html.jl
index a0404bf8f845eb..7e2f256f4aa5cd 100644
--- a/base/markdown/render/html.jl
+++ b/base/markdown/render/html.jl
@@ -26,7 +26,7 @@ const _htmlescape_chars = Dict('<'=>"&lt;", '>'=>"&gt;",
                                '"'=>"&quot;", '&'=>"&amp;",
                                # ' '=>"&nbsp;",
                                )
-for ch in "'`!@\$\%()=+{}[]"
+for ch in "'`!\$\%()=+{}[]"
     _htmlescape_chars[ch] = "&#$(Int(ch));"
 end
 
diff --git a/test/markdown.jl b/test/markdown.jl
index 391e35e8e042dc..d0c0f45406c8e3 100644
--- a/test/markdown.jl
+++ b/test/markdown.jl
@@ -250,6 +250,8 @@ end
 @test md"* World" |> html == "<ul>\n<li>World</li>\n</ul>\n"
 @test md"# title *blah*" |> html == "<h1>title <em>blah</em></h1>\n"
 @test md"## title *blah*" |> html == "<h2>title <em>blah</em></h2>\n"
+@test md"<https://julialang.org>" |> html == """<p><a href="https://julialang.org">https://julialang.org</a></p>\n"""
+@test md"<mailto://a@example.com>" |> html == """<p><a href="mailto://a@example.com">mailto://a@example.com</a></p>\n"""
 @test md"""Hello
 
 ---