Skip to content

Commit

Permalink
Parse autolinks for urls and emails.
Browse files Browse the repository at this point in the history
Remove `@` from escaped html.

Use AbstractString rather than String.
  • Loading branch information
hayd committed Dec 15, 2016
1 parent 38ff38c commit c3e3e36
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 4 deletions.
3 changes: 2 additions & 1 deletion base/markdown/Common/Common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ include("inline.jl")
paragraph,

linebreak, escapes, inline_code,
asterisk_bold, asterisk_italic, image, footnote_link, link]
asterisk_bold, asterisk_italic, image, footnote_link, link, autolink]

36 changes: 36 additions & 0 deletions base/markdown/Common/inline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,42 @@ function footnote_link(stream::IO, md::MD)
end
end

# Inline parser for autolinks such as `<https://julialang.org>`.
#
# Registered on the '<' trigger character. Reads the text between '<' and the
# next '>' and, when it is an allowed URL or a `mailto:` address, returns a
# `Link` whose text and target are both that string. Returns `nothing` in every
# other case so that another inline parser can try.
# NOTE(review): presumably `withstream` restores the stream position when the
# body returns `nothing` -- confirm against its definition.
@trigger '<' ->
function autolink(stream::IO, md::MD)
    withstream(stream) do
        startswith(stream, '<') || return
        url = readuntil(stream, '>')
        # NOTE(review): the Markdown-local `readuntil` appears to yield `nothing`
        # when no closing '>' is found; without this guard the predicates below
        # (which only accept AbstractString) would throw a MethodError.
        url === nothing && return
        (_is_link(url) || _is_mailto(url)) && return Link(url, url)
        return
    end
end

# This list is taken from the commonmark spec
# http://spec.commonmark.org/0.19/#absolute-uri
#
# The string is split on whitespace, so every scheme must be separated from its
# neighbours by a space or newline (a comma would fuse two schemes into one
# unusable entry).
const _allowable_schemes = Set(split("coap doi javascript aaa aaas about acap cap cid crid data dav dict dns file ftp geo go gopher h323 http https iax icap im imap info ipp iris iris.beep iris.xpc iris.xpcs iris.lwz ldap mailto mid msrp msrps mtqp mupdate news nfs ni nih nntp opaquelocktoken pop pres rtsp service session shttp sieve sip sips sms snmp soap.beep soap.beeps tag tel telnet tftp thismessage tn3270 tip tv urn vemmi ws wss xcon xcon-userid xmlrpc.beep xmlrpc.beeps xmpp z39.50r z39.50s
adiumxtra afp afs aim apt attachment aw beshare bitcoin bolo callto chrome chrome-extension com-eventbrite-attendee content cvs dlna-playsingle dlna-playcontainer dtn dvb ed2k facetime feed finger fish gg git gizmoproject gtalk hcp icon ipn irc irc6 ircs itms jar jms keyparc lastfm ldaps magnet maps market message mms ms-help msnim mumble mvn notes oid palm paparazzi platform proxy psyc query res resource rmi rsync rtmp secondlife sftp sgn skype smb soldat spotify ssh steam svn teamspeak
things udp unreal ut2004 ventrilo view-source webcal wtai wyciwyg xfire xri ymsgr"))

# Return `true` when `s` has the form `scheme://rest`, contains no whitespace
# after the scheme and no '<', and `scheme` (compared case-insensitively) is a
# member of `_allowable_schemes`; return `false` otherwise.
function _is_link(s::AbstractString)
    '<' in s && return false

    # The scheme is the text before the first "://". Excluding ':' and
    # whitespace from it stops a later "://" inside the URL (for example in a
    # query string such as "?next=http://...") from being absorbed into the
    # scheme, which would make an otherwise valid URL fail the lookup below.
    m = match(r"^([^:\s]+)://(\S+?)(:\S*)?$", s)
    m === nothing && return false
    return lowercase(m.captures[1]) in _allowable_schemes
end

# non-normative regex from the HTML5 spec
const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"

# Return `true` when `s` is the prefix `mailto:` (matched case-insensitively)
# followed by an address accepted by `_email_regex`; return `false` otherwise.
function _is_mailto(s::AbstractString)
    # Capture the address with a regex rather than slicing by index: the prefix
    # is seven characters long, and index arithmetic on strings is easy to get
    # wrong (an off-by-one here makes the prefix comparison always fail).
    m = match(r"^mailto:(.*)$"i, s)
    m === nothing && return false
    return match(_email_regex, m.captures[1]) !== nothing
end

# –––––––––––
# Punctuation
# –––––––––––
Expand Down
3 changes: 2 additions & 1 deletion base/markdown/GitHub/GitHub.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ end
github_table, github_paragraph,

linebreak, escapes, en_dash, inline_code, asterisk_bold,
asterisk_italic, image, footnote_link, link]
asterisk_italic, image, footnote_link, link, autolink]

3 changes: 2 additions & 1 deletion base/markdown/Julia/Julia.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ include("interp.jl")
blockquote, admonition, footnote, github_table, horizontalrule, setextheader, paragraph,

linebreak, escapes, tex, interp, en_dash, inline_code,
asterisk_bold, asterisk_italic, image, footnote_link, link]
asterisk_bold, asterisk_italic, image, footnote_link, link, autolink]

2 changes: 1 addition & 1 deletion base/markdown/render/html.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const _htmlescape_chars = Dict('<'=>"&lt;", '>'=>"&gt;",
'"'=>"&quot;", '&'=>"&amp;",
# ' '=>"&nbsp;",
)
for ch in "'`!@\$\%()=+{}[]"
for ch in "'`!\$\%()=+{}[]"
_htmlescape_chars[ch] = "&#$(Int(ch));"
end

Expand Down
2 changes: 2 additions & 0 deletions test/markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ end
@test md"* World" |> html == "<ul>\n<li><p>World</p>\n</li>\n</ul>\n"
@test md"# title *blah*" |> html == "<h1>title <em>blah</em></h1>\n"
@test md"## title *blah*" |> html == "<h2>title <em>blah</em></h2>\n"
@test md"<https://julialang.org>" |> html == """<p><a href="https://julialang.org">https://julialang.org</a></p>\n"""
@test md"<mailto://a@example.com>" |> html == """<p><a href="mailto://a@example.com">mailto://a@example.com</a></p>\n"""
@test md"""Hello
---
Expand Down

0 comments on commit c3e3e36

Please sign in to comment.