From 0c72c012099062fbaccfcc818d36d51afafdd31f Mon Sep 17 00:00:00 2001 From: Tony Fong Date: Tue, 4 Nov 2014 02:31:41 +0700 Subject: [PATCH] new brackets: Angle, Brack, Brace Their LaTeX symbols are \lAngle, \lBrack, \lBrace (the closing ones are \rAngle, \rBrack, \rBrace). Brack and Brace also have the ASCII equivalents [| |] and {| |}. They map to: * With prefix expression: `@call_Angle, @call_Brack, @call_Brace` * Without prefix: `@enclose_Angle, @enclose_Brack, @enclose_Brace` The further lowering of @call_Brace differs from the others so that the prefix can be inspected symbolically (it is passed quoted) instead of as its dereferenced value. --- base/brackets.jl | 68 +++++++++++++++ base/exports.jl | 14 ++++ base/latex_symbols.jl | 8 +- base/sysimg.jl | 3 + doc/manual/unicode-input-table.rst | 6 ++ src/julia-parser.scm | 128 +++++++++++++++++++++++++++-- test/parser.jl | 91 ++++++++++++++++++++ test/runtests.jl | 2 +- 8 files changed, 312 insertions(+), 8 deletions(-) create mode 100644 base/brackets.jl create mode 100644 test/parser.jl diff --git a/base/brackets.jl b/base/brackets.jl new file mode 100644 index 0000000000000..0a3b2b76b1872 --- /dev/null +++ b/base/brackets.jl @@ -0,0 +1,68 @@ +# all enclose styles work the same way + +@doc "`⟪ args... ⟫` becomes `enclose_Angle( args... )`" -> +macro enclose_Angle(args...) + esc( Expr( :call, :enclose_Angle, args... ) ) +end + +@doc "`{| args... |}` becomes `enclose_Brace( args... )`" -> +macro enclose_Brace(args...) + esc( Expr( :call, :enclose_Brace, args... ) ) +end + +@doc "`[| args... |]` becomes `enclose_Brack( args... )`" -> +macro enclose_Brack(args...) + esc( Expr( :call, :enclose_Brack, args... ) ) +end + +@doc """ +`foobar⟪ args... ⟫` becomes `call_Angle( foobar, args... )` + +`foo.bar⟪ args... ⟫` becomes `call_Angle( foo.bar, args... )` + +In practice, the type of foobar usually drives the dispatch. +"""-> +macro call_Angle(sym, args...) + esc( Expr( :call, :call_Angle, sym, args... ) ) +end + +@doc """ +`foobar{| args...
|}` becomes `call_Brace( :foobar, args... )` + +Note that `foobar` does not need to exist at all! It is +up to the remaining argument type signature to drive dispatch. + +This behavior is different from Angle and Brack brackets. +This is to accommodate the situation where we want to pass the +called function as a symbol instead of an existing value. +See `@doc @call_Angle` and `@doc @call_Brack`. + +In practice, packages that employ `foobar{| args... |}` are expected to use at least one +custom type in the function signature `args...`. + +Also, we can pass an Expr as the 1st argument as well: + +`foo.bar{| args... |}` becomes `call_Brace( :(foo.bar), args... )` + +""" -> +macro call_Brace(sym, args...) + esc( Expr( :call, :call_Brace, Expr( :quote, sym ), args... ) ) +end + +@doc """ +`foobar[| args...|]` becomes `call_Brack( foobar, args... )` + +`foo.bar[| args...|]` becomes `call_Brack( foo.bar, args... )` + +In practice, the type of foobar usually drives the dispatch. +"""-> +macro call_Brack(sym, args...) + esc( Expr( :call, :call_Brack, sym, args... ) ) +end + +enclose_Angle( args... ) = throw(ArgumentError( "Undefined enclose_Angle for " * string(args) ) ) +enclose_Brace( args... ) = throw(ArgumentError( "Undefined enclose_Brace for " * string(args) ) ) +enclose_Brack( args... ) = throw(ArgumentError( "Undefined enclose_Brack for " * string(args) ) ) +call_Angle( args... ) = throw(ArgumentError( "Undefined call_Angle for " * string(args) ) ) +call_Brace( args... ) = throw(ArgumentError( "Undefined call_Brace for " * string(args) ) ) +call_Brack( args... 
) = throw(ArgumentError( "Undefined call_Brack for " * string(args) ) ) diff --git a/base/exports.jl b/base/exports.jl index 5b153f7b3ae9b..1adf57ed5d412 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1339,6 +1339,20 @@ export # nullable types isnull, +# customizable special brackets ⟪⟫⟦⟧⦃⦄ + @enclose_Angle, + @enclose_Brack, + @enclose_Brace, + @call_Angle, + @call_Brack, + @call_Brace, + enclose_Angle, + enclose_Brack, + enclose_Brace, + call_Angle, + call_Brack, + call_Brace, + # Macros @__FILE__, @b_str, diff --git a/base/latex_symbols.jl b/base/latex_symbols.jl index 288dc21fd7847..57b8dbd09fe59 100644 --- a/base/latex_symbols.jl +++ b/base/latex_symbols.jl @@ -703,8 +703,12 @@ const latex_symbols = Dict( "\\lrcorner" => "⌟", "\\frown" => "⌢", "\\smile" => "⌣", - "\\langle" => "⟨", - "\\rangle" => "⟩", + "\\lAngle" => "⟪", #U27ea + "\\rAngle" => "⟫", #U27eb + "\\lBrack" => "⟦", #U27e6 + "\\rBrack" => "⟧", #U27e7 + "\\lBrace" => "⦃", #U2983 + "\\rBrace" => "⦄", #U2984 "\\obar" => "⌽", "\\Elzdlcorn" => "⎣", "\\lmoustache" => "⎰", diff --git a/base/sysimg.jl b/base/sysimg.jl index 3d1644555f43d..f1eab8d333009 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -295,6 +295,9 @@ end include("precompile.jl") +# handling special brackets, ⟪⟫⟦⟧⦃⦄ +include( "brackets.jl") + include = include_from_node1 end # baremodule Base diff --git a/doc/manual/unicode-input-table.rst b/doc/manual/unicode-input-table.rst index 1738bc6240635..b6378e25ab762 100644 --- a/doc/manual/unicode-input-table.rst +++ b/doc/manual/unicode-input-table.rst @@ -792,6 +792,12 @@ U+0231E ⌞ \\llcorner BOTTOM LEFT CORNER U+0231F ⌟ \\lrcorner BOTTOM RIGHT CORNER U+02322 ⌢ \\frown FROWN U+02323 ⌣ \\smile SMILE +U+027EA ⟪ \\lAngle MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +U+027EB ⟫ \\rAngle MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +U+027E6 ⟦ \\lBrack MATHEMATICAL LEFT WHITE SQUARE BRACKET +U+027E7 ⟧ \\rBrack MATHEMATICAL RIGHT WHITE SQUARE BRACKET +U+02983 ⦃ \\lBrace LEFT WHITE CURLY BRACKET
+U+02984 ⦄ \\rBrace RIGHT WHITE CURLY BRACKET U+0232C ⌬ \\varhexagonlrbonds BENZENE RING U+02332 ⌲ \\conictaper CONICAL TAPER U+02336 ⌶ \\topbot APL FUNCTIONAL SYMBOL I-BEAM diff --git a/src/julia-parser.scm b/src/julia-parser.scm index b06b8407d4746..41f9ac6728805 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -58,6 +58,10 @@ (define unary-ops '(+ - ! ¬ ~ |<:| |>:| √ ∛ ∜)) +; We have to use tuples to disambiguate compound brackets from normal symbol operators +(define closing-brackets '(#\) #\] #\} #\❯ #\❱ #\⟫ #\⟧ #\⦄ (close-bracket |\|]|) (close-bracket |\|\}|))) +(define closing-brackets? (Set closing-brackets)) + ; operators that are both unary and binary (define unary-and-binary-ops '(+ - $ & ~)) @@ -155,7 +159,7 @@ ;; --- lexer --- (define special-char? - (let ((chrs (string->list "()[]{},;\"`@"))) + (let ((chrs (string->list "()[]{}❮❯❰❱⟪⟫⟦⟧⦃⦄,;\"`@"))) (lambda (c) (memv c chrs)))) (define (newline? c) (eqv? c #\newline)) @@ -431,6 +435,45 @@ (let ((c (peek-char port))) (cond ((or (eof-object? c) (newline? c)) (read-char port)) + ;; it could be a normal curly brace or a compound brace {| + ((eqv? c #\{) + (let ((c (read-char port)) + (nextc (peek-char port))) + (if (eqv? nextc #\|) + ( begin ( read-char port ) + (string->symbol "{|") ) + #\{ ))) + + ;; it could be a normal square bracket or a compound [| + ((eqv? c #\[) + (let ((c (read-char port)) + (nextc (peek-char port))) + (if (eqv? nextc #\|) + ( begin ( read-char port ) + (string->symbol "[|") ) + #\[ ))) + + ;; it could be a closer |], |}, or operators |>, |=, ||, or just | + ((eqv? c #\|) + (let ((c (read-char port)) + (nextc (peek-char port))) + (cond ((eqv? nextc #\>) + (begin ( read-char port ) + (string->symbol "|>" ))) + ((eqv? nextc #\=) + (begin ( read-char port ) + (string->symbol "|=" ))) + ((eqv? nextc #\|) + (begin ( read-char port ) + (string->symbol "||" ))) + ((eqv? nextc #\] ) + (begin ( read-char port ) + '(close-bracket |\|\]| ) )) + ((eqv? 
nextc #\} ) + (begin ( read-char port ) + '(close-bracket |\|\}| ) )) + (else '|\|| )))) + ((special-char? c) (read-char port)) ((char-numeric? c) (read-number port #f #f)) @@ -572,7 +615,8 @@ (define (invalid-initial-token? tok) (or (eof-object? tok) - (memv tok '(#\) #\] #\} else elseif catch finally =)))) + (closing-brackets? tok) + (memv tok '(else elseif catch finally =)))) (define (line-number-node s) `(line ,(input-port-line (ts:port s)))) @@ -780,7 +824,8 @@ (define (closing-token? tok) (or (eof-object? tok) (and (eq? tok 'end) (not end-symbol)) - (memv tok '(#\, #\) #\] #\} #\; else elseif catch finally)))) + (closing-brackets? tok) + (memv tok '(#\, #\; else elseif catch finally)))) (define (maybe-negate op num) (if (eq? op '-) @@ -804,7 +849,7 @@ (not (and (pair? expr) (eq? (car expr) '...))) (or (number? expr) (large-number? expr) - (not (memv t '(#\( #\[ #\{)))))) + (not (memv t '(#\( #\[ #\{ #\❮ #\❰ #\⟪ #\⟦ #\⦃)))))) (define (parse-juxtapose ex s) (let ((next (peek-token s))) @@ -985,6 +1030,20 @@ ((#\{ ) (take-token s) (loop (list* 'curly ex (map subtype-syntax (parse-arglist s #\} ))))) + ((#\❮ ) (take-token s) + (loop (list* 'macrocall '@call_Angle ex (parse-special-bracket s #\❯ )))) + ((#\❰ ) (take-token s) + (loop (list* 'macrocall '@call_Angle ex (parse-special-bracket s #\❱ )))) + ((#\⟪ ) (take-token s) + (loop (list* 'macrocall '@call_Angle ex (parse-special-bracket s #\⟫ )))) + ((#\⟦ ) (take-token s) + (loop (list* 'macrocall '@call_Brack ex (parse-special-bracket s #\⟧ )))) + (( |\[\|| ) (take-token s) + (loop (list* 'macrocall '@call_Brack ex (parse-special-bracket s '(close-bracket |\|\]|) )))) + ((#\⦃ ) (take-token s) + (loop (list* 'macrocall '@call_Brace ex (parse-special-bracket s #\⦄ )))) + (( |\{\|| ) (take-token s) + (loop (list* 'macrocall '@call_Brace ex (parse-special-bracket s '(close-bracket |\|\}| ))))) ((#\") (if (and (symbol? ex) (not (operator? ex)) (not (ts:space? 
s))) @@ -1400,7 +1459,7 @@ ;; newline character isn't detectable here #;((eqv? c #\newline) (error "unexpected line break in argument list")) - ((memv c '(#\] #\})) + ((closing-brackets? c ) (error (string "unexpected \"" c "\" in argument list"))) (else (error (string "missing comma or " closer @@ -1521,6 +1580,25 @@ (else (parse-matrix s first closer))))))))) +(define ( parse-special-bracket s closer ) + (with-normal-ops + (with-whitespace-newline + (parse-special-bracket- s closer )))) +(define (parse-special-bracket- s closer) + (let loop ((lst `())) + (let ((t (require-token s))) + (if (equal? t closer) + (begin (take-token s) (reverse lst )) + (let* ((nxt (parse-eq* s)) + (c (require-token s))) + (cond ((eqv? c #\,) + (begin (take-token s) (loop (cons nxt lst)))) + ((equal? c closer) (loop (cons nxt lst))) + ((closing-brackets? c) + (error (string "unexpected " c " in bracket. Close with " closer ))) + (else + (error (string "unexpected " c ". Expect separator , or " closer ))))))))) + ; for sequenced evaluation inside expressions: e.g. (a;b, c;d) (define (parse-stmts-within-expr s) (parse-Nary s parse-eq* '(#\;) 'block '(#\, #\) ) #t)) @@ -1725,6 +1803,46 @@ ;; misplaced = ((eq? t '=) (error "unexpected \"=\"")) + ;; \lBrack \rBrack expression + ((equal? t '|\[\|| ) + (take-token s) + (let ((vex (parse-special-bracket s '(close-bracket |\|\]| ) ))) + (list* 'macrocall '@enclose_Brack vex ))) + + ;; \lBrace \rBrace expression + ((equal? t '|\{\|| ) + (take-token s) + (let ((vex (parse-special-bracket s '(close-bracket |\|\}| ) ))) + (list* 'macrocall '@enclose_Brace vex ))) + + ;; \lBrack \rBrack expression + ((eqv? t #\⟦ ) + (take-token s) + (let ((vex (parse-special-bracket s #\⟧ ))) + (list* 'macrocall '@enclose_Brack vex ))) + + ;; \lBrace \rBrace expression + ((eqv? t #\⦃ ) + (take-token s) + (let ((vex (parse-special-bracket s #\⦄ ))) + (list* 'macrocall '@enclose_Brace vex ))) + + ;; \ldAngle \rdAngle expression + ((eqv? 
t #\❰ ) + (take-token s) + (let ((vex (parse-special-bracket s #\❱ ))) + (list* 'macrocall '@enclose_Angle vex ))) + + ((eqv? t #\❮ ) + (take-token s) + (let ((vex (parse-special-bracket s #\❯ ))) + (list* 'macrocall '@enclose_Angle vex ))) + + ((eqv? t #\⟪ ) + (take-token s) + (let ((vex (parse-special-bracket s #\⟫ ))) + (list* 'macrocall '@enclose_Angle vex ))) + ;; identifier ((symbol? t) (take-token s)) diff --git a/test/parser.jl b/test/parser.jl new file mode 100644 index 0000000000000..3e55560ea0618 --- /dev/null +++ b/test/parser.jl @@ -0,0 +1,91 @@ + +# Make sure paired brackets parse +pairs = Any[ + ("[|","|]", symbol("@enclose_Brack"), symbol("@call_Brack")), + ("⟦" , "⟧", symbol("@enclose_Brack"), symbol("@call_Brack")), + ("{|","|}", symbol("@enclose_Brace"), symbol("@call_Brace")), + ("⦃" , "⦄", symbol("@enclose_Brace"), symbol("@call_Brace")), + ("⟪" , "⟫", symbol("@enclose_Angle"), symbol("@call_Angle")), + ("❮" , "❯", symbol("@enclose_Angle"), symbol("@call_Angle")), + ("❰" , "❱", symbol("@enclose_Angle"), symbol("@call_Angle")) + ] + +args = Any[ +( "", 0 ), +( " ", 0 ), +( "1", 1 ), +( "a", 1 ), +( "a,", 1 ), # test trailing commas +( "a,b,", 2 ), # test trailing commas +( "a,1", 2 ), +( "2,a", 2 ), +( "2,a,", 2 ), +] + +for t in pairs + left = t[1] + right = t[2] + expecthead = t[3] + expectcallhead = t[4] + for arg in args + try + str = left * arg[1] * right + #println( str ) + ex = parse( str ) + catch er + println( er ) + println( left * arg[1] * right ) + @test false + end + @test ex.head == :macrocall + @test ex.args[1]== expecthead + @test length( ex.args ) == arg[2]+1 + + try + str = "foo" * left * arg[1] * right + #println( str ) + ex = parse( str ) + catch er + println( er ) + println( "foo" * left * arg[1] * right ) + @test false + end + @test ex.head == :macrocall + @test ex.args[1]== expectcallhead + @test ex.args[2] == :foo + @test length( ex.args ) == arg[2] + 2 + end + + # this is not a normal function call so kwargs with
semi-colons do not work + @test_throws( ParseError, parse( "foo" * left * "a,1;b=2" * right )) + + # however, comma-separated assignment works + ex = parse( "foo" * left * "a,1,b=2" * right ) + @test Base.Meta.isexpr( ex.args[5], :(=) ) +end + +# make sure similar but unmatched brackets do NOT parse +@test_throws( ParseError, parse( "⟪1⟩" ) ) +@test_throws( ParseError, parse( "⟪1❯" ) ) +@test_throws( ParseError, parse( "[|1⟧" ) ) + +# make sure ill-formed nesting throws +@test_throws( ParseError, parse( "[⟪1]⟫" ) ) +@test_throws( ParseError, parse( "[| ⟪ 1 |] ⟫" ) ) + +# test deep nesting +ex = parse( "⟪ a * [| D{ c }(T) = {| foobar |} |] ⟫" ) +@test ex.args[1] == symbol( "@enclose_Angle" ) +@test ex.args[2].args[3].args[1] == symbol( "@enclose_Brack" ) +@test ex.args[2].args[3].args[2].args[2].args[1]== symbol( "@enclose_Brace" ) + +# we may bestow meaning on such expressions later but for now they throw +@test_throws( ArgumentError, eval( parse( "{| 1 |}")) ) +@test_throws( ArgumentError, eval( parse( "b{| 1 |}")) ) +@test_throws( ArgumentError, eval( parse( "[| 1 |]")) ) +@test_throws( UndefVarError, eval( parse( "non_existent_value[| 1 |]")) ) +@test_throws( ArgumentError, eval( parse( "⟪1⟫")) ) +@test_throws( UndefVarError, eval( parse( "non_existent_value⟪1⟫")) ) + +# However, this should just throw a standard exception +@test_throws( UndefVarError, eval( parse( "{| non_existent_value |}")) ) diff --git a/test/runtests.jl b/test/runtests.jl index 3a0e79ba177a8..f24b81a93cd2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,7 +11,7 @@ testnames = [ "sysinfo", "rounding", "ranges", "mod2pi", "euler", "show", "lineedit", "replcompletions", "repl", "test", "goto", "llvmcall", "grisu", "nullable", "meta", "profile", - "libgit2", "docs" + "libgit2", "docs", "parser" ] if isdir(joinpath(JULIA_HOME, Base.DOCDIR, "examples"))