diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCaseChangingCharStream.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCaseChangingCharStream.java
new file mode 100644
index 0000000000..5cebdd8cba
--- /dev/null
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCaseChangingCharStream.java
@@ -0,0 +1,59 @@
+package org.antlr.v4.runtime;
+
+import com.google.common.collect.Lists;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.antlr.v4.runtime.CharStreams.fromString;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Checks that the case-changing streams built by {@link CharStreams#toUpper}
+ * and {@link CharStreams#toLower} report symbols in the requested case.
+ */
+public class TestCaseChangingCharStream {
+
+    /**
+     * Collects every symbol visible through {@link CharStream#LA} from the
+     * current position, followed by the symbol one past the end of the stream.
+     *
+     * @param stream the stream to look ahead in; it is not consumed
+     * @return the {@code stream.size() + 1} symbols returned by {@code LA(1)}
+     *         through {@code LA(size + 1)}, in order
+     */
+    private static List<Integer> readAll(CharStream stream) {
+        List<Integer> symbols = Lists.newArrayList();
+
+        // One extra look-ahead past size() so the end-of-stream symbol is captured too.
+        for (int i = 1; i <= stream.size() + 1; i++) {
+            symbols.add(stream.LA(i));
+        }
+
+        return symbols;
+    }
+
+    /** Both lower- and upper-case input must be seen as upper case. */
+    @Test
+    public void testUpper() {
+        List<Integer> expected = Lists.newArrayList((int) 'A', (int) 'B', (int) 'C', (int) 'D', IntStream.EOF);
+
+        CharStream stream = CharStreams.toUpper(fromString("abcd"));
+        assertEquals(expected, readAll(stream));
+
+        stream = CharStreams.toUpper(fromString("ABCD"));
+        assertEquals(expected, readAll(stream));
+    }
+
+    /** Both lower- and upper-case input must be seen as lower case. */
+    @Test
+    public void testLower() {
+        List<Integer> expected = Lists.newArrayList((int) 'a', (int) 'b', (int) 'c', (int) 'd', IntStream.EOF);
+
+        CharStream stream = CharStreams.toLower(fromString("abcd"));
+        assertEquals(expected, readAll(stream));
+
+        stream = CharStreams.toLower(fromString("ABCD"));
+        assertEquals(expected, readAll(stream));
+    }
+}
diff --git a/runtime-testsuite/test/org/antlr/v4/runtime/TestCodePointCharStream.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCodePointCharStream.java
similarity index 100%
rename from runtime-testsuite/test/org/antlr/v4/runtime/TestCodePointCharStream.java
rename to
runtime-testsuite/test/org/antlr/v4/test/runtime/java/TestCodePointCharStream.java
diff --git a/runtime/Go/antlr/case_changing_stream.go b/runtime/Go/antlr/case_changing_stream.go
new file mode 100644
index 0000000000..2963acf891
--- /dev/null
+++ b/runtime/Go/antlr/case_changing_stream.go
@@ -0,0 +1,40 @@
+package antlr
+
+import (
+	"unicode"
+)
+
+// CaseChangingStream decorates a CharStream so that every symbol returned
+// by LA is forced to a single case (upper or lower). All other CharStream
+// methods are promoted unchanged from the embedded stream.
+type CaseChangingStream struct {
+	CharStream
+
+	// upper selects the target case: true for upper case, false for lower.
+	upper bool
+}
+
+// NewCaseChangingStream wraps in with a CaseChangingStream. When upper is
+// true the symbols seen through LA are upper-cased; otherwise they are
+// lower-cased.
+func NewCaseChangingStream(in CharStream, upper bool) *CaseChangingStream {
+	wrapped := new(CaseChangingStream)
+	wrapped.CharStream = in
+	wrapped.upper = upper
+	return wrapped
+}
+
+// LA gets the value of the symbol at offset from the current position
+// from the underlying CharStream and converts it to either upper case
+// or lower case.
+func (is *CaseChangingStream) LA(offset int) int {
+	in := is.CharStream.LA(offset)
+	if in < 0 {
+		// Such as antlr.TokenEOF which is -1
+		return in
+	}
+	if is.upper {
+		return int(unicode.ToUpper(rune(in)))
+	}
+	return int(unicode.ToLower(rune(in)))
+}
diff --git a/runtime/Go/antlr/case_changing_stream_test.go b/runtime/Go/antlr/case_changing_stream_test.go
new file mode 100644
index 0000000000..a0fba28d24
--- /dev/null
+++ b/runtime/Go/antlr/case_changing_stream_test.go
@@ -0,0 +1,37 @@
+package antlr
+
+import (
+	"github.com/kylelemons/godebug/pretty"
+	"testing"
+)
+
+// TestCaseChangingStream reads every symbol of a wrapped InputStream through
+// LA and checks that it comes back in the requested case, with TokenEOF
+// reported one position past the end.
+func TestCaseChangingStream(t *testing.T) {
+	tests := []struct {
+		input string
+		upper bool
+		want  []int
+	}{
+		{"abcd", true, []int{'A', 'B', 'C', 'D', TokenEOF}},
+		{"ABCD", true, []int{'A', 'B', 'C', 'D', TokenEOF}},
+		{"abcd", false, []int{'a', 'b', 'c', 'd', TokenEOF}},
+		{"ABCD", false, []int{'a', 'b', 'c', 'd', TokenEOF}},
+		{"", false, []int{TokenEOF}},
+	}
+
+	for _, test := range tests {
+		var got []int
+		is := NewCaseChangingStream(NewInputStream(test.input), test.upper)
+		for i := 1; i <= is.Size()+1; i++ {
+			got = append(got, is.LA(i))
+		}
+
+		// pretty.Compare marks lines of its first argument with '-' and of
+		// its second with '+', so got must come first to match the label.
+		if diff := pretty.Compare(got, test.want); diff != "" {
+			t.Errorf("NewCaseChangingStream(%q, %v) diff: (-got +want)\n%s", test.input, test.upper, diff)
+		}
+	}
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/CaseChangingCharStream.java b/runtime/Java/src/org/antlr/v4/runtime/CaseChangingCharStream.java
new file mode 100644
index 0000000000..d069d0188a
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/CaseChangingCharStream.java
@@ -0,0 +1,93 @@
+package org.antlr.v4.runtime;
+
+import org.antlr.v4.runtime.misc.Interval;
+
+/**
+ * This class supports case-insensitive lexing by wrapping an existing
+ * {@link CharStream} and forcing the lexer to see either upper or
+ * lowercase characters. Grammar literals should then be either upper or
+ * lower case such as 'BEGIN' or 'begin'. The text of the character
+ * stream is unaffected. Example: input 'BeGiN' would match lexer rule
+ * 'BEGIN' if constructor parameter upper=true but getText() would return
+ * 'BeGiN'.
+ */
+public class CaseChangingCharStream implements CharStream {
+
+    /** Wrapped stream; every call except {@link #LA} is forwarded to it unchanged. */
+    final CharStream stream;
+    /** {@code true} to report symbols in upper case, {@code false} for lower case. */
+    final boolean upper;
+
+    /**
+     * Constructs a new CaseChangingCharStream wrapping the given {@link CharStream} forcing
+     * all characters to upper case or lower case.
+     * @param stream The stream to wrap.
+     * @param upper If true force each symbol to upper case, otherwise force to lower.
+     */
+    public CaseChangingCharStream(CharStream stream, boolean upper) {
+        this.stream = stream;
+        this.upper = upper;
+    }
+
+    /**
+     * Returns the wrapped stream's text for the interval; no case conversion
+     * is applied here.
+     */
+    @Override
+    public String getText(Interval interval) {
+        return stream.getText(interval);
+    }
+
+    @Override
+    public void consume() {
+        stream.consume();
+    }
+
+    /**
+     * Looks ahead in the wrapped stream and returns the symbol converted to
+     * the configured case.
+     *
+     * @param i look-ahead offset, forwarded unchanged to the wrapped stream
+     * @return the case-converted symbol, or the wrapped stream's value as-is
+     *         when it is zero or negative (for example {@link IntStream#EOF})
+     */
+    @Override
+    public int LA(int i) {
+        int c = stream.LA(i);
+        if (c <= 0) {
+            // Sentinel values such as EOF carry no case; pass them through.
+            return c;
+        }
+        return upper ? Character.toUpperCase(c) : Character.toLowerCase(c);
+    }
+
+    @Override
+    public int mark() {
+        return stream.mark();
+    }
+
+    @Override
+    public void release(int marker) {
+        stream.release(marker);
+    }
+
+    @Override
+    public int index() {
+        return stream.index();
+    }
+
+    @Override
+    public void seek(int index) {
+        stream.seek(index);
+    }
+
+    @Override
+    public int size() {
+        return stream.size();
+    }
+
+    @Override
+    public String getSourceName() {
+        return stream.getSourceName();
+    }
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java b/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
index 73fb875999..d0ca125fbd 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
@@ -303,4 +303,26 @@ public static CodePointCharStream fromChannel(
             channel.close();
         }
     }
+
+    /**
+     * Takes the stream and forces all symbols to uppercase for lexing purposes
+     * but leaves the original text as-is.
+     *
+     * @param in the stream to wrap
+     * @return a stream over the same text whose symbols are reported in upper case
+     */
+    public static CharStream toUpper(CharStream in) {
+        return new CaseChangingCharStream(in, true);
+    }
+
+    /**
+     * Takes the stream and forces all symbols to lowercase for lexing purposes
+     * but leaves the original text as-is.
+     *
+     * @param in the stream to wrap
+     * @return a stream over the same text whose symbols are reported in lower case
+     */
+    public static CharStream toLower(CharStream in) {
+        return new CaseChangingCharStream(in, false);
+    }
 }
diff --git a/runtime/JavaScript/src/antlr4/CaseInsensitiveInputStream.js b/runtime/JavaScript/src/antlr4/CaseInsensitiveInputStream.js
new file mode 100644
index 0000000000..5ec762de9a
--- /dev/null
+++ b/runtime/JavaScript/src/antlr4/CaseInsensitiveInputStream.js
@@ -0,0 +1,91 @@
+//
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+//
+
+/**
+ * Wraps a character stream so that LA() reports every symbol in a single
+ * case, while getText() still returns the wrapped stream's own text.
+ *
+ * @param stream the character stream to wrap
+ * @param upper truthy to report symbols in upper case, falsy for lower case
+ */
+function CaseInsensitiveInputStream(stream, upper) {
+    this._stream = stream;
+    // The case-mapping functions live on String.prototype, not on String itself.
+    this._case = upper ? String.prototype.toUpperCase : String.prototype.toLowerCase;
+    return this;
+}
+
+/**
+ * Returns the code point at the given look-ahead offset converted to the
+ * configured case. Zero and negative values from the wrapped stream (such
+ * as an end-of-input marker) are returned unchanged.
+ */
+CaseInsensitiveInputStream.prototype.LA = function (offset) {
+    var c = this._stream.LA(offset);
+    if (c <= 0) {
+        return c;
+    }
+    var converted = this._case.call(String.fromCodePoint(c));
+    var mapped = converted.codePointAt(0);
+    // A mapping that expands to several code points (e.g. U+00DF -> "SS")
+    // cannot be reported as one symbol, so the original is kept in that case.
+    return String.fromCodePoint(mapped) === converted ? mapped : c;
+};
+
+// Read-only views of the wrapped stream's position and length, mirroring the
+// index()/size() delegation of the Java CaseChangingCharStream.
+// NOTE(review): assumes the wrapped stream exposes `index` and `size` as
+// properties; confirm against InputStream.
+Object.defineProperty(CaseInsensitiveInputStream.prototype, "index", {
+    get: function() {
+        return this._stream.index;
+    }
+});
+
+Object.defineProperty(CaseInsensitiveInputStream.prototype, "size", {
+    get: function() {
+        return this._stream.size;
+    }
+});
+
+CaseInsensitiveInputStream.prototype.reset = function() {
+    return this._stream.reset();
+};
+
+CaseInsensitiveInputStream.prototype.consume = function() {
+    return this._stream.consume();
+};
+
+// NOTE(review): forwards to the wrapped stream, so unlike LA() the result is
+// not case-converted; confirm whether LT should mirror LA.
+CaseInsensitiveInputStream.prototype.LT = function(offset) {
+    return this._stream.LT(offset);
+};
+
+CaseInsensitiveInputStream.prototype.mark = function() {
+    return this._stream.mark();
+};
+
+CaseInsensitiveInputStream.prototype.release = function(marker) {
+    return this._stream.release(marker);
+};
+
+// Repositions the wrapped stream at the given index.
+CaseInsensitiveInputStream.prototype.seek = function(_index) {
+    return this._stream.seek(_index);
+};
+
+// Returns the original, unconverted text of the wrapped stream.
+CaseInsensitiveInputStream.prototype.getText = function(start, stop) {
+    return this._stream.getText(start, stop);
+};
+
+CaseInsensitiveInputStream.prototype.toString = function() { // string form comes from the wrapped stream
+    return this._stream.toString();
+};
+
+exports.CaseInsensitiveInputStream = CaseInsensitiveInputStream;
diff --git a/runtime/JavaScript/src/antlr4/CharStreams.js b/runtime/JavaScript/src/antlr4/CharStreams.js
index 71c5076166..75f777ba87 100644
--- a/runtime/JavaScript/src/antlr4/CharStreams.js
+++ b/runtime/JavaScript/src/antlr4/CharStreams.js
@@ -5,6 +5,7 @@
  */
 //
 
+var CaseInsensitiveInputStream = require('./CaseInsensitiveInputStream').CaseInsensitiveInputStream;
 var InputStream = require('./InputStream').InputStream;
 
 var isNodeJs = typeof window === 'undefined' && typeof importScripts === 'undefined';
@@ -65,6 +66,14 @@ var CharStreams = {
   fromPathSync: function(path, encoding) {
     var data = fs.readFileSync(path, encoding);
     return new InputStream(data, true);
+  },
+
+  toUpper: function(stream) { // wraps `stream`, passing upper = true to the wrapper
+    return new CaseInsensitiveInputStream(stream, true);
+  },
+
+  toLower: function(stream) { // wraps `stream`, passing upper = false to the wrapper
+    return new CaseInsensitiveInputStream(stream, false);
   }
 };
 