Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.antlr.v4.runtime;

import com.google.common.collect.Lists;
import org.junit.Test;

import java.util.List;

import static org.antlr.v4.runtime.CharStreams.fromString;
import static org.junit.Assert.assertEquals;

public class TestCaseChangingCharStream {

    /**
     * Collects every symbol visible through {@code LA}, starting at lookahead 1
     * and continuing through the position one past the end of the stream.
     *
     * @param stream the stream to inspect; it is only peeked at, never consumed
     * @return the symbols in lookahead order, with the value reported one
     *         position past the last symbol as the final entry
     */
    private static List<Integer> readAll(CharStream stream) {
        List<Integer> result = Lists.newArrayList();

        int offset = 1;
        while (offset <= stream.size() + 1) {
            result.add(stream.LA(offset));
            offset++;
        }

        return result;
    }

    /** Both lower-case and upper-case input must be seen as upper case. */
    @Test
    public void testUpper() {
        List<Integer> upperAbcd =
                Lists.newArrayList((int) 'A', (int) 'B', (int) 'C', (int) 'D', IntStream.EOF);

        CharStream fromLowerInput = CharStreams.toUpper(fromString("abcd"));
        assertEquals(upperAbcd, readAll(fromLowerInput));

        CharStream fromUpperInput = CharStreams.toUpper(fromString("ABCD"));
        assertEquals(upperAbcd, readAll(fromUpperInput));
    }

    /** Both lower-case and upper-case input must be seen as lower case. */
    @Test
    public void testLower() {
        List<Integer> lowerAbcd =
                Lists.newArrayList((int) 'a', (int) 'b', (int) 'c', (int) 'd', IntStream.EOF);

        CharStream fromLowerInput = CharStreams.toLower(fromString("abcd"));
        assertEquals(lowerAbcd, readAll(fromLowerInput));

        CharStream fromUpperInput = CharStreams.toLower(fromString("ABCD"));
        assertEquals(lowerAbcd, readAll(fromUpperInput));
    }
}
37 changes: 37 additions & 0 deletions runtime/Go/antlr/case_changing_stream.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package antlr

import (
"unicode"
)

// CaseChangingStream wraps an existing CharStream, but upper cases, or
// lower cases the input before it is tokenized.
type CaseChangingStream struct {
// CharStream is the wrapped stream. It is embedded, so its methods are
// promoted onto CaseChangingStream.
CharStream

// upper selects the conversion applied by LA: true for upper case,
// false for lower case.
upper bool
}

// NewCaseChangingStream wraps in with a CaseChangingStream. When upper is
// true the symbols returned by LA on the wrapper are upper cased; otherwise
// they are lower cased.
func NewCaseChangingStream(in CharStream, upper bool) *CaseChangingStream {
	wrapped := new(CaseChangingStream)
	wrapped.CharStream = in
	wrapped.upper = upper
	return wrapped
}

// LA gets the value of the symbol at offset from the current position
// from the underlying CharStream and converts it to either upper case
// or lower case.
func (is *CaseChangingStream) LA(offset int) int {
in := is.CharStream.LA(offset)
if in < 0 {
// Such as antlr.TokenEOF which is -1
return in
}
if is.upper {
return int(unicode.ToUpper(rune(in)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's better to convert the case once up front and read the upper/lower values from a prepared array, for performance reasons? The LA function can be called multiple times.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could cache the result, but I didn't want this to work by reading the full input up front, because this data could be streamed, or extremely large.

However, I don't think caching will save much, as this is quite a cheap operation for the CPU. But I'm happy to benchmark it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, got it. Maybe it's right.

}
return int(unicode.ToLower(rune(in)))
}
32 changes: 32 additions & 0 deletions runtime/Go/antlr/case_changing_stream_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package antlr

import (
"github.com/kylelemons/godebug/pretty"
"testing"
)

// TestCaseChangingStream checks that LA on a CaseChangingStream returns the
// input converted to the requested case, and that the end-of-input marker
// TokenEOF is passed through unchanged.
func TestCaseChangingStream(t *testing.T) {
	tests := []struct {
		input string
		upper bool
		want  []int
	}{
		{"abcd", true, []int{'A', 'B', 'C', 'D', TokenEOF}},
		{"ABCD", true, []int{'A', 'B', 'C', 'D', TokenEOF}},
		{"abcd", false, []int{'a', 'b', 'c', 'd', TokenEOF}},
		{"ABCD", false, []int{'a', 'b', 'c', 'd', TokenEOF}},
		{"", false, []int{TokenEOF}},
	}

	for _, test := range tests {
		var got []int
		is := NewCaseChangingStream(NewInputStream(test.input), test.upper)
		// Look one position past the end so the trailing TokenEOF is captured.
		for i := 1; i <= is.Size()+1; i++ {
			got = append(got, is.LA(i))
		}

		// pretty.Compare(a, b) marks lines from a with "-" and lines from b
		// with "+", so with (want, got) the legend must read "-want +got".
		if diff := pretty.Compare(test.want, got); diff != "" {
			t.Errorf("NewCaseChangingStream(%q, %v) diff: (-want +got)\n%s", test.input, test.upper, diff)
		}
	}
}
81 changes: 81 additions & 0 deletions runtime/Java/src/org/antlr/v4/runtime/CaseChangingCharStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package org.antlr.v4.runtime;

import org.antlr.v4.runtime.misc.Interval;

/**
* This class supports case-insensitive lexing by wrapping an existing
* {@link CharStream} and forcing the lexer to see either upper or
* lowercase characters. Grammar literals should then be either upper or
* lower case such as 'BEGIN' or 'begin'. The text of the character
* stream is unaffected. Example: input 'BeGiN' would match lexer rule
* 'BEGIN' if constructor parameter upper=true but getText() would return
* 'BeGiN'.
*/
public class CaseChangingCharStream implements CharStream {

    /** The wrapped stream; every operation except {@link #LA(int)} is forwarded to it as-is. */
    final CharStream stream;

    /** {@code true} to present symbols in upper case, {@code false} for lower case. */
    final boolean upper;

    /**
     * Creates a case-changing view over an existing {@link CharStream}.
     *
     * @param stream The stream to wrap.
     * @param upper If true force each symbol to upper case, otherwise force to lower.
     */
    public CaseChangingCharStream(CharStream stream, boolean upper) {
        this.stream = stream;
        this.upper = upper;
    }

    /** Returns the text of the wrapped stream for {@code interval}, without any case change. */
    @Override
    public String getText(Interval interval) {
        return stream.getText(interval);
    }

    /** Forwards to the wrapped stream. */
    @Override
    public void consume() {
        stream.consume();
    }

    /**
     * Returns the wrapped stream's lookahead symbol converted to the configured case.
     *
     * @param i the lookahead offset, passed to the wrapped stream unchanged
     * @return the case-converted symbol; values that are zero or negative
     *         (such as an end-of-input marker) are returned untouched
     */
    @Override
    public int LA(int i) {
        int symbol = stream.LA(i);
        if (symbol > 0) {
            return upper ? Character.toUpperCase(symbol) : Character.toLowerCase(symbol);
        }
        return symbol;
    }

    /** Forwards to the wrapped stream. */
    @Override
    public int mark() {
        return stream.mark();
    }

    /** Forwards to the wrapped stream. */
    @Override
    public void release(int marker) {
        stream.release(marker);
    }

    /** Forwards to the wrapped stream. */
    @Override
    public int index() {
        return stream.index();
    }

    /** Forwards to the wrapped stream. */
    @Override
    public void seek(int index) {
        stream.seek(index);
    }

    /** Forwards to the wrapped stream. */
    @Override
    public int size() {
        return stream.size();
    }

    /** Forwards to the wrapped stream. */
    @Override
    public String getSourceName() {
        return stream.getSourceName();
    }
}
22 changes: 22 additions & 0 deletions runtime/Java/src/org/antlr/v4/runtime/CharStreams.java
Original file line number Diff line number Diff line change
Expand Up @@ -303,4 +303,26 @@ public static CodePointCharStream fromChannel(
channel.close();
}
}

/**
* Wraps the stream in a {@link CaseChangingCharStream} that forces all
* symbols to uppercase for lexing purposes but leaves the original text as-is.
*
* @param in The stream to wrap.
* @return A new {@link CaseChangingCharStream} over {@code in} with upper-casing enabled.
*/
public static CharStream toUpper(CharStream in) {
return new CaseChangingCharStream(in, true);
}

/**
* Wraps the stream in a {@link CaseChangingCharStream} that forces all
* symbols to lowercase for lexing purposes but leaves the original text as-is.
*
* @param in The stream to wrap.
* @return A new {@link CaseChangingCharStream} over {@code in} with lower-casing enabled.
*/
public static CharStream toLower(CharStream in) {
return new CaseChangingCharStream(in, false);
}
}
54 changes: 54 additions & 0 deletions runtime/JavaScript/src/antlr4/CaseInsensitiveInputStream.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
//

// Wraps a char stream so that LA() reports every symbol forced to a single
// case, while getText() still returns the wrapped stream's original text.
//
// stream - the stream to wrap
// upper  - truthy to force upper case, falsy to force lower case
function CaseInsensitiveInputStream(stream, upper) {
    this._stream = stream;
    // The case-mapping functions live on String.prototype; String.toUpperCase
    // and String.toLowerCase do not exist and would leave _case undefined.
    this._case = upper ? String.prototype.toUpperCase : String.prototype.toLowerCase;
    return this;
}

// Returns the code point at the given lookahead offset of the wrapped stream,
// converted to the configured case. Zero and negative values (such as an
// end-of-input marker) are returned unchanged.
CaseInsensitiveInputStream.prototype.LA = function (offset) {
    var c = this._stream.LA(offset);
    if (c <= 0) {
        return c;
    }
    var mapped = this._case.call(String.fromCodePoint(c));
    var mappedCodePoint = mapped.codePointAt(0);
    // Some characters expand to several code points when case-mapped
    // (e.g. "\u00df".toUpperCase() === "SS"). A stream symbol must stay a
    // single code point, so keep the original symbol in that case.
    if (String.fromCodePoint(mappedCodePoint).length !== mapped.length) {
        return c;
    }
    return mappedCodePoint;
};

// Delegates to the wrapped stream.
CaseInsensitiveInputStream.prototype.reset = function() {
    return this._stream.reset();
};

// Delegates to the wrapped stream.
CaseInsensitiveInputStream.prototype.consume = function() {
    return this._stream.consume();
};

// Delegates to the wrapped stream's LT(); no case conversion is applied here.
CaseInsensitiveInputStream.prototype.LT = function(offset) {
    return this._stream.LT(offset);
};

// Delegates to the wrapped stream.
CaseInsensitiveInputStream.prototype.mark = function() {
    return this._stream.mark();
};

// Delegates to the wrapped stream.
CaseInsensitiveInputStream.prototype.release = function(marker) {
    return this._stream.release(marker);
};

// Moves the wrapped stream to the given index.
CaseInsensitiveInputStream.prototype.seek = function(_index) {
    return this._stream.seek(_index);
};

// Returns the wrapped stream's text between start and stop, in its original case.
CaseInsensitiveInputStream.prototype.getText = function(start, stop) {
    return this._stream.getText(start, stop);
};

// Delegates to the wrapped stream.
CaseInsensitiveInputStream.prototype.toString = function() {
    return this._stream.toString();
};

exports.CaseInsensitiveInputStream = CaseInsensitiveInputStream;
9 changes: 9 additions & 0 deletions runtime/JavaScript/src/antlr4/CharStreams.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
//

var CaseInsensitiveInputStream = require('./CaseInsensitiveInputStream').CaseInsensitiveInputStream;
var InputStream = require('./InputStream').InputStream;

var isNodeJs = typeof window === 'undefined' && typeof importScripts === 'undefined';
Expand Down Expand Up @@ -65,6 +66,14 @@ var CharStreams = {
fromPathSync: function(path, encoding) {
var data = fs.readFileSync(path, encoding);
return new InputStream(data, true);
},

toUpper: function(stream) {
return new CaseInsensitiveInputStream(stream, true);
},

toLower: function(stream) {
return new CaseInsensitiveInputStream(stream, false);
}
};

Expand Down