Skip to content

Commit 3f11e95

Browse files
author
hannesw
committed
8019963: empty char range in regex
Reviewed-by: jlaskey, sundar
1 parent c96d064 commit 3f11e95

File tree

4 files changed

+102
-52
lines changed

src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from,
183183

184184
// add_code_range, be aware of it returning null!
185185
public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
186-
if (from >to) {
186+
if (from > to) {
187187
if (env.syntax.allowEmptyRangeInCC()) {
188188
return pbuf;
189189
} else {

src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java

Lines changed: 6 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -125,32 +125,8 @@ private CClassNode parseCharClass() {
125125
break;
126126

127127
case RAW_BYTE:
128-
if (token.base != 0) { /* tok->base != 0 : octal or hexadec. */
129-
byte[] buf = new byte[4];
130-
int psave = p;
131-
int base = token.base;
132-
buf[0] = (byte)token.getC();
133-
int i;
134-
for (i=1; i<4; i++) {
135-
fetchTokenInCC();
136-
if (token.type != TokenType.RAW_BYTE || token.base != base) {
137-
fetched = true;
138-
break;
139-
}
140-
buf[i] = (byte)token.getC();
141-
}
142-
143-
if (i == 1) {
144-
arg.v = buf[0] & 0xff;
145-
arg.inType = CCVALTYPE.SB; // goto raw_single
146-
} else {
147-
arg.v = EncodingHelper.mbcToCode(buf, 0, buf.length);
148-
arg.inType = CCVALTYPE.CODE_POINT;
149-
}
150-
} else {
151-
arg.v = token.getC();
152-
arg.inType = CCVALTYPE.SB; // raw_single:
153-
}
128+
arg.v = token.getC();
129+
arg.inType = CCVALTYPE.SB; // raw_single:
154130
arg.vIsRaw = true;
155131
parseCharClassValEntry2(cc, arg); // goto val_entry2
156132
break;
@@ -615,31 +591,10 @@ private Node parseExpTkRawByte(boolean group) {
615591
StringNode node = new StringNode((char)token.getC());
616592
node.setRaw();
617593

618-
int len = 1;
619-
while (true) {
620-
if (len >= 1) {
621-
if (len == 1) {
622-
fetchToken();
623-
node.clearRaw();
624-
// !goto string_end;!
625-
return parseExpRepeat(node, group);
626-
}
627-
}
628-
629-
fetchToken();
630-
if (token.type != TokenType.RAW_BYTE) {
631-
/* Don't use this, it is wrong for little endian encodings. */
632-
// USE_PAD_TO_SHORT_BYTE_CHAR ...
633-
634-
newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
635-
}
636-
637-
// important: we don't use 0xff mask here neither in the compiler
638-
// (in the template string) so we won't have to mask target
639-
// strings when comparing against them in the matcher
640-
node.cat((char)token.getC());
641-
len++;
642-
} // while
594+
fetchToken();
595+
node.clearRaw();
596+
// !goto string_end;!
597+
return parseExpRepeat(node, group);
643598
}
644599

645600
private Node parseExpRepeat(Node target, boolean group) {

test/script/basic/JDK-8019963.js

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
/**
25+
* JDK-8019963: empty char range in regex
26+
*
27+
* @test
28+
* @run
29+
*/
30+
31+
var re1 = /[\x00-\x08\x0B\x0C\x0E-\x9F\uD800-\uDFFF\uFFFE\uFFFF]/;
32+
33+
print(re1.test("\x00"));
34+
print(re1.test("\x04"));
35+
print(re1.test("\x08"));
36+
print(re1.test("\x0a"));
37+
print(re1.test("\x0B"));
38+
print(re1.test("\x0C"));
39+
print(re1.test("\x0E"));
40+
print(re1.test("\x10"));
41+
print(re1.test("\x1A"));
42+
print(re1.test("\x2F"));
43+
print(re1.test("\x8E"));
44+
print(re1.test("\x8F"));
45+
print(re1.test("\x9F"));
46+
print(re1.test("\xA0"));
47+
print(re1.test("\xAF"));
48+
print(re1.test("\uD800"));
49+
print(re1.test("\xDA00"));
50+
print(re1.test("\xDCFF"));
51+
print(re1.test("\xDFFF"));
52+
print(re1.test("\xFFFE"));
53+
print(re1.test("\xFFFF"));
54+
55+
var re2 = /[\x1F\x7F-\x84\x86]/;
56+
57+
print(re2.test("\x1F"));
58+
print(re2.test("\x2F"));
59+
print(re2.test("\x3F"));
60+
print(re2.test("\x7F"));
61+
print(re2.test("\x80"));
62+
print(re2.test("\x84"));
63+
print(re2.test("\x85"));
64+
print(re2.test("\x86"));
65+
66+
var re3 = /^([\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/;

test/script/basic/JDK-8019963.js.EXPECTED

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
true
2+
true
3+
true
4+
false
5+
true
6+
true
7+
true
8+
true
9+
true
10+
true
11+
true
12+
true
13+
true
14+
false
15+
false
16+
true
17+
true
18+
true
19+
true
20+
true
21+
true
22+
true
23+
false
24+
false
25+
true
26+
true
27+
true
28+
false
29+
true

0 commit comments

Comments
 (0)