Skip to content

Commit 6879f6a

Browse files
author
Greg Guthe
committed
1 parent 90cb80b commit 6879f6a

File tree

2 files changed

+25
-2
lines changed

2 files changed

+25
-2
lines changed

bleach/html5lib_shim.py

+15-2
Original file line number | Diff line number | Diff line change
@@ -459,9 +459,22 @@ def convert_entity(value):
459459
if value[0] == "#":
460460
if len(value) < 2:
461461
return None
462+
462463
if value[1] in ("x", "X"):
463-
return six.unichr(int(value[2:], 16))
464-
return six.unichr(int(value[1:], 10))
464+
# hex-encoded code point
465+
int_as_string, base = value[2:], 16
466+
else:
467+
# decimal code point
468+
int_as_string, base = value[1:], 10
469+
470+
if int_as_string == "":
471+
return None
472+
473+
code_point = int(int_as_string, base)
474+
if 0 < code_point < 0x110000:
475+
return six.unichr(code_point)
476+
else:
477+
return None
465478

466479
return ENTITIES.get(value, None)
467480

tests/test_html5lib_shim.py

+10
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,16 @@
1919
("&xx;", "&xx;"),
2020
# Handles multiple entities in the same string
2121
("this &amp; that &amp; that", "this & that & that"),
22+
# Handles empty decimal and hex encoded code points
23+
("&#x;", "&#x;"),
24+
("&#;", "&#;"),
25+
# Handles too high unicode points
26+
("&#x110000;", "&#x110000;"),
27+
("&#x110111;", "&#x110111;"),
28+
("&#9277809;", "&#9277809;"),
29+
# Handles negative unicode points
30+
("&#-1;", "&#-1;"),
31+
("&#x-1;", "&#x-1;"),
2232
],
2333
)
2434
def test_convert_entities(data, expected):

0 commit comments

Comments
 (0)