Skip to content

Commit 4b46397

Browse files
committed
Short-circuit tag scans for custom tags
1 parent d3f4e31 commit 4b46397

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -632,7 +632,9 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
632632
break;
633633
default:
634634
// todo - bring scan groups in if desired
635-
if (inSorted(name, Constants.InBodyStartEmptyFormatters)) {
635+
if (!Tag.isKnownTag(name)) { // no special rules for custom tags
636+
tb.insert(startTag);
637+
} else if (inSorted(name, Constants.InBodyStartEmptyFormatters)) {
636638
tb.reconstructFormattingElements();
637639
tb.insertEmpty(startTag);
638640
tb.framesetOk(false);
@@ -658,8 +660,7 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
658660
tb.error(this);
659661
return false;
660662
} else {
661-
if (Tag.isKnownTag(name)) // don't reconstruct for custom elements
662-
tb.reconstructFormattingElements();
663+
tb.reconstructFormattingElements();
663664
tb.insert(startTag);
664665
}
665666
}

src/main/java/org/jsoup/parser/Tag.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -237,15 +237,16 @@ protected Tag clone() {
237237
"ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
238238
"del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
239239
"td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
240-
"svg", "math", "center", "template"
240+
"svg", "math", "center", "template",
241+
"dir", "applet", "marquee", "listing" // deprecated but still known / special handling
241242
};
242243
private static final String[] inlineTags = {
243244
"object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
244245
"var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
245246
"sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
246247
"option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
247248
"summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
248-
"data", "bdi", "s"
249+
"data", "bdi", "s", "strike", "nobr"
249250
};
250251
private static final String[] emptyTags = {
251252
"meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",

src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package org.jsoup.parser;
22

33
import org.jsoup.Jsoup;
4+
import org.jsoup.internal.StringUtil;
45
import org.jsoup.parser.HtmlTreeBuilderState.Constants;
56
import org.junit.jupiter.api.Test;
67

@@ -10,8 +11,8 @@
1011
import java.util.Arrays;
1112
import java.util.List;
1213

13-
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
14-
import static org.junit.jupiter.api.Assertions.assertEquals;
14+
import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InBodyStartInputAttribs;
15+
import static org.junit.jupiter.api.Assertions.*;
1516

1617
public class HtmlTreeBuilderStateTest {
1718
static List<Object[]> findConstantArrays(Class aClass) {
@@ -47,6 +48,18 @@ public void ensureArraysAreSorted() {
4748
assertEquals(40, constants.size());
4849
}
4950

51+
@Test public void ensureTagSearchesAreKnownTags() {
52+
List<Object[]> constants = findConstantArrays(Constants.class);
53+
for (Object[] constant : constants) {
54+
String[] tagNames = (String[]) constant;
55+
for (String tagName : tagNames) {
56+
if (StringUtil.inSorted(tagName, InBodyStartInputAttribs))
57+
continue; // odd one out in the constant
58+
assertTrue(Tag.isKnownTag(tagName), String.format("Unknown tag name: %s", tagName));
59+
}
60+
}
61+
}
62+
5063

5164
@Test
5265
public void nestedAnchorElements01() {

0 commit comments

Comments
 (0)