diff --git a/tensorboard/java/org/tensorflow/tensorboard/vulcanize/Vulcanize.java b/tensorboard/java/org/tensorflow/tensorboard/vulcanize/Vulcanize.java
index c495e174cea..91ebb9d01d8 100644
--- a/tensorboard/java/org/tensorflow/tensorboard/vulcanize/Vulcanize.java
+++ b/tensorboard/java/org/tensorflow/tensorboard/vulcanize/Vulcanize.java
@@ -74,13 +74,14 @@
 import org.jsoup.nodes.Comment;
 import org.jsoup.nodes.DataNode;
 import org.jsoup.nodes.Document;
+import org.jsoup.nodes.DocumentType;
 import org.jsoup.nodes.Element;
-import org.jsoup.nodes.Html5Printer;
 import org.jsoup.nodes.Node;
 import org.jsoup.nodes.TextNode;
 import org.jsoup.parser.Parser;
 import org.jsoup.parser.Tag;
 import org.jsoup.select.Elements;
+import org.jsoup.select.NodeVisitor;
 
 /** Simple one-off solution for TensorBoard vulcanization. */
 public final class Vulcanize {
@@ -182,10 +183,14 @@ public static void main(String[] args)
     }
 
     boolean shouldExtractJs = !jsPath.isEmpty();
+    // Write an empty file for shasum when all scripts are extracted out.
     createFile(
         jsOutput, shouldExtractJs ? extractAndTransformJavaScript(document, jsPath) : "");
-    // Write an empty file for shasum when all scripts are extracted out.
-    createFile(output, Html5Printer.stringify(document));
+    Document normalizedDocument = getFlattenedHTML5Document(document);
+    // Disable pretty-printing so that jsoup neither corrects the DOM structure
+    // nor messes up the whitespace in the template.
+    normalizedDocument.outputSettings().prettyPrint(false);
+    createFile(output, normalizedDocument.toString());
   }
 
   private static void createFile(Path filePath, String content) throws IOException {
@@ -778,6 +783,159 @@ private static String extractAndTransformJavaScript(Document document, Webpath j
     return scriptContent;
   }
 
+  /**
+   * Appends a clone of every child node of {@code src} to {@code dest}, in
+   * order, skipping text nodes that contain only whitespace.
+   *
+   * @param src element whose child nodes are cloned
+   * @param dest element that receives the clones
+   */
+  private static void cloneChildrenWithoutWhitespace(Element src, Element dest) {
+    // Snapshot the nodes to copy before touching `dest`, so appending can never
+    // interfere with the iteration over the child list of `src`.
+    List<Node> toClone = new ArrayList<>();
+    for (Node node : src.childNodes()) {
+      if (node instanceof TextNode && ((TextNode) node).isBlank()) {
+        continue;
+      }
+      toClone.add(node);
+    }
+    for (Node node : toClone) {
+      dest.appendChild(node.clone());
+    }
+  }
+
+  /**
+   * Flattens a document containing nested documents into one HTML5 document.
+   *
+   * <p>When we inline the HTML referenced by an import {@code <link>} in
+   * `transform`, we replace the link element with the parsed document. This
+   * leaves us with nested documents, which jsoup's Node.outerHtml (or
+   * Node.toString) is incapable of outputting properly. Here, we flatten the
+   * document by combining the children of the {@code <head>} and
+   * {@code <body>} elements of all nested documents into one {@code <head>}
+   * and one {@code <body>}.
+   *
+   * <p>It also prepends {@code <!doctype html>} since TensorBoard requires
+   * that the document is HTML, and it keeps the comments found directly under
+   * `document` since they are important license comments.
+   *
+   * <p>NOTE: this has a side effect on the input `document`: every
+   * {@code <body>} element that is visited gets emptied.
+   *
+   * <p>Example (whitespace simplified):
+   *
+   * <pre>{@code
+   * // Input
+   * <#root>
+   *   <html>
+   *     <head>
+   *       <#root>
+   *         <html>
+   *           <head><title>welcome</title></head>
+   *           <body><p>foo</p></body>
+   *         </html>
+   *       </#root>
+   *     </head>
+   *     <body><p>bar</p></body>
+   *   </html>
+   * </#root>
+   *
+   * // Output
+   * <#root>
+   *   <!doctype html>
+   *   <html>
+   *     <head><title>welcome</title></head>
+   *     <body><p>foo</p><p>bar</p></body>
+   *   </html>
+   * </#root>
+   * }</pre>
+   *
+   * @param document document to flatten; modified as described above
+   * @return a new document holding the flattened content
+   * @throws RuntimeException if the flattened result still contains a nested
+   *     document, e.g., because of an import outside the head of a document
+   */
+  private static Document getFlattenedHTML5Document(Document document) {
+    final int maxElementStrLen = 200;
+    Document flatDoc = new Document("/");
+
+    flatDoc.appendChild(new DocumentType("html", "", "", ""));
+
+    // Transfer comment nodes from the `document` level. They are important
+    // license comments.
+    for (Node node : document.childNodes()) {
+      if (node instanceof Comment) {
+        flatDoc.appendChild(node.clone());
+      }
+    }
+
+    // Create `<html>`, `<head>` and `<body>`.
+    flatDoc.normalise();
+
+    document.traverse(new FlatDocumentCopier(flatDoc));
+
+    // A nested document that survived the copy cannot be printed properly, so
+    // fail loudly instead of emitting broken HTML.
+    for (Element subdoc : flatDoc.getElementsByTag("#root")) {
+      if (subdoc != flatDoc) {
+        String parentStr = subdoc.parent().outerHtml();
+        if (parentStr.length() > maxElementStrLen) {
+          parentStr = parentStr.substring(0, maxElementStrLen) + "...";
+        }
+        throw new RuntimeException(
+            "Nested doc (e.g., importing outside the head of a document) "
+                + "is not supported.\nParent of offending element: " + parentStr);
+      }
+    }
+
+    return flatDoc;
+  }
+
+  /**
+   * Node visitor that copies the children of every {@code <head>} and
+   * {@code <body>} element it visits into the head and body of the
+   * destination document given at construction.
+   */
+  private static final class FlatDocumentCopier implements NodeVisitor {
+    private final Element destHead;
+    private final Element destBody;
+
+    /**
+     * @param dest document receiving the copies; its head and body are looked
+     *     up once here, and the caller normalises the document beforehand
+     */
+    public FlatDocumentCopier(Document dest) {
+      destHead = dest.head();
+      destBody = dest.body();
+    }
+
+    @Override
+    public void head(Node node, int depth) {
+      // Copy the direct children of a `head` into the dest doc's head without
+      // modification, unless the child is a `document` (a `<#root>` element),
+      // in which case we traverse further and only copy the child nodes of
+      // its own `head` and `body` elements.
+      Node parent = node.parentNode();
+      if (parent != null
+          && parent.nodeName().equals("head")
+          && !(node instanceof Document)) {
+        destHead.appendChild(node.clone());
+      }
+
+      if (node.nodeName().equals("body")) {
+        cloneChildrenWithoutWhitespace((Element) node, destBody);
+        // No need to further traverse the `body`. Skip by removing the nodes.
+        ((Element) node).empty();
+      }
+    }
+
+    @Override
+    public void tail(Node node, int depth) {
+      // Copying is done during the `head`. No need to do any work.
+    }
+  }
+
   private static final class JsPrintlessErrorManager extends BasicErrorManager {
     @Override