From 03d12a7f09ea9c300088f4dee3dba21f63e4c010 Mon Sep 17 00:00:00 2001 From: Muhammad Ridho Date: Wed, 4 Oct 2023 21:12:24 +0700 Subject: [PATCH 1/2] html-support (with hickory) --- deps.edn | 3 ++- src/cq/formats.clj | 17 ++++++++++++++++- test/cq/formats_test.clj | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/deps.edn b/deps.edn index a75d65e..38dc69f 100644 --- a/deps.edn +++ b/deps.edn @@ -14,7 +14,8 @@ com.cognitect/transit-clj {:mvn/version "1.0.324"} medley/medley {:mvn/version "1.3.0"} mvxcvi/puget {:mvn/version "1.3.1"} - tolitius/xml-in {:mvn/version "0.1.1"}} + tolitius/xml-in {:mvn/version "0.1.1"} + org.clj-commons/hickory {:mvn/version "0.7.3"}} :aliases {:run {:main-opts ["-m" "cq.main"]} diff --git a/src/cq/formats.clj b/src/cq/formats.clj index a49e475..27e7e5b 100644 --- a/src/cq/formats.clj +++ b/src/cq/formats.clj @@ -2,6 +2,8 @@ (:require [clojure.data.csv :as csv] [clojure.data.json :as json] [clojure.data.xml :as xml] + [hickory.core :as html] + [hickory.render :refer [hickory-to-html]] [clojure.edn :as edn] [clojure.pprint :as ppt] [clojure.java.io :as io] @@ -167,6 +169,17 @@ (with-open [w (io/writer out)] (emit x w))))) +(defn ->html-reader + [_] + (fn [in] + (html/as-hickory (org.jsoup.Jsoup/parse in nil "")))) + +(defn ->html-writer + [_] + (fn [x out] + (with-open [w (io/writer out)] + (.write w (hickory-to-html x))))) + (def formats {"json" {:->reader ->json-reader :->writer ->json-writer} @@ -185,7 +198,9 @@ "transit" {:->reader ->transit-reader :->writer ->transit-writer} "xml" {:->reader ->xml-reader - :->writer ->xml-writer}}) + :->writer ->xml-writer} + "html" {:->reader ->html-reader + :->writer ->html-writer}}) (defn format->reader [format in opts] diff --git a/test/cq/formats_test.clj b/test/cq/formats_test.clj index 6254452..bbf3347 100644 --- a/test/cq/formats_test.clj +++ b/test/cq/formats_test.clj @@ -2,7 +2,8 @@ (:require [cq.formats :as sut] [clojure.test :refer :all] [clojure.java.io :as io] - [clojure.string :as str]) + [clojure.string :as str] + [hickory.core :as html]) (:import [java.io ByteArrayInputStream BufferedInputStream PrintStream ByteArrayOutputStream])) (defn- to-out-stream @@ -152,3 +153,33 @@ Hello \n" (test-writer-str sut/->xml-writer {:pretty true} test-xml-data)))))) + +(def test-html-str + "

hello

") + +(def test-html-data + {:type :document + :content + [{:type :element + :attrs nil + :tag :html + :content + [{:type :element + :attrs nil + :tag :head :content nil} + {:type :element + :attrs nil + :tag :body :content + [{:type :element + :attrs nil + :tag :p + :content ["hello"]}]}]}]}) + +(deftest html + (testing "reader" + (is (= test-html-data + (test-reader-str sut/->html-reader nil test-html-str)))) + + (testing "writer" + (is (= "

hello

" + (test-writer-str sut/->html-writer nil test-html-data))))) From efd9774cc6699bd91fff74c626b0197cc75abf39 Mon Sep 17 00:00:00 2001 From: Muhammad Ridho Date: Thu, 5 Oct 2023 01:03:10 +0700 Subject: [PATCH 2/2] fix hickory/jsoup error on native build --- deps.edn | 3 ++- src/cq/formats.clj | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/deps.edn b/deps.edn index 38dc69f..1213df6 100644 --- a/deps.edn +++ b/deps.edn @@ -15,7 +15,8 @@ medley/medley {:mvn/version "1.3.0"} mvxcvi/puget {:mvn/version "1.3.1"} tolitius/xml-in {:mvn/version "0.1.1"} - org.clj-commons/hickory {:mvn/version "0.7.3"}} + org.clj-commons/hickory {:mvn/version "0.7.3"} + org.jsoup/jsoup {:mvn/version "1.14.3"}} :aliases {:run {:main-opts ["-m" "cq.main"]} diff --git a/src/cq/formats.clj b/src/cq/formats.clj index 27e7e5b..0a57772 100644 --- a/src/cq/formats.clj +++ b/src/cq/formats.clj @@ -172,13 +172,14 @@ (defn ->html-reader [_] (fn [in] - (html/as-hickory (org.jsoup.Jsoup/parse in nil "")))) + (html/as-hickory (html/parse (slurp (io/reader in)))))) (defn ->html-writer [_] (fn [x out] - (with-open [w (io/writer out)] - (.write w (hickory-to-html x))))) + (binding [*out* (io/writer out)] + (print (hickory-to-html x)) + (flush)))) (def formats {"json" {:->reader ->json-reader