Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions c_src/lazy_html.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <functional>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -714,6 +715,25 @@ ExLazyHTML child_nodes(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {

FINE_NIF(child_nodes, 0);

// Collects the distinct parent elements of the nodes held by `ex_lazy_html`.
//
// Parents are appended in the order they are first encountered while walking
// the input nodes. A node whose parent pointer is null, or whose parent is
// not an element node, contributes nothing to the result. The returned value
// reuses the input's document reference.
ExLazyHTML parent_node(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
  auto parents = std::vector<lxb_dom_node_t *>();
  // Tracks which parents were already emitted so that siblings sharing a
  // parent yield it only once.
  auto seen = std::set<lxb_dom_node_t *>();

  for (auto node : ex_lazy_html.resource->nodes) {
    auto parent = node->parent;
    if (parent == nullptr || parent->type != LXB_DOM_NODE_TYPE_ELEMENT) {
      continue;
    }

    // insert() reports whether the key was newly added, so a single lookup
    // both tests membership and records the parent.
    if (seen.insert(parent).second) {
      parents.push_back(parent);
    }
  }

  // NOTE(review): the trailing `true` presumably flags the result as
  // selector-derived (the Elixir doctest prints "from selector") - confirm
  // against the LazyHTML constructor.
  return ExLazyHTML(fine::make_resource<LazyHTML>(
      ex_lazy_html.resource->document_ref, parents, true));
}
FINE_NIF(parent_node, ERL_NIF_DIRTY_JOB_CPU_BOUND);

std::string text(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
auto document = ex_lazy_html.resource->document_ref->document;

Expand Down Expand Up @@ -802,6 +822,12 @@ std::uint64_t num_nodes(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {

FINE_NIF(num_nodes, 0);

// Reports whether two handles refer to the same selection: their document
// references must compare equal and their node collections must compare
// equal with operator== (NOTE(review): order-sensitive if `nodes` is a
// sequence container such as std::vector - confirm against LazyHTML).
bool equals(ErlNifEnv *env, ExLazyHTML html_a, ExLazyHTML html_b) {
  // Handles built from different documents can never be equal.
  if (!(html_a.resource->document_ref == html_b.resource->document_ref)) {
    return false;
  }

  return html_a.resource->nodes == html_b.resource->nodes;
}
FINE_NIF(equals, 0);

std::vector<fine::Term> tag(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
auto values = std::vector<fine::Term>();

Expand Down
53 changes: 53 additions & 0 deletions lib/lazy_html.ex
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,36 @@ defmodule LazyHTML do
LazyHTML.NIF.child_nodes(lazy_html)
end

@doc """
Returns the parent elements of the root nodes in `lazy_html`.

Each parent appears only once in the result, even when several root
nodes share it. Root nodes that have no parent element contribute
nothing, so the result may hold fewer nodes than the input.

## Examples

    iex> lazy_html = LazyHTML.from_fragment(~S|<div><span>Hello</span> <span>world</span></div>|)
    iex> spans = LazyHTML.query(lazy_html, "span")
    iex> LazyHTML.parent_node(spans)
    #LazyHTML<
      1 node (from selector)
      #1
      <div><span>Hello</span> <span>world</span></div>
    >

The topmost element is always `<html>`, even when the tree was built
with `from_fragment/1`:

    iex> lazy_html = LazyHTML.from_fragment(~S|<div>root</div>|)
    iex> LazyHTML.parent_node(lazy_html)
    #LazyHTML<
      1 node (from selector)
      #1
      <html><div>root</div></html>
    >

"""
@spec parent_node(t()) :: t()
def parent_node(lazy_html), do: LazyHTML.NIF.parent_node(lazy_html)

@doc """
Returns the text content of all nodes in `lazy_html`.

Expand Down Expand Up @@ -481,6 +511,29 @@ defmodule LazyHTML do
LazyHTML.NIF.tag(lazy_html)
end

@doc """
Checks whether two `lazy_html` values select the same nodes of the same document.

Equality is based on node identity within a shared document, not on the
HTML content of the nodes.

## Examples

    iex> lazy_html = LazyHTML.from_fragment(~S|<div><span id="a">Hello</span></div>|)
    iex> a = LazyHTML.query(lazy_html, "#a")
    iex> b = LazyHTML.query(lazy_html, "div > span")
    iex> LazyHTML.equals?(a, b)
    true

Values that were parsed separately are never equal, even when their
markup is identical:

    iex> html_a = LazyHTML.from_fragment(~S|<div>hello</div>|)
    iex> html_b = LazyHTML.from_fragment(~S|<div>hello</div>|)
    iex> LazyHTML.equals?(html_a, html_b)
    false

"""
@spec equals?(t(), t()) :: boolean()
def equals?(html_a, html_b), do: LazyHTML.NIF.equals(html_a, html_b)

@doc ~S"""
Escapes the given string to make a valid HTML text.

Expand Down
2 changes: 2 additions & 0 deletions lib/lazy_html/nif.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ defmodule LazyHTML.NIF do
# Placeholder clauses for the native functions. Each one only calls err!/0,
# which raises via :erlang.nif_error(:not_loaded). Presumably the loaded NIF
# library replaces these definitions at runtime - confirm against this
# module's load hook, which is outside the visible lines.
def filter(_lazy_html, _css_selector), do: err!()
def query_by_id(_lazy_html, _id), do: err!()
def child_nodes(_lazy_html), do: err!()
def parent_node(_lazy_html), do: err!()
def text(_lazy_html), do: err!()
def attribute(_lazy_html, _name), do: err!()
def attributes(_lazy_html), do: err!()
def tag(_lazy_html), do: err!()
def nodes(_lazy_html), do: err!()
def num_nodes(_lazy_html), do: err!()
def equals(_lazy_html_a, _lazy_html_b), do: err!()

# Shared body for every stub above: signals that the NIF is not loaded.
defp err!(), do: :erlang.nif_error(:not_loaded)
end
91 changes: 91 additions & 0 deletions test/lazy_html_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,97 @@ defmodule LazyHTMLTest do
end
end

describe "parent_node/1" do
test "from selector of nodes on different levels" do
  fragment = """
  <div id="a">
  <div id="b">
  <span>Hello</span>
  </div>
  <span>world</span>
  </div>
  """

  parents =
    fragment
    |> LazyHTML.from_fragment()
    |> LazyHTML.query("span")
    |> LazyHTML.parent_node()

  sorted_ids =
    parents
    |> Enum.flat_map(fn parent -> LazyHTML.attribute(parent, "id") end)
    |> Enum.sort()

  assert sorted_ids == ["a", "b"]

  # div#b sits inside div#a, and div#a sits directly under <html>.
  grandparents = LazyHTML.parent_node(parents)
  sorted_tags = grandparents |> LazyHTML.tag() |> Enum.sort()
  assert sorted_tags == ["div", "html"]

  # <html> has no parent element, so only the parent of div#a remains.
  great_grandparents = LazyHTML.parent_node(grandparents)
  assert Enum.count(great_grandparents) == 1

  # The single remaining node is <html>, which again yields nothing.
  assert Enum.empty?(LazyHTML.parent_node(great_grandparents))
end

test "from selector of nodes on same level" do
  fragment = """
  <div id="a">
  <div id="b">
  <span>Hello</span>
  </div>
  <div id="c">
  <span>world</span>
  </div>
  </div>
  """

  parents =
    fragment
    |> LazyHTML.from_fragment()
    |> LazyHTML.query("span")
    |> LazyHTML.parent_node()

  sorted_ids =
    parents
    |> Enum.flat_map(fn parent -> LazyHTML.attribute(parent, "id") end)
    |> Enum.sort()

  assert sorted_ids == ["b", "c"]

  # div#b and div#c are both children of div#a, so the shared parent is
  # returned only once.
  shared_parent_ids = parents |> LazyHTML.parent_node() |> LazyHTML.attribute("id")
  assert shared_parent_ids == ["a"]
end

# Builds a `tag:nth-child(n) > ...` selector for a single-node `node` by
# walking up through its parents. `acc` collects `{tag, zero_based_index}`
# pairs, outermost ancestor first. The walk stops at the node that has no
# parent element, and that node itself is not part of the path.
defp get_css_path(node, acc) do
  # The helper only makes sense for exactly one node; crash otherwise.
  1 = Enum.count(node)
  parent = LazyHTML.parent_node(node)

  if Enum.empty?(parent) do
    Enum.map_join(acc, " > ", fn {name, index} -> "#{name}:nth-child(#{index + 1})" end)
  else
    # Nodes without a tag are dropped so that indexes count elements only.
    element_siblings =
      parent
      |> LazyHTML.child_nodes()
      |> Enum.filter(fn sibling -> LazyHTML.tag(sibling) != [] end)

    [name] = LazyHTML.tag(node)
    index = Enum.find_index(element_siblings, &LazyHTML.equals?(&1, node))
    get_css_path(parent, [{name, index} | acc])
  end
end

test "construct nth-child selector by traversing parents" do
  fragment = """
  <div>
  <div class="wibble">
  <span>wibble</span>
  </div>
  <div class="wobble">
  <span>wobble</span>
  </div>
  </div>
  """

  lazy_html = LazyHTML.from_fragment(fragment)
  original_span = LazyHTML.query(lazy_html, ".wobble span")

  css_path = get_css_path(original_span, [])
  assert css_path == "div:nth-child(1) > div:nth-child(2) > span:nth-child(1)"

  # Querying with the generated path must select the very same node.
  requeried_span = LazyHTML.query(lazy_html, css_path)
  assert LazyHTML.equals?(original_span, requeried_span)
end
end

describe "query_by_id/2" do
test "raises when an empty id is given" do
assert_raise ArgumentError, ~r/id cannot be empty/, fn ->
Expand Down