dashbitco · jonatanklosko · Oct 3, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/c_src/lazy_html.cpp b/c_src/lazy_html.cpp
@@ -4,6 +4,7 @@
 #include <functional>
 #include <memory>
 #include <optional>
+#include <set>
 #include <stdexcept>
 #include <string>
 #include <tuple>
@@ -714,6 +715,25 @@ ExLazyHTML child_nodes(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
 
 FINE_NIF(child_nodes, 0);
 
+// Gathers each distinct element parent of the given nodes, in first-seen order.
+ExLazyHTML parent_nodes(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
+  auto parents = std::vector<lxb_dom_node_t *>();
+  auto seen = std::set<lxb_dom_node_t *>();
+  for (auto node : ex_lazy_html.resource->nodes) {
+    auto parent = node->parent;
+    if (parent == NULL || parent->type != LXB_DOM_NODE_TYPE_ELEMENT) {
+      continue;
+    }
+    // insert() reports whether the pointer was new, so duplicates are skipped.
+    if (seen.insert(parent).second) {
+      parents.push_back(parent);
+    }
+  }
+  return ExLazyHTML(fine::make_resource<LazyHTML>(
+      ex_lazy_html.resource->document_ref, parents, true));
+}
+FINE_NIF(parent_nodes, ERL_NIF_DIRTY_JOB_CPU_BOUND);
+
 std::string text(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
   auto document = ex_lazy_html.resource->document_ref->document;
 
@@ -802,6 +822,12 @@ std::uint64_t num_nodes(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
 
 FINE_NIF(num_nodes, 0);
 
+bool equals(ErlNifEnv *env, ExLazyHTML html_a, ExLazyHTML html_b) {
+  auto &a = html_a.resource, &b = html_b.resource; // same document, same nodes
+  return a->document_ref == b->document_ref && a->nodes == b->nodes;
+}
+FINE_NIF(equals, 0);
+
 std::vector<fine::Term> tag(ErlNifEnv *env, ExLazyHTML ex_lazy_html) {
   auto values = std::vector<fine::Term>();
 

diff --git a/lib/lazy_html.ex b/lib/lazy_html.ex
@@ -357,6 +357,60 @@ defmodule LazyHTML do
     LazyHTML.NIF.child_nodes(lazy_html)
   end
 
+  @doc """
+  Returns the unique parent elements of the root nodes in `lazy_html`.
+
+  ## Examples
+
+      iex> lazy_html = LazyHTML.from_fragment(~S|<div><span>Hello</span> <span>world</span></div>|)
+      iex> spans = LazyHTML.query(lazy_html, "span")
+      iex> LazyHTML.parent_nodes(spans)
+      #LazyHTML<
+        1 node (from selector)
+        #1
+        <div><span>Hello</span> <span>world</span></div>
+      >
+
+  The root node is always `<html>`, even if initialized via `from_fragment/1`:
+
+      iex> lazy_html = LazyHTML.from_fragment(~S|<div>root</div>|)
+      iex> LazyHTML.parent_nodes(lazy_html)
+      #LazyHTML<
+        1 node (from selector)
+        #1
+        <html><div>root</div></html>
+      >
+
+  """
+  @spec parent_nodes(t()) :: t()
+  def parent_nodes(lazy_html) do
+    LazyHTML.NIF.parent_nodes(lazy_html)
+  end
+
+  @doc """
+  Returns the single parent shared by the root nodes in `lazy_html` as `{:ok, parent}`.
+  Gives `{:ok, nil}` without any parent and `{:error, :multiple_parents}` with several.
+  """
+  @spec parent_node(t()) :: {:ok, t() | nil} | {:error, :multiple_parents}
+  def parent_node(lazy_html) do
+    parent = LazyHTML.NIF.parent_nodes(lazy_html)
+    case LazyHTML.NIF.num_nodes(parent) do
+      0 -> {:ok, nil}
+      1 -> {:ok, parent}
+      _ -> {:error, :multiple_parents}
+    end
+  end
+
+  @doc """
+  Same as `parent_node/1`, but returns the bare parent (or `nil`) and raises on multiple parents.
+  """
+  def parent_node!(lazy_html) do
+    case parent_node(lazy_html) do
+      {:ok, res} -> res
+      {:error, :multiple_parents} -> raise "Selected nodes have multiple parents"
+    end
+  end
+
   @doc """
   Returns the text content of all nodes in `lazy_html`.
 
@@ -481,6 +535,29 @@ defmodule LazyHTML do
     LazyHTML.NIF.tag(lazy_html)
   end
 
+  @doc """
+  Returns true if both `LazyHTML` values select the same nodes from the same document.
+
+  ## Examples
+
+      iex> lazy_html = LazyHTML.from_fragment(~S|<div><span id=1>Hello</span></div>|)
+      iex> a = LazyHTML.query(lazy_html, "#1")
+      iex> b = LazyHTML.query(lazy_html, "div > span")
+      iex> LazyHTML.equals?(a, b)
+      true
+
+  Note that separately created values are never equal, even for identical markup:
+
+      iex> html_a = LazyHTML.from_fragment(~S|<div>hello</div>|)
+      iex> html_b = LazyHTML.from_fragment(~S|<div>hello</div>|)
+      iex> LazyHTML.equals?(html_a, html_b)
+      false
+  """
+  @spec equals?(t(), t()) :: boolean()
+  def equals?(html_a, html_b) do
+    LazyHTML.NIF.equals(html_a, html_b)
+  end
+
   @doc ~S"""
   Escapes the given string to make a valid HTML text.
 

diff --git a/lib/lazy_html/nif.ex b/lib/lazy_html/nif.ex
@@ -21,12 +21,14 @@ defmodule LazyHTML.NIF do
   def filter(_lazy_html, _css_selector), do: err!()
   def query_by_id(_lazy_html, _id), do: err!()
   def child_nodes(_lazy_html), do: err!()
+  def parent_nodes(_lazy_html), do: err!()
   def text(_lazy_html), do: err!()
   def attribute(_lazy_html, _name), do: err!()
   def attributes(_lazy_html), do: err!()
   def tag(_lazy_html), do: err!()
   def nodes(_lazy_html), do: err!()
   def num_nodes(_lazy_html), do: err!()
+  def equals(_lazy_html_a, _lazy_html_b), do: err!()
 
   defp err!(), do: :erlang.nif_error(:not_loaded)
 end
diff --git a/test/lazy_html_test.exs b/test/lazy_html_test.exs
@@ -250,6 +250,96 @@ defmodule LazyHTMLTest do
     end
   end
 
+  describe "parent_nodes/1" do
+    test "from selector of nodes on different levels" do
+      lazy_html =
+        LazyHTML.from_fragment("""
+        <div id=0>
+          <div id=1>
+            <span>Hello</span>
+          </div>
+          <span>world</span>
+        </div>
+        """)
+
+      spans = LazyHTML.query(lazy_html, "span")
+      parents = LazyHTML.parent_nodes(spans)
+      parent_ids = parents |> Enum.flat_map(&LazyHTML.attribute(&1, "id")) |> Enum.sort()
+      assert parent_ids == ["0", "1"]
+
+      # the parent of div#0 is <html>, and the parent of div#1 is div#0
+      grandparents = LazyHTML.parent_nodes(parents)
+      assert LazyHTML.tag(grandparents) |> Enum.sort() == ["div", "html"]
+
+      # <html> has no element parent, so only the parent of div#0 (<html>) is left
+      great_grandparents = LazyHTML.parent_nodes(grandparents)
+      assert great_grandparents |> Enum.count() == 1
+
+      # the remaining <html> node has no element parent either, so nothing is left
+      assert LazyHTML.parent_nodes(great_grandparents) |> Enum.count() == 0
+    end
+
+    test "from selector of nodes on same level" do
+      lazy_html =
+        LazyHTML.from_fragment("""
+        <div id=0>
+          <div id=1>
+            <span>Hello</span>
+          </div>
+          <div id=2>
+            <span>world</span>
+          </div>
+        </div>
+        """)
+
+      spans = LazyHTML.query(lazy_html, "span")
+      parents = LazyHTML.parent_nodes(spans)
+      parent_ids = parents |> Enum.flat_map(&LazyHTML.attribute(&1, "id")) |> Enum.sort()
+      assert parent_ids == ["1", "2"]
+
+      # div#1 and div#2 share div#0 as their parent, so it is returned only once
+      grandparent = LazyHTML.parent_nodes(parents)
+      assert LazyHTML.attribute(grandparent, "id") == ["0"]
+    end
+
+    # Walks up through the parents, collecting {tag, index among tagged siblings} pairs,
+    # and joins them into an `:nth-child` selector once no parent is left.
+    defp get_css_path(node, acc) do
+      case LazyHTML.parent_node!(node) do
+        nil ->
+          Enum.map_join(acc, " > ", fn {tag, i} -> "#{tag}:nth-child(#{i + 1})" end)
+
+        parent ->
+          # Children with no tag are dropped so the index counts tagged children only.
+          elements = parent |> LazyHTML.child_nodes() |> Enum.reject(&(LazyHTML.tag(&1) == []))
+          position = Enum.find_index(elements, &LazyHTML.equals?(&1, node))
+          [tag] = LazyHTML.tag(node)
+          get_css_path(parent, [{tag, position} | acc])
+      end
+    end
+
+    test "construct nth-child selector by traversing parents" do
+      lazy_html =
+        LazyHTML.from_fragment("""
+        <div>
+          <div class="wibble">
+            <span>wibble</span>
+          </div>
+          <div class="wobble">
+            <span>wobble</span>
+          </div>
+        </div>
+        """)
+      # derive the selector path for the span under .wobble by walking up its parents
+      span = LazyHTML.query(lazy_html, ".wobble span")
+      path = get_css_path(span, [])
+      assert path == "div:nth-child(1) > div:nth-child(2) > span:nth-child(1)"
+      # querying with the derived path must select the very same node again
+      span2 = LazyHTML.query(lazy_html, path)
+      assert LazyHTML.equals?(span, span2)
+    end
+  end
+
   describe "query_by_id/2" do
     test "raises when an empty id is given" do
       assert_raise ArgumentError, ~r/id cannot be empty/, fn ->