diff --git a/html/doc.go b/html/doc.go
index 3a7e5ab17..885c4c593 100644
--- a/html/doc.go
+++ b/html/doc.go
@@ -78,16 +78,11 @@ example, to process each anchor node in depth-first order:
if err != nil {
// ...
}
- var f func(*html.Node)
- f = func(n *html.Node) {
+ for n := range doc.Descendants() {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- f(c)
- }
}
- f(doc)
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
diff --git a/html/example_test.go b/html/example_test.go
index 0b06ed773..830f0b27a 100644
--- a/html/example_test.go
+++ b/html/example_test.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build go1.23
+
// This example demonstrates parsing HTML data and walking the resulting tree.
package html_test
@@ -11,6 +13,7 @@ import (
"strings"
"golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
)
func ExampleParse() {
@@ -19,9 +22,8 @@ func ExampleParse() {
if err != nil {
log.Fatal(err)
}
- var f func(*html.Node)
- f = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "a" {
+ for n := range doc.Descendants() {
+ if n.Type == html.ElementNode && n.DataAtom == atom.A {
for _, a := range n.Attr {
if a.Key == "href" {
fmt.Println(a.Val)
@@ -29,11 +31,8 @@ func ExampleParse() {
}
}
}
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- f(c)
- }
}
- f(doc)
+
// Output:
// foo
// /bar/baz
diff --git a/html/iter.go b/html/iter.go
new file mode 100644
index 000000000..54be8fd30
--- /dev/null
+++ b/html/iter.go
@@ -0,0 +1,56 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.23
+
+package html
+
+import "iter"
+
+// Ancestors returns an iterator over n's ancestors, nearest (n.Parent) first.
+//
+// Mutating a Node or its parents while iterating may have unexpected results.
+func (n *Node) Ancestors() iter.Seq[*Node] {
+	_ = n.Parent // eager nil check: fail at the call, not when iteration starts
+	return func(yield func(*Node) bool) {
+		for up := n.Parent; up != nil && yield(up); {
+			up = up.Parent // read only after yield has returned
+		}
+	}
+}
+
+// ChildNodes returns an iterator over the direct children of n, in sibling
+// order beginning with n.FirstChild.
+//
+// Mutating a Node or its children while iterating may have unexpected results.
+func (n *Node) ChildNodes() iter.Seq[*Node] {
+	_ = n.FirstChild // eager nil check: fail at the call, not when iteration starts
+	return func(yield func(*Node) bool) {
+		child := n.FirstChild
+		for child != nil && yield(child) {
+			child = child.NextSibling // read only after yield has returned
+		}
+	}
+}
+
+// Descendants returns an iterator over every node in the subtree below n,
+// not including n itself. Nodes are produced in depth-first preorder.
+//
+// Mutating a Node or its descendants while iterating may have unexpected results.
+func (n *Node) Descendants() iter.Seq[*Node] {
+	_ = n.FirstChild // eager nil check: fail at the call, not when iteration starts
+	return func(yield func(*Node) bool) {
+		// The helper's continue/stop result has no caller to inform at the top level.
+		_ = n.descendants(yield)
+	}
+}
+
+// descendants yields the nodes below n in preorder; false means yield asked to stop.
+func (n *Node) descendants(yield func(*Node) bool) bool {
+	c := n.FirstChild
+	for c != nil && yield(c) && c.descendants(yield) {
+		c = c.NextSibling
+	}
+	return c == nil // c is still non-nil only when the walk was cut short
+}
diff --git a/html/iter_test.go b/html/iter_test.go
new file mode 100644
index 000000000..cca7f82f5
--- /dev/null
+++ b/html/iter_test.go
@@ -0,0 +1,96 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.23
+
+package html
+
+import (
+ "strings"
+ "testing"
+)
+
+func TestNode_ChildNodes(t *testing.T) {
+ tests := []struct {
+ in string
+ want string
+ }{
+ {"", ""},
+ {"", "a"},
+ {"a", "a"},
+ {"", "a b"},
+ {"ac", "a b c"},
+ {"ad", "a b d"},
+ {"cef