Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/xml.cr
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ module XML
# Parses an XML document from *string* with *options* into an `XML::Document`.
#
# See `ParserOptions.default` for default options.
def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Node
def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Document
raise XML::Error.new("Document is empty", 0) if string.empty?
ctxt = LibXML.xmlNewParserCtxt
from_ptr(ctxt) do
Expand All @@ -65,7 +65,7 @@ module XML
# Parses an XML document from *io* with *options* into an `XML::Document`.
#
# See `ParserOptions.default` for default options.
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Node
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Document
ctxt = LibXML.xmlNewParserCtxt
from_ptr(ctxt) do
LibXML.xmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, nil, options)
Expand All @@ -75,7 +75,7 @@ module XML
# Parses an HTML document from *string* with *options* into an `XML::Document`.
#
# See `HTMLParserOptions.default` for default options.
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Node
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Document
raise XML::Error.new("Document is empty", 0) if string.empty?
ctxt = LibXML.htmlNewParserCtxt
from_ptr(ctxt) do
Expand All @@ -86,7 +86,7 @@ module XML
# Parses an HTML document from *io* with *options* into an `XML::Document`.
#
# See `HTMLParserOptions.default` for default options.
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Node
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Document
ctxt = LibXML.htmlNewParserCtxt
from_ptr(ctxt) do
LibXML.htmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, "utf-8", options)
Expand Down Expand Up @@ -119,7 +119,7 @@ module XML
{% end %}
raise Error.new(LibXML.xmlGetLastError) unless doc

Node.new(doc, errors)
Document.new(doc, errors)
end

{% unless LibXML.has_method?(:xmlSaveSetIndentString) %}
Expand Down
94 changes: 94 additions & 0 deletions src/xml/document.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
require "weak_ref"

class XML::Document < XML::Node
  # :nodoc:
  #
  # Weak-reference cache of the XML::Node wrappers instantiated for this
  # document, keyed by the libxml node pointer they wrap.
  #
  # Constructors allocate a XML::Node only once per libxml node, so that (for
  # example) a document is never finalized twice. The document's own wrapper is
  # stored in the libxml struct (`_private`), since it lives exactly as long as
  # the libxml doc. Wrappers of subtree nodes may be lost, however, and storing
  # them in `_private` would leave dangling pointers. Holding them here through
  # weak references lets lost wrappers be collected; at worst a XML::Node is
  # instantiated again when needed.
  #
  # NOTE: when a XML::Node moves to another document, it and every instantiated
  # descendant XML::Node shall be removed from the original document's cache
  # and added to the new document's cache.
  protected getter cache : Hash(LibXML::Node*, WeakRef(Node))

  # :nodoc:
  #
  # Explicit list of libxml nodes that were unlinked from the tree.
  #
  # An unlinked libxml node, together with its descendants, is no longer part
  # of the document's tree and has to be freed manually. Freeing it from a
  # XML::Node finalizer is not an option: that would free the whole subtree
  # while live XML::Node instances may still point into it. The cache cannot be
  # used for this either, because it only holds weak references: the XML::Node
  # could be collected, leaking the libxml node and its subtree.
  #
  # NOTE: a libxml node and all of its descendants shall be removed from this
  # list once relinked into a tree, whether in this document or another one.
  protected getter unlinked_nodes : Set(LibXML::Node*)

  # :nodoc:
  def self.new(doc : LibXML::Doc*, errors : Array(Error)? = nil) : Document
    existing = doc.value._private

    # A non-null `_private` is the Document previously created for this doc.
    return existing.as(Document) unless existing.null?

    new(doc_: doc, errors_: errors)
  end

  # Must never be called directly, use the constructor above.
  private def initialize(*, doc_ : LibXML::Doc*, errors_ : Array(Error)?)
    @cache = Hash(LibXML::Node*, WeakRef(Node)).new
    @unlinked_nodes = Set(LibXML::Node*).new
    @errors = errors_
    @node = doc_.as(LibXML::Node*)

    # A document is its own owning document, and the libxml doc points back at
    # this instance so later `Document.new` calls can reuse it.
    @document = self
    doc_.value._private = self.as(Void*)
  end

  # :nodoc:
  def finalize
    # Free the unlinked nodes (and their subtrees) that still belong to this
    # document.
    @unlinked_nodes.each do |unlinked|
      # A node adopted into another document must not be freed here.
      next unless unlinked.value.doc == @node

      LibXML.xmlFreeNode(unlinked)
    end

    # Then free the doc itself along with its subtree.
    LibXML.xmlFreeDoc(@node.as(LibXML::Doc*))
  end

  # Returns the encoding of this node's document, or `nil` when the underlying
  # libxml doc has none set.
  def encoding : String?
    raw = @node.as(LibXML::Doc*).value.encoding
    String.new(raw) unless raw.null?
  end

  # Returns the version of this node's document, or `nil` when the underlying
  # libxml doc has none set.
  def version : String?
    raw = @node.as(LibXML::Doc*).value.version
    String.new(raw) unless raw.null?
  end

  # :nodoc:
  def document : Document
    self
  end

  # Returns the list of `XML::Error` found when parsing this document.
  # Returns `nil` if no errors were found.
  def errors : Array(XML::Error)?
    found = @errors
    return nil if found.nil? || found.empty?

    found
  end
end
10 changes: 8 additions & 2 deletions src/xml/namespace.cr
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
class XML::Namespace
getter document : Node
getter document : Document

# :nodoc:
def initialize(@document : Node, @ns : LibXML::NS*)
@[Deprecated]
def self.new(document : Node, ns : LibXML::NS*)
new(document.as(Document), ns)
end

# :nodoc:
def initialize(@document : Document, @ns : LibXML::NS*)
end

# See `Object#hash(hasher)`
Expand Down
Loading