Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use only lxml for XML handling #863

Merged
merged 1 commit into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ jobs:
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9']
lxml: [true, false]
env:
LXML: ${{ matrix.lxml }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COVERALLS_SERVICE_NAME: github
steps:
Expand All @@ -28,8 +26,6 @@ jobs:
pip3 install -e .
pip3 install -r requirements.txt
pip3 install -r requirements-dev.txt
echo "LXML => $LXML"
if [ "$LXML" == "true" ]; then pip install lxml; fi
- name: run tests ⚙️
run: python3 -m pytest
- name: run coveralls ⚙️
Expand Down
2 changes: 1 addition & 1 deletion docs/en/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Installation
Requirements
------------

OWSLib requires a Python interpreter, as well as `ElementTree <https://docs.python.org/2/library/xml.etree.elementtree.html>`_ or `lxml <http://lxml.de>`_ for XML parsing.
OWSLib requires a Python interpreter, as well as `lxml <https://lxml.de>`_ for XML parsing.

Install
-------
Expand Down
2 changes: 1 addition & 1 deletion etc/debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ Homepage: http://geopython.github.com/OWSLib/

Package: python-owslib
Architecture: all
Depends: ${misc:Depends}, debconf, python (>=2.7), python-lxml
Depends: ${misc:Depends}, debconf, python (>=3), python-lxml
Description: OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models.
15 changes: 1 addition & 14 deletions owslib/catalogue/csw2.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,6 @@ def getrecords(self, qtype=None, keywords=[], typenames='csw:Record', propertyna
else:
# construct request
node0 = self._setrootelement('csw:GetRecords')
if etree.__name__ != 'lxml.etree': # apply nsmap manually
node0.set('xmlns:ows', namespaces['ows'])
node0.set('xmlns:gmd', namespaces['gmd'])
node0.set('xmlns:dif', namespaces['dif'])
node0.set('xmlns:fgdc', namespaces['fgdc'])
node0.set('outputSchema', outputschema)
node0.set('outputFormat', format)
node0.set('version', self.version)
Expand Down Expand Up @@ -354,11 +349,6 @@ def getrecords2(self, constraints=[], sortby=None, typenames='csw:Record', esn='
else:
# construct request
node0 = self._setrootelement('csw:GetRecords')
if etree.__name__ != 'lxml.etree': # apply nsmap manually
node0.set('xmlns:ows', namespaces['ows'])
node0.set('xmlns:gmd', namespaces['gmd'])
node0.set('xmlns:dif', namespaces['dif'])
node0.set('xmlns:fgdc', namespaces['fgdc'])
node0.set('outputSchema', outputschema)
node0.set('outputFormat', format)
node0.set('version', self.version)
Expand Down Expand Up @@ -622,10 +612,7 @@ def _setidentifierkey(self, el):
return el

def _setrootelement(self, el):
if etree.__name__ == 'lxml.etree': # apply nsmap
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
else:
return etree.Element(util.nspath_eval(el, namespaces))
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)

def _setconstraint(self, parent, qtype=None, propertyname='csw:AnyText', keywords=[], bbox=None, cql=None,
identifier=None):
Expand Down
10 changes: 1 addition & 9 deletions owslib/catalogue/csw3.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,6 @@ def getrecords(self, constraints=[], sortby=None, typenames='csw30:Record', esn=
else:
# construct request
node0 = self._setrootelement('csw30:GetRecords')
if etree.__name__ != 'lxml.etree': # apply nsmap manually
node0.set('xmlns:ows110', namespaces['ows110'])
node0.set('xmlns:gmd', namespaces['gmd'])
node0.set('xmlns:dif', namespaces['dif'])
node0.set('xmlns:fgdc', namespaces['fgdc'])
node0.set('outputSchema', outputschema)
node0.set('outputFormat', format)
node0.set('version', self.version)
Expand Down Expand Up @@ -516,10 +511,7 @@ def _setidentifierkey(self, el):
return el

def _setrootelement(self, el):
if etree.__name__ == 'lxml.etree': # apply nsmap
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
else:
return etree.Element(util.nspath_eval(el, namespaces))
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)

def _setconstraint(self, parent, qtype=None, propertyname='csw30:AnyText', keywords=[], bbox=None, cql=None,
identifier=None):
Expand Down
32 changes: 14 additions & 18 deletions owslib/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,33 @@
# Contact email: [email protected]
# =============================================================================


from lxml import etree
from lxml.etree import ParseError
ElementType = etree._Element

from owslib.namespaces import Namespaces


def patch_well_known_namespaces(etree_module):
"""Monkey patches the etree module to add some well-known namespaces."""
def patch_well_known_namespaces():
"""Monkey patches lxml.etree to add some well-known namespaces."""

ns = Namespaces()

try:
register_namespace = etree_module.register_namespace
register_namespace = etree.register_namespace
except AttributeError:
etree_module._namespace_map
etree._namespace_map

def register_namespace(prefix, uri):
etree_module._namespace_map[uri] = prefix
etree._namespace_map[uri] = prefix

for k, v in list(ns.get_namespaces().items()):
register_namespace(k, v)

etree.set_default_parser(
parser=etree.XMLParser(resolve_entities=False)
)

# try to find lxml or elementtree
try:
from lxml import etree
from lxml.etree import ParseError
ElementType = etree._Element
except ImportError:
import xml.etree.ElementTree as etree
ElementType = etree.Element
try:
from xml.etree.ElementTree import ParseError
except ImportError:
from xml.parsers.expat import ExpatError as ParseError

patch_well_known_namespaces(etree)
patch_well_known_namespaces()
2 changes: 1 addition & 1 deletion owslib/feature/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from urllib.parse import urlencode, parse_qsl
from owslib.etree import etree
from owslib.namespaces import Namespaces
from owslib.util import which_etree, findall, Authentication, openURL
from owslib.util import findall, Authentication, openURL

MYNS = Namespaces()
XS_NAMESPACE = MYNS.get_namespace("xs")
Expand Down
98 changes: 36 additions & 62 deletions owslib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,8 @@ def nspath_eval(xpath, namespaces):

def cleanup_namespaces(element):
""" Remove unused namespaces from an element """
if etree.__name__ == 'lxml.etree':
etree.cleanup_namespaces(element)
return element
else:
return etree.fromstring(etree.tostring(element))
etree.cleanup_namespaces(element)
return element


def add_namespaces(root, ns_keys):
Expand All @@ -292,35 +289,34 @@ def add_namespaces(root, ns_keys):

ns_keys = [(x, namespaces.get_namespace(x)) for x in ns_keys]

if etree.__name__ != 'lxml.etree':
# We can just add more namespaces when not using lxml.
# We can't re-add an existing namespace. Get a list of current
# namespaces in use
existing_namespaces = set()
for elem in root.iter():
if elem.tag[0] == "{":
uri, tag = elem.tag[1:].split("}")
existing_namespaces.add(namespaces.get_namespace_from_url(uri))
for key, link in ns_keys:
if link is not None and key not in existing_namespaces:
root.set("xmlns:%s" % key, link)
return root
else:
# lxml does not support setting xmlns attributes
# Update the elements nsmap with new namespaces
new_map = root.nsmap
for key, link in ns_keys:
if link is not None:
new_map[key] = link
# Recreate the root element with updated nsmap
new_root = etree.Element(root.tag, nsmap=new_map)
# Carry over attributes
for a, v in list(root.items()):
new_root.set(a, v)
# Carry over children
for child in root:
new_root.append(deepcopy(child))
return new_root
# lxml does not support setting xmlns attributes
# Update the elements nsmap with new namespaces
new_map = root.nsmap
for key, link in ns_keys:
if link is not None:
new_map[key] = link
# Recreate the root element with updated nsmap
new_root = etree.Element(root.tag, nsmap=new_map)
# Carry over attributes
for a, v in list(root.items()):
new_root.set(a, v)
# Carry over children
for child in root:
new_root.append(deepcopy(child))
return new_root

# We can just add more namespaces when not using lxml.
# We can't re-add an existing namespace. Get a list of current
# namespaces in use
existing_namespaces = set()
for elem in root.iter():
if elem.tag[0] == "{":
uri, tag = elem.tag[1:].split("}")
existing_namespaces.add(namespaces.get_namespace_from_url(uri))
for key, link in ns_keys:
if link is not None and key not in existing_namespaces:
root.set("xmlns:%s" % key, link)
return root


def getXMLInteger(elem, tag):
Expand Down Expand Up @@ -519,21 +515,14 @@ def element_to_string(element, encoding=None, xml_declaration=False):
if encoding is None:
encoding = "ISO-8859-1"

if etree.__name__ == 'lxml.etree':
if xml_declaration:
if encoding in ['unicode', 'utf-8']:
output = '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n{}'.format(
etree.tostring(element, encoding='unicode'))
else:
output = etree.tostring(element, encoding=encoding, xml_declaration=True)
if xml_declaration:
if encoding in ['unicode', 'utf-8']:
output = '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n{}'.format(
etree.tostring(element, encoding='unicode'))
else:
output = etree.tostring(element)
output = etree.tostring(element, encoding=encoding, xml_declaration=True)
else:
if xml_declaration:
output = '<?xml version="1.0" encoding="{}" standalone="no"?>\n{}'.format(
encoding, etree.tostring(element, encoding=encoding))
else:
output = etree.tostring(element)
output = etree.tostring(element)

return output

Expand Down Expand Up @@ -777,21 +766,6 @@ def bind_url(url):
log.addHandler(NullHandler())


def which_etree():
"""decipher which etree library is being used by OWSLib"""

which_etree = None

if 'lxml' in etree.__file__:
which_etree = 'lxml.etree'
elif 'xml/etree' in etree.__file__:
which_etree = 'xml.etree'
elif 'elementree' in etree.__file__:
which_etree = 'elementtree.ElementTree'

return which_etree


def findall(root, xpath, attribute_name=None, attribute_value=None):
"""Find elements recursively from given root element based on
xpath and possibly given attribute
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
dataclasses; python_version < '3.7'
lxml
python-dateutil>=1.5
pytz
requests>=1.0
pyyaml
dataclasses; python_version < '3.7'
requests>=1.0