From 25953708ebeaff307c0de9c3c41ce86c101d0446 Mon Sep 17 00:00:00 2001
From: KlaasJan
Date: Wed, 6 Jan 2021 14:00:23 +0100
Subject: [PATCH 1/2] No longer using defusedxml since it is not necessary.

- mimicked the behaviour of defusedxml in loader.py
- added DTDForbidden and EntitiesForbidden to zeep.exceptions
- bumped lxml version to >= 4.6.0
---
 README.rst                     |  1 +
 setup.py                       |  3 +--
 src/zeep/exceptions.py         | 27 +++++++++++++++++++++++++++
 src/zeep/loader.py             | 29 ++++++++++++++++++-----------
 src/zeep/wsdl/messages/mime.py |  2 +-
 tests/test_loader.py           |  2 +-
 tests/test_wsdl.py             |  2 +-
 7 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/README.rst b/README.rst
index df2cf70b2..16e1c6a6d 100644
--- a/README.rst
+++ b/README.rst
@@ -46,6 +46,7 @@ Installation
 
 Note that the latest version to support Python 2.7, 3.3, 3.4 and 3.5 is Zeep 3.4, install via `pip install zeep==3.4.0`
 
+Zeep uses the lxml library for parsing xml. See https://lxml.de/installation.html for the installation requirements.
 
 Usage
 -----
diff --git a/setup.py b/setup.py
index e932830dc..9f5f4d48b 100755
--- a/setup.py
+++ b/setup.py
@@ -6,9 +6,8 @@
     "appdirs>=1.4.0",
     "attrs>=17.2.0",
     "cached-property>=1.3.0",
-    "defusedxml>=0.4.1",
     "isodate>=0.5.4",
-    "lxml>=3.1.0",
+    "lxml>=4.6.0",
     "requests>=2.7.0",
     "requests-toolbelt>=0.7.1",
     "requests-file>=1.5.1",
diff --git a/src/zeep/exceptions.py b/src/zeep/exceptions.py
index 663fa742f..632b3b088 100644
--- a/src/zeep/exceptions.py
+++ b/src/zeep/exceptions.py
@@ -91,3 +91,30 @@ class IncompleteMessage(Error):
 
 class IncompleteOperation(Error):
     pass
+
+
+class DTDForbidden(Error):
+    """Raised when a document declares a DTD while ``forbid_dtd`` is set."""
+
+    def __init__(self, name, sysid, pubid):
+        super(DTDForbidden, self).__init__()
+        self.name = name
+        self.sysid = sysid
+        self.pubid = pubid
+
+    def __str__(self):
+        tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})"
+        return tpl.format(self.name, self.sysid, self.pubid)
+
+
+class EntitiesForbidden(Error):
+    """Raised when a DTD declares an entity while ``forbid_entities`` is set."""
+
+    def __init__(self, name, content):
+        super(EntitiesForbidden, self).__init__()
+        self.name = name
+        self.content = content
+
+    def __str__(self):
+        tpl = "EntitiesForbidden(name='{}', content={!r})"
+        return tpl.format(self.name, self.content)
diff --git a/src/zeep/loader.py b/src/zeep/loader.py
index 8fa9efe99..e1c8de78a 100644
--- a/src/zeep/loader.py
+++ b/src/zeep/loader.py
@@ -2,14 +2,14 @@
 import typing
 from urllib.parse import urljoin, urlparse, urlunparse
 
-from defusedxml.lxml import fromstring
 from lxml import etree
+from lxml.etree import Resolver, XMLParser, fromstring
 
-from zeep.exceptions import XMLSyntaxError
+from zeep.exceptions import DTDForbidden, EntitiesForbidden, XMLSyntaxError
 from zeep.settings import Settings
 
 
-class ImportResolver(etree.Resolver):
+class ImportResolver(Resolver):
     """Custom lxml resolve to use the transport object"""
 
     def __init__(self, transport):
@@ -39,7 +39,7 @@ def parse_xml(content: str, transport, base_url=None, settings=None):
     """
     settings = settings or Settings()
     recover = not settings.strict
-    parser = etree.XMLParser(
+    parser = XMLParser(
         remove_comments=True,
         resolve_entities=False,
         recover=recover,
@@ -47,13 +47,20 @@ def parse_xml(content: str, transport, base_url=None, settings=None):
     )
     parser.resolvers.add(ImportResolver(transport))
     try:
-        return fromstring(
-            content,
-            parser=parser,
-            base_url=base_url,
-            forbid_dtd=settings.forbid_dtd,
-            forbid_entities=settings.forbid_entities,
-        )
+        elementtree = fromstring(content, parser=parser, base_url=base_url)
+        docinfo = elementtree.getroottree().docinfo
+        if docinfo.doctype:
+            if settings.forbid_dtd:
+                raise DTDForbidden(docinfo.doctype, docinfo.system_url, docinfo.public_id)
+            if settings.forbid_entities:
+                for dtd in docinfo.internalDTD, docinfo.externalDTD:
+                    if dtd is None:
+                        continue
+                    for entity in dtd.iterentities():
+                        raise EntitiesForbidden(entity.name, entity.content)
+
+
+        return elementtree
     except etree.XMLSyntaxError as exc:
         raise XMLSyntaxError(
             "Invalid XML content received (%s)" % exc.msg, content=content
diff --git a/src/zeep/wsdl/messages/mime.py b/src/zeep/wsdl/messages/mime.py
index 74c7c6844..c37faec20 100644
--- a/src/zeep/wsdl/messages/mime.py
+++ b/src/zeep/wsdl/messages/mime.py
@@ -5,8 +5,8 @@
 """
 from urllib.parse import urlencode
 
-from defusedxml.lxml import fromstring
 from lxml import etree
+from lxml.etree import fromstring
 
 from zeep import ns, xsd
 from zeep.helpers import serialize_object
diff --git a/tests/test_loader.py b/tests/test_loader.py
index ada2e4ec9..4ab54559d 100644
--- a/tests/test_loader.py
+++ b/tests/test_loader.py
@@ -1,5 +1,5 @@
 import pytest
-from defusedxml import DTDForbidden, EntitiesForbidden
+from zeep.exceptions import DTDForbidden, EntitiesForbidden
 from pytest import raises as assert_raises
 
 from tests.utils import DummyTransport
diff --git a/tests/test_wsdl.py b/tests/test_wsdl.py
index 00271a50c..27cbe0fa0 100644
--- a/tests/test_wsdl.py
+++ b/tests/test_wsdl.py
@@ -3,7 +3,7 @@
 
 import pytest
 import requests_mock
-from defusedxml import DTDForbidden, EntitiesForbidden
+from zeep.exceptions import DTDForbidden, EntitiesForbidden
 from lxml import etree
 from pretend import stub
 

From 76ae8caa068a56205319e6a6b5ff30bebcc72a4f Mon Sep 17 00:00:00 2001
From: KlaasJan
Date: Wed, 6 Jan 2021 14:09:04 +0100
Subject: [PATCH 2/2] No longer using defusedxml since it is not necessary.

- mimicked the behaviour of defusedxml in loader.py
- bumped lxml version to >= 4.6.0
---
 src/zeep/loader.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/zeep/loader.py b/src/zeep/loader.py
index e1c8de78a..69723fd75 100644
--- a/src/zeep/loader.py
+++ b/src/zeep/loader.py
@@ -52,12 +52,12 @@ def parse_xml(content: str, transport, base_url=None, settings=None):
         if docinfo.doctype:
             if settings.forbid_dtd:
                 raise DTDForbidden(docinfo.doctype, docinfo.system_url, docinfo.public_id)
-            if settings.forbid_entities:
-                for dtd in docinfo.internalDTD, docinfo.externalDTD:
-                    if dtd is None:
-                        continue
-                    for entity in dtd.iterentities():
-                        raise EntitiesForbidden(entity.name, entity.content)
+        if settings.forbid_entities:
+            for dtd in docinfo.internalDTD, docinfo.externalDTD:
+                if dtd is None:
+                    continue
+                for entity in dtd.iterentities():
+                    raise EntitiesForbidden(entity.name, entity.content)
 
 
         return elementtree