From 023e2b063a9f56a4bd285d271193c00bed786e68 Mon Sep 17 00:00:00 2001 From: Joe Clarke Date: Tue, 12 Nov 2024 11:02:56 -0500 Subject: [PATCH 1/2] Use lxml for XML extraction. Pyang already requires lxml, so this isn't technically a new requirement. Lxml is also much more powerful and flexible when it comes to parsing XML. The builtin xml library dies on unknown entities, whereas lxml allows us to skip them. --- requirements.txt | 1 + xym/xym.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 36c5de2..0518826 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests>=2.6 pyang>=2.5.0 +lxml \ No newline at end of file diff --git a/xym/xym.py b/xym/xym.py index cb876ae..bdf1ace 100644 --- a/xym/xym.py +++ b/xym/xym.py @@ -7,7 +7,7 @@ import os.path import re import sys -import xml.etree.ElementTree as ET +from lxml import etree as ET from collections import Counter import requests @@ -699,7 +699,10 @@ def write_code_snippets_to_files(self): code_snippet_file.write(line) def extract_yang_model_xml(self, content): - root = ET.fromstring(content) + doc_parser = ET.XMLParser( + resolve_entities=False, recover=True, ns_clean=True, encoding="utf-8" + ) + root = ET.fromstring(content.encode("utf-8"), doc_parser) for sourcecode in root.iter("sourcecode"): if not sourcecode.text: continue From 5938fc885271267feecf1a461ea3c777bcfdbbba Mon Sep 17 00:00:00 2001 From: Joe Clarke Date: Thu, 14 Nov 2024 11:38:28 -0500 Subject: [PATCH 2/2] Multiple fixes. 
* Fix a typo in argument parsing * Start the module one index earlier (i - 1), since the loop will always run one extra time --- xym/xym.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xym/xym.py b/xym/xym.py index bdf1ace..def9ef9 100644 --- a/xym/xym.py +++ b/xym/xym.py @@ -740,7 +740,8 @@ def extract_yang_model_xml(self, content): match = self.MODULE_STATEMENT.match(line) if match is None: continue - lines = lines[i:] + mstart = i - 1 + lines = lines[mstart:] if not output_file: self.warning('Missing file name in ') if match.group(2) or match.group(5): @@ -842,8 +843,12 @@ def xym(source_id, srcdir, dstdir, strict=False, strict_name=False, strict_examp parser.add_argument("source", help="The URL or file name of the RFC/draft text from " "which to get the model") - parser.add_argument("--rfcxml", action='store_ture', default=False, - help="Parse a file in RFCXMLv3 format") + parser.add_argument( + "--rfcxml", + action="store_true", + default=False, + help="Parse a file in RFCXMLv3 format", + ) parser.add_argument("--srcdir", default='.', help="Optional: directory where to find the source " "text; default is './'")