feat: Extract document creation date from XML draft (#5733)

* fix: Extract document creation date from XML draft
* test: Fix test
ietf-tools · Jun 1, 2023 · 5a27082 · 5a27082
1 parent 8d4780d
commit 5a27082
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 24 deletions.
diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py
@@ -3354,7 +3354,7 @@ def test_process_submission_xml(self):
         self.assertEqual(output["title"], "Correct Draft Title")
         self.assertIsNone(output["abstract"])
         self.assertEqual(len(output["authors"]), 1)  # not checking in detail, parsing is unreliable
-        self.assertIsNone(output["document_date"])
+        self.assertEqual(output["document_date"], date_today())
         self.assertIsNone(output["pages"])
         self.assertIsNone(output["words"])
         self.assertIsNone(output["first_two_pages"])

diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py
@@ -1159,7 +1159,7 @@ def process_submission_xml(filename, revision):
             for auth in xml_draft.get_author_list()
         ],
         "abstract": None,  # not supported from XML
-        "document_date": None,  # not supported from XML
+        "document_date": xml_draft.get_creation_date(),
         "pages": None,  # not supported from XML
         "words": None,  # not supported from XML
         "first_two_pages": None,  # not supported from XML
@@ -1287,9 +1287,14 @@ def process_and_validate_submission(submission):
         if not submission.title:
             raise SubmissionError("Could not determine the title of the draft")
 
+        # Items to get from text only when not available from XML
+        if xml_metadata and xml_metadata.get("document_date", None) is not None:
+            submission.document_date = xml_metadata["document_date"]
+        else:
+            submission.document_date = text_metadata["document_date"]
+
         # Items always to get from text, even when XML is available
         submission.abstract = text_metadata["abstract"]
-        submission.document_date = text_metadata["document_date"]
         submission.pages = text_metadata["pages"]
         submission.words = text_metadata["words"]
         submission.first_two_pages = text_metadata["first_two_pages"]

diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py
@@ -189,6 +189,46 @@ def get_title(self):
 
     def get_wordcount(self):
         raise NotImplementedError
+
+    @staticmethod
+    def _construct_creation_date(year, month, day=None):
+        """Construct a date for the document
+        
+        Roughly follows RFC 7991 section 2.17, but only allows missing day and
+        assumes the 15th if day is not specified month/year are not current.
+        
+        year: integer or string with 4-digit year
+        month: integer or string with numeric or English month. Some abbreviations recognized.
+        day: integer or string with numeric day of month. Optional.
+        
+        Raises ValueError if there is a problem interpreting the data
+        """
+        year = int(year)
+        day = int(day)
+        if isinstance(month, str):
+            month = month.lower()
+            if month in month_names:
+                month = month_names.index(month) + 1
+            elif month in month_names_abbrev3:
+                month = month_names_abbrev3.index(month) + 1
+            elif month in month_names_abbrev4:
+                month = month_names_abbrev4.index(month) + 1
+            elif month.isdigit() and int(month) in range(1, 13):
+                month = int(month)
+            else:
+                raise ValueError("Unrecognized month")
+        today = date_today()
+        if not day:
+            # if the date was given with only month and year, use
+            # today's date if month and year is today's month and
+            # year, otherwise pick the middle of the month.
+            # Don't use today's day for month and year in the past
+            if month == today.month and year == today.year:
+                day = today.day
+            else:
+                day = 15
+        return datetime.date(year, month, day)
+
 
 # ----------------------------------------------------------------------
 
@@ -460,27 +500,7 @@ def get_creation_date(self):
                 day = int( md.get( 'day', 0 ) )
                 year = int( md['year'] )
                 try:
-                    if   mon in month_names:
-                        month = month_names.index( mon ) + 1
-                    elif mon in month_names_abbrev3:
-                        month = month_names_abbrev3.index( mon ) + 1
-                    elif mon in month_names_abbrev4:
-                        month = month_names_abbrev4.index( mon ) + 1
-                    elif mon.isdigit() and int(mon) in range(1,13):
-                        month = int(mon)
-                    else:
-                        continue
-                    today = date_today()
-                    if day==0:
-                        # if the date was given with only month and year, use
-                        # today's date if month and year is today's month and
-                        # year, otherwise pick the middle of the month.
-                        # Don't use today's day for month and year in the past
-                        if month==today.month and year==today.year:
-                            day = today.day
-                        else:
-                            day = 15
-                    self._creation_date = datetime.date(year, month, day)
+                    self._creation_date = self._construct_creation_date(year, mon, day)
                     return self._creation_date
                 except ValueError:
                     # mon abbreviation not in _MONTH_NAMES

diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py
@@ -133,6 +133,17 @@ def _parse_docname(self):
     def get_title(self):
         return self.xmlroot.findtext('front/title').strip()
 
+    def get_creation_date(self):
+        date_elt = self.xmlroot.find("front/date")
+        if date_elt is not None:
+            try:
+                year = date_elt.get("year")
+                month = date_elt.get("month")
+                return self._construct_creation_date(year, month, date_elt.get("day", None))
+            except ValueError:
+                pass
+        return None
+
     # todo fix the implementation of XMLDraft.get_abstract()
     #
     # This code was pulled from ietf.submit.forms where it existed for some time.