Skip to content

Commit

Permalink
feat: Extract document creation date from XML draft (#5733)
Browse files Browse the repository at this point in the history
* fix: Extract document creation date from XML draft

* test: Fix test
  • Loading branch information
jennifer-richards authored Jun 1, 2023
1 parent 8d4780d commit 5a27082
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 24 deletions.
2 changes: 1 addition & 1 deletion ietf/submit/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3354,7 +3354,7 @@ def test_process_submission_xml(self):
self.assertEqual(output["title"], "Correct Draft Title")
self.assertIsNone(output["abstract"])
self.assertEqual(len(output["authors"]), 1) # not checking in detail, parsing is unreliable
self.assertIsNone(output["document_date"])
self.assertEqual(output["document_date"], date_today())
self.assertIsNone(output["pages"])
self.assertIsNone(output["words"])
self.assertIsNone(output["first_two_pages"])
Expand Down
9 changes: 7 additions & 2 deletions ietf/submit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ def process_submission_xml(filename, revision):
for auth in xml_draft.get_author_list()
],
"abstract": None, # not supported from XML
"document_date": None, # not supported from XML
"document_date": xml_draft.get_creation_date(),
"pages": None, # not supported from XML
"words": None, # not supported from XML
"first_two_pages": None, # not supported from XML
Expand Down Expand Up @@ -1287,9 +1287,14 @@ def process_and_validate_submission(submission):
if not submission.title:
raise SubmissionError("Could not determine the title of the draft")

# Items to get from text only when not available from XML
if xml_metadata and xml_metadata.get("document_date", None) is not None:
submission.document_date = xml_metadata["document_date"]
else:
submission.document_date = text_metadata["document_date"]

# Items always to get from text, even when XML is available
submission.abstract = text_metadata["abstract"]
submission.document_date = text_metadata["document_date"]
submission.pages = text_metadata["pages"]
submission.words = text_metadata["words"]
submission.first_two_pages = text_metadata["first_two_pages"]
Expand Down
62 changes: 41 additions & 21 deletions ietf/utils/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,46 @@ def get_title(self):

def get_wordcount(self):
    """Placeholder with no implementation here; calling it always raises NotImplementedError."""
    raise NotImplementedError()

@staticmethod
def _construct_creation_date(year, month, day=None):
    """Construct a date for the document.

    Roughly follows RFC 7991 section 2.17, but only the day may be omitted.
    When no day is given (None or 0), today's day is used if month/year are
    the current month/year; otherwise the 15th is assumed.

    year: integer or string with 4-digit year
    month: integer or string with numeric or English month. Some abbreviations recognized.
    day: integer or string with numeric day of month. Optional.

    Returns a datetime.date.

    Raises ValueError if there is a problem interpreting the data
    """
    try:
        year = int(year)
        # None means "no day given"; int(None) would raise TypeError, so map
        # it to 0, which the missing-day check below treats as "not given".
        day = 0 if day is None else int(day)
    except TypeError as err:
        # e.g. year is None: keep the documented ValueError-only contract
        raise ValueError("Year and day must be integers or numeric strings") from err

    if isinstance(month, str):
        month_key = month.lower()
        # Try full names first, then the 3- and 4-letter abbreviation tables.
        for names in (month_names, month_names_abbrev3, month_names_abbrev4):
            if month_key in names:
                month = names.index(month_key) + 1
                break
        else:
            if month_key.isdigit() and int(month_key) in range(1, 13):
                month = int(month_key)
            else:
                raise ValueError("Unrecognized month")
    elif not isinstance(month, int):
        # e.g. None: datetime.date() would raise TypeError rather than ValueError
        raise ValueError("Unrecognized month")

    if not day:
        # if the date was given with only month and year, use
        # today's date if month and year is today's month and
        # year, otherwise pick the middle of the month.
        # Don't use today's day for month and year in the past
        today = date_today()
        if month == today.month and year == today.year:
            day = today.day
        else:
            day = 15
    # datetime.date raises ValueError for out-of-range year/month/day values
    return datetime.date(year, month, day)


# ----------------------------------------------------------------------

Expand Down Expand Up @@ -460,27 +500,7 @@ def get_creation_date(self):
day = int( md.get( 'day', 0 ) )
year = int( md['year'] )
try:
if mon in month_names:
month = month_names.index( mon ) + 1
elif mon in month_names_abbrev3:
month = month_names_abbrev3.index( mon ) + 1
elif mon in month_names_abbrev4:
month = month_names_abbrev4.index( mon ) + 1
elif mon.isdigit() and int(mon) in range(1,13):
month = int(mon)
else:
continue
today = date_today()
if day==0:
# if the date was given with only month and year, use
# today's date if month and year is today's month and
# year, otherwise pick the middle of the month.
# Don't use today's day for month and year in the past
if month==today.month and year==today.year:
day = today.day
else:
day = 15
self._creation_date = datetime.date(year, month, day)
self._creation_date = self._construct_creation_date(year, mon, day)
return self._creation_date
except ValueError:
# mon abbreviation not in _MONTH_NAMES
Expand Down
11 changes: 11 additions & 0 deletions ietf/utils/xmldraft.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,17 @@ def _parse_docname(self):
def get_title(self):
    """Return the text of the front/title element with surrounding whitespace removed."""
    title_text = self.xmlroot.findtext('front/title')
    return title_text.strip()

def get_creation_date(self):
    """Return the document date taken from the XML front/date element.

    Reads the year, month and day attributes of the first "front/date"
    element and passes them to _construct_creation_date().

    Returns None when there is no such element or when its attributes
    cannot be interpreted as a date.
    """
    date_elt = self.xmlroot.find("front/date")
    if date_elt is None:
        return None
    try:
        return self._construct_creation_date(
            date_elt.get("year"),
            date_elt.get("month"),
            # Use 0 for a missing day attribute: the helper converts day with
            # int() and treats a falsy result as "no day given".
            date_elt.get("day", 0),
        )
    except (TypeError, ValueError):
        # Missing attributes arrive as None and may surface as TypeError;
        # unparseable values raise ValueError. Either way there is no usable date.
        return None

# todo fix the implementation of XMLDraft.get_abstract()
#
# This code was pulled from ietf.submit.forms where it existed for some time.
Expand Down

0 comments on commit 5a27082

Please sign in to comment.