Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Extract document creation date from XML draft #5733

Merged
merged 3 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ietf/submit/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3354,7 +3354,7 @@ def test_process_submission_xml(self):
self.assertEqual(output["title"], "Correct Draft Title")
self.assertIsNone(output["abstract"])
self.assertEqual(len(output["authors"]), 1) # not checking in detail, parsing is unreliable
self.assertIsNone(output["document_date"])
self.assertEqual(output["document_date"], date_today())
self.assertIsNone(output["pages"])
self.assertIsNone(output["words"])
self.assertIsNone(output["first_two_pages"])
Expand Down
9 changes: 7 additions & 2 deletions ietf/submit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ def process_submission_xml(filename, revision):
for auth in xml_draft.get_author_list()
],
"abstract": None, # not supported from XML
"document_date": None, # not supported from XML
"document_date": xml_draft.get_creation_date(),
"pages": None, # not supported from XML
"words": None, # not supported from XML
"first_two_pages": None, # not supported from XML
Expand Down Expand Up @@ -1287,9 +1287,14 @@ def process_and_validate_submission(submission):
if not submission.title:
raise SubmissionError("Could not determine the title of the draft")

# Items to get from text only when not available from XML
if xml_metadata and xml_metadata.get("document_date", None) is not None:
submission.document_date = xml_metadata["document_date"]
else:
submission.document_date = text_metadata["document_date"]

# Items always to get from text, even when XML is available
submission.abstract = text_metadata["abstract"]
submission.document_date = text_metadata["document_date"]
submission.pages = text_metadata["pages"]
submission.words = text_metadata["words"]
submission.first_two_pages = text_metadata["first_two_pages"]
Expand Down
62 changes: 41 additions & 21 deletions ietf/utils/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,46 @@ def get_title(self):

def get_wordcount(self):
    """Not implemented here: calling this always raises NotImplementedError."""
    raise NotImplementedError

@staticmethod
def _construct_creation_date(year, month, day=None):
    """Construct a date for the document

    Roughly follows RFC 7991 section 2.17, but only allows a missing day.
    When the day is missing, today's day is used if month/year are the
    current month/year; otherwise the 15th is assumed.

    year: integer or string with 4-digit year
    month: integer or string with numeric or English month. Some abbreviations recognized.
    day: integer or string with numeric day of month. Optional; None or 0
         means that no day was given.

    Raises ValueError if there is a problem interpreting the data
    """
    try:
        year = int(year)
        # int(None) raises TypeError, so a missing day is mapped to 0
        # explicitly and filled in further down.
        day = 0 if day is None else int(day)
    except TypeError as err:
        # e.g. year is None: report it as the documented ValueError
        raise ValueError("Year and day must be integers or numeric strings") from err
    if isinstance(month, str):
        month = month.lower()
        if month in month_names:
            month = month_names.index(month) + 1
        elif month in month_names_abbrev3:
            month = month_names_abbrev3.index(month) + 1
        elif month in month_names_abbrev4:
            month = month_names_abbrev4.index(month) + 1
        elif month.isdigit() and int(month) in range(1, 13):
            month = int(month)
        else:
            raise ValueError("Unrecognized month")
    elif not isinstance(month, int):
        # None or any other non-integer would otherwise surface as a
        # TypeError from datetime.date() below
        raise ValueError("Unrecognized month")
    if not day:
        # if the date was given with only month and year, use
        # today's date if month and year is today's month and
        # year, otherwise pick the middle of the month.
        # Don't use today's day for month and year in the past
        today = date_today()
        if month == today.month and year == today.year:
            day = today.day
        else:
            day = 15
    # datetime.date() itself raises ValueError for out-of-range values
    return datetime.date(year, month, day)


# ----------------------------------------------------------------------

Expand Down Expand Up @@ -460,27 +500,7 @@ def get_creation_date(self):
day = int( md.get( 'day', 0 ) )
year = int( md['year'] )
try:
if mon in month_names:
month = month_names.index( mon ) + 1
elif mon in month_names_abbrev3:
month = month_names_abbrev3.index( mon ) + 1
elif mon in month_names_abbrev4:
month = month_names_abbrev4.index( mon ) + 1
elif mon.isdigit() and int(mon) in range(1,13):
month = int(mon)
else:
continue
today = date_today()
if day==0:
# if the date was given with only month and year, use
# today's date if month and year is today's month and
# year, otherwise pick the middle of the month.
# Don't use today's day for month and year in the past
if month==today.month and year==today.year:
day = today.day
else:
day = 15
self._creation_date = datetime.date(year, month, day)
self._creation_date = self._construct_creation_date(year, mon, day)
return self._creation_date
except ValueError:
# mon abbreviation not in _MONTH_NAMES
Expand Down
11 changes: 11 additions & 0 deletions ietf/utils/xmldraft.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,17 @@ def _parse_docname(self):
def get_title(self):
    """Return the text of the front/title element, stripped of surrounding whitespace."""
    title_text = self.xmlroot.findtext('front/title')
    return title_text.strip()

def get_creation_date(self):
    """Return the document date taken from the front/date element, or None.

    Reads the "year", "month" and "day" attributes of front/date and passes
    them to self._construct_creation_date(). Returns None when there is no
    date element, when the year or month attribute is absent, or when the
    values cannot be interpreted as a date.
    """
    date_elt = self.xmlroot.find("front/date")
    if date_elt is None:
        return None
    year = date_elt.get("year")
    month = date_elt.get("month")
    if year is None or month is None:
        # only the day may be omitted; without year and month there is
        # nothing to build a date from
        return None
    try:
        return self._construct_creation_date(year, month, date_elt.get("day", None))
    except (TypeError, ValueError):
        # int() conversions can raise TypeError (e.g. for None) as well as
        # ValueError on unusable input; treat both as "no usable date"
        return None

# todo fix the implementation of XMLDraft.get_abstract()
#
# This code was pulled from ietf.submit.forms where it existed for some time.
Expand Down