Skip to content

Commit

Permalink
Improved extracting issue ranges with year (#76)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Casvt committed Jul 23, 2023
1 parent ce5df4e commit c664c9d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 25 deletions.
34 changes: 12 additions & 22 deletions backend/files.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -136,15 +136,13 @@ def extract_filename_data(filepath: str, assume_volume_number: bool=True) -> dic
series, year, volume_number, special_version, issue_number = None, None, None, None, None

# Determine annual or not
annual = True
annual_result = annual_regex.search(basename(filepath))
annual_folder_result = annual_regex.search(basename(dirname(filepath)))
if annual_result and annual_folder_result:
annual = False
annual = not (annual_result and annual_folder_result)

# Generalise filename
filepath = (unquote(filepath)
.replace('+',' ')
.replace('+',' ')
.replace('_',' ')
.replace('_28','(')
.replace('_29',')')
Expand Down Expand Up @@ -225,26 +223,18 @@ def extract_filename_data(filepath: str, assume_volume_number: bool=True) -> dic
r = list(regex.finditer(filename, pos=volume_end))
if r:
r.sort(key=lambda e: (int(e.group(1)[-1] not in '0123456789'), 1 / e.start(0) if e.start(0) else 0))
issue_result = r[0]

if (year_pos <= issue_result.start(0) <= year_end
or year_pos <= issue_result.end(0) <= year_end):
for p in ({'endpos': issue_result.start(0)}, {'pos': issue_result.end(0)}):
issue_scd_result = regex.search(filename, **p)
if issue_scd_result:
# Issue number found
issue_number = issue_scd_result.group(1)
issue_pos = issue_scd_result.start(0)
break
else:
continue
break

for result in r:
if not (year_pos <= result.start(0) <= year_end
or year_pos <= result.end(0) <= year_end):
# Issue number found
issue_number = result.group(1)
issue_pos = result.start(0)
break
else:
# Issue number found
issue_number = issue_result.group(1)
issue_pos = issue_result.start(0)
break
continue
break

else:
issue_result = issue_regex_7.search(no_ext_clean_filename)
if issue_result:
Expand Down
6 changes: 3 additions & 3 deletions tests/Tbackend/files.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,7 +7,7 @@ class extract_filename_data(unittest.TestCase):
def run_cases(self, cases: Dict[str, dict]):
self.longMessage = False
for input, output in cases.items():
self.assertEqual(ef(input), output, f"'{input}' isn't extracted properly")
self.assertEqual(ef(input), output, f"'{input}' isn't extracted properly: {output}")
return

def test_general(self):
Expand Down Expand Up @@ -48,8 +48,8 @@ def test_general(self):
'Batman 026-050 (1945-1949) GetComics.INFO/Batman 048 52p ctc (08-1948) flattermann.cbr':
{'series': 'Batman', 'year': 1945, 'volume_number': 1, 'special_version': None, 'issue_number': 48.0, 'annual': False},

'01. X-Men Vol. 2 (#05, #1 – 113 + Annuals) Part 1 — #1 – 25':
{'series': 'X-Men', 'year': None, 'volume_number': 2, 'special_version': None, 'issue_number': (1.0, 25.0), 'annual': False}
'01. X-Men Vol. 2 (#05, #1 – 113 + Annuals) Part 1 — #1 – 25 --2022-2023--':
{'series': 'X-Men', 'year': 2022, 'volume_number': 2, 'special_version': None, 'issue_number': (1.0, 25.0), 'annual': False}
}
self.run_cases(cases)

Expand Down

0 comments on commit c664c9d

Please sign in to comment.