Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strip trailing tabs in embedded YAML header. #567

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,11 @@ def _separate_metadata_and_table_from_stream(s: io.StringIO):
if header_section:
header_section = False
elif header_section:
metadata_component.write(line)
# We strip any trailing tabs. Such tabs may have been left
# by a spreadsheet editor who treated the header lines as
# if they were normal data lines; they would prevent the
# YAML parser from correctly parsing the metadata block.
metadata_component.write(line.rstrip("\t\n") + "\n")
else:
logging.info(
f"Line {line} is starting with hash symbol, but header section is already passed. "
Expand Down
21 changes: 21 additions & 0 deletions tests/data/trailing-tabs.sssom.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#curie_map:
# COMENT: https://example.com/entities/
# COMPID: https://example.com/people/
# ORGENT: https://example.org/entities/
# ORGPID: https://example.org/people/
#mapping_set_id: https://example.org/sets/exo2c
#mapping_set_title: O2C set
#creator_id:
# - ORGPID:0000-0000-0001-1234
# - COMPID:0000-0000-0002-5678
#license: https://creativecommons.org/licenses/by/4.0/
#publication_date: 2023-09-13
subject_id subject_label predicate_id object_id object_label mapping_justification
ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration
ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration
ORGENT:0004 daphne skos:closeMatch COMENT:0014 delta semapv:ManualMappingCuration
ORGENT:0005 eve skos:closeMatch COMENT:0015 epsilon semapv:ManualMappingCuration
ORGENT:0006 fanny skos:closeMatch COMENT:0016 zeta semapv:ManualMappingCuration
ORGENT:0007 gavin skos:exactMatch COMENT:0013 gamma semapv:ManualMappingCuration
ORGENT:0008 hector skos:closeMatch COMENT:0017 eta semapv:ManualMappingCuration
ORGENT:0009 ivan skos:exactMatch COMENT:0019 iota semapv:ManualMappingCuration
11 changes: 11 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,17 @@ def test_parse_obographs_merged(self):
msdf = parse_sssom_table(outfile)
self.assertTrue(custom_curie_map.items() <= msdf.prefix_map.items())

def test_parse_trailing_tabs_in_metadata_header(self):
    """Check that a file with trailing tabs in its header still parses.

    Parses the ``trailing-tabs.sssom.tsv`` fixture and verifies that the
    ``mapping_set_id`` metadata entry is read and that eight mappings
    are loaded.
    """
    input_path = f"{test_data_dir}/trailing-tabs.sssom.tsv"
    parsed = parse_sssom_table(input_path)

    # The metadata block must have been read despite the trailing tabs.
    expected_set_id = "https://example.org/sets/exo2c"
    self.assertEqual(parsed.metadata["mapping_set_id"], expected_set_id)

    # Every data row of the fixture must end up in the data frame.
    mapping_count = len(parsed.df)
    self.assertEqual(mapping_count, 8, f"{input_path} has the wrong number of mappings.")


class TestParseExplicit(unittest.TestCase):
"""This test case contains explicit tests for parsing."""
Expand Down
Loading