|
4 | 4 | import json |
5 | 5 | import math |
6 | 6 | import os |
| 7 | +import tempfile |
7 | 8 | import unittest |
| 9 | +from pathlib import Path |
8 | 10 | from xml.dom import minidom |
9 | 11 |
|
10 | 12 | import numpy as np |
11 | 13 | import pandas as pd |
12 | 14 | import yaml |
13 | 15 | from rdflib import Graph |
14 | 16 |
|
| 17 | +from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX |
| 18 | +from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter |
15 | 19 | from sssom.io import parse_file |
16 | 20 | from sssom.parsers import ( |
| 21 | + _open_input, |
| 22 | + _read_pandas_and_metadata, |
17 | 23 | from_alignment_minidom, |
18 | 24 | from_obographs, |
19 | 25 | from_sssom_dataframe, |
|
22 | 28 | parse_sssom_table, |
23 | 29 | ) |
24 | 30 | from sssom.typehints import Metadata |
25 | | -from sssom.util import PREFIX_MAP_KEY, sort_df_rows_columns |
| 31 | +from sssom.util import PREFIX_MAP_KEY, MappingSetDataFrame, sort_df_rows_columns |
26 | 32 | from sssom.writers import write_table |
27 | 33 | from tests.test_data import data_dir as test_data_dir |
28 | 34 | from tests.test_data import test_out_dir |
@@ -245,3 +251,64 @@ def test_parse_obographs_merged(self): |
245 | 251 | ) |
246 | 252 | msdf = parse_sssom_table(outfile) |
247 | 253 | self.assertTrue(custom_curie_map.items() <= msdf.prefix_map.items()) |
| 254 | + |
| 255 | + |
class TestParseExplicit(unittest.TestCase):
    """Hand-built, explicit checks of the SSSOM parsing machinery."""

    def test_round_trip(self):
        """Write a one-row mapping set to a TSV file and check the metadata read back."""
        # A single mapping, expressed as column name -> cell value.
        record = {
            "subject_id": "DOID:0050601",
            "subject_label": "ADULT syndrome",
            "predicate_id": "skos:exactMatch",
            "object_id": "UMLS:C1863204",
            "object_label": "ADULT SYNDROME",
            "mapping_justification": "semapv:ManualMappingCuration",
            "creator_id": "orcid:0000-0003-4423-4370",
        }
        mappings = pd.DataFrame([tuple(record.values())], columns=list(record))

        msdf = MappingSetDataFrame(df=mappings, converter=ensure_converter())
        msdf.clean_prefix_map(strict=True)

        # After cleaning, the prefix map should hold the prefixes used by the
        # row above together with the built-in ones.
        expected_prefixes = set(SSSOM_BUILT_IN_PREFIXES).union(
            {"DOID", "semapv", "orcid", "skos", "UMLS"}
        )
        self.assertEqual(expected_prefixes, set(msdf.prefix_map))

        with tempfile.TemporaryDirectory() as tmp_name:
            tsv_path = Path(tmp_name) / "test.sssom.tsv"
            with tsv_path.open("w") as handle:
                write_table(msdf, handle)

            # Read back only once the handle is closed, and before the
            # temporary directory is removed.
            _frame, read_metadata = _read_pandas_and_metadata(_open_input(tsv_path))

        # This tests what's actually in the file after it's written out
        expected_keys = {CURIE_MAP, "license", "mapping_set_id"}
        self.assertEqual(expected_keys, set(read_metadata))
        self.assertEqual(DEFAULT_LICENSE, read_metadata["license"])

        mapping_set_id = read_metadata["mapping_set_id"]
        self.assertTrue(mapping_set_id.startswith(f"{SSSOM_URI_PREFIX}mappings/"))

        expected_curie_map = {
            "DOID": "http://purl.obolibrary.org/obo/DOID_",
            "UMLS": "http://linkedlifedata.com/resource/umls/id/",
            "orcid": "https://orcid.org/",
            "owl": "http://www.w3.org/2002/07/owl#",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "semapv": "https://w3id.org/semapv/vocab/",
            "skos": "http://www.w3.org/2004/02/skos/core#",
            "sssom": "https://w3id.org/sssom/",
        }
        self.assertEqual(expected_curie_map, read_metadata[CURIE_MAP])
0 commit comments