diff --git a/allensdk/api/cache.py b/allensdk/api/cache.py index 38aa90529..666c4844a 100755 --- a/allensdk/api/cache.py +++ b/allensdk/api/cache.py @@ -244,7 +244,7 @@ def load_csv(self, index : string, optional post-rename column to use as the row label. ''' - data = pd.DataFrame.from_csv(path) + data = pd.read_csv(path, parse_dates=True) Cache.rename_columns(data, rename) @@ -383,14 +383,14 @@ def csv_writer(pth, gen): def cache_csv_json(): return { 'writer': Cache.csv_writer, - 'reader': lambda f: pd.DataFrame.from_csv(f).to_dict('records') + 'reader': lambda f: pd.read_csv(f, parse_dates=True).to_dict('records') } @staticmethod def cache_csv_dataframe(): return { 'writer': Cache.csv_writer, - 'reader' : pd.DataFrame.from_csv + 'reader' : lambda f: pd.read_csv(f, parse_dates=True) } @staticmethod @@ -422,7 +422,7 @@ def cache_json(): def cache_csv(): return { 'writer': Cache.csv_writer, - 'reader': pd.DataFrame.from_csv + 'reader': lambda f: pd.read_csv(f, parse_dates=True) } @staticmethod @@ -525,7 +525,7 @@ def wrap(self, fn, path, cache, else: data = ju.read(path) elif return_dataframe is True: - data = pd.DataFrame.from_csv(path) + data = pd.read_csv(path, parse_dates=True) else: raise ValueError( 'save_as_json=False cannot be used with return_dataframe=False') diff --git a/allensdk/core/mouse_connectivity_cache.py b/allensdk/core/mouse_connectivity_cache.py index 26a835443..c52052cf3 100644 --- a/allensdk/core/mouse_connectivity_cache.py +++ b/allensdk/core/mouse_connectivity_cache.py @@ -420,7 +420,7 @@ def get_experiment_structure_unionizes(self, experiment_id, pre=col_rn, post=filter_fn, writer=lambda p, x : pd.DataFrame(x).to_csv(p), - reader=pd.DataFrame.from_csv) + reader=lambda x: pd.read_csv(x, index_col=0, parse_dates=True)) def rank_structures(self, experiment_ids, is_injection, structure_ids=None, hemisphere_ids=None, rank_on='normalized_projection_volume', n=5, threshold=10**-2): diff --git a/allensdk/test/api/test_cacheable.py b/allensdk/test/api/test_cacheable.py index ec51f06dc..0e5a17d1b 100644 --- a/allensdk/test/api/test_cacheable.py +++ b/allensdk/test/api/test_cacheable.py @@ -50,17 +50,17 @@ _msg = [{'whatever': True}] _pd_msg = pd.DataFrame(_msg) -_csv_msg = pd.DataFrame.from_csv(StringIO.StringIO(""",whatever +_csv_msg = pd.read_csv(StringIO.StringIO(""",whatever 0,True -""")) +"""), index_col=0) @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) @patch('csv.DictWriter') -@patch.object(pd.DataFrame, 'from_csv', return_value=_csv_msg) -def test_cacheable_csv_dataframe(from_csv, dictwriter, ju_read_url_get, +@patch('pandas.read_csv', return_value=_csv_msg) +def test_cacheable_csv_dataframe(read_csv, dictwriter, ju_read_url_get, ju_read, ju_write): @cacheable() def get_hemispheres(): @@ -79,7 +79,7 @@ def get_hemispheres(): ju_read_url_get.assert_called_once_with( 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') - from_csv.assert_called_once_with('/xyz/abc/example.txt') + read_csv.assert_called_once_with('/xyz/abc/example.txt', parse_dates=True) assert not ju_write.called, 'write should not have been called' assert not ju_read.called, 'read should not have been called' mkdir.assert_called_once_with('/xyz/abc') @@ -90,8 +90,8 @@ def get_hemispheres(): @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) @patch.object(Manifest, 'safe_mkdir') -@patch.object(pd.DataFrame, 'from_csv', return_value=_csv_msg) -def test_cacheable_json(from_csv, mkdir, ju_read_url_get, ju_read, ju_write): +@patch('pandas.read_csv', return_value=_csv_msg) +def test_cacheable_json(read_csv, mkdir, ju_read_url_get, ju_read, ju_write): @cacheable() def get_hemispheres(): return RmaApi().model_query(model='Hemisphere') @@ -104,7 +104,7 @@ def get_hemispheres(): ju_read_url_get.assert_called_once_with( 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') - assert not from_csv.called, 'from_csv should not have been called' + assert not read_csv.called, 'read_csv should not have been called' ju_write.assert_called_once_with('/xyz/abc/example.json', _msg) ju_read.assert_called_once_with('/xyz/abc/example.json') @@ -136,8 +136,8 @@ def get_hemispheres_excpt(): @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) -@patch.object(pd.DataFrame, 'from_csv', return_value=_csv_msg) -def test_cacheable_no_cache_csv(from_csv, ju_read_url_get, ju_read, ju_write): +@patch('pandas.read_csv', return_value=_csv_msg) +def test_cacheable_no_cache_csv(read_csv, ju_read_url_get, ju_read, ju_write): @cacheable() def get_hemispheres(): return RmaApi().model_query(model='Hemisphere') @@ -149,19 +149,19 @@ def get_hemispheres(): assert df.loc[:, 'whatever'][0] assert not ju_read_url_get.called - from_csv.assert_called_once_with('/xyz/abc/example.csv') + read_csv.assert_called_once_with('/xyz/abc/example.csv', parse_dates=True) assert not ju_write.called, 'json write should not have been called' assert not ju_read.called, 'json read should not have been called' @patch("pandas.io.json.read_json", return_value=_pd_msg) -@patch.object(pd.DataFrame, "from_csv", return_value=_csv_msg) +@patch("pandas.read_csv", return_value=_csv_msg) @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) @patch.object(Manifest, 'safe_mkdir') def test_cacheable_json_dataframe(mkdir, ju_read_url_get, ju_read, ju_write, - from_csv, mock_read_json): + read_csv, mock_read_json): @cacheable() def get_hemispheres(): return RmaApi().model_query(model='Hemisphere') @@ -174,7 +174,7 @@ def get_hemispheres(): ju_read_url_get.assert_called_once_with( 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') - assert not from_csv.called, 'from_csv should not have been called' + assert not read_csv.called, 'read_csv should not have been called' mock_read_json.assert_called_once_with('/xyz/abc/example.json', orient='records') ju_write.assert_called_once_with('/xyz/abc/example.json', _msg) @@ -183,14 +183,14 @@ def get_hemispheres(): @patch("pandas.io.json.read_json", return_value=_pd_msg) -@patch.object(pd.DataFrame, "from_csv", return_value=_csv_msg) +@patch("pandas.read_csv", return_value=_csv_msg) @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) @patch('csv.DictWriter') @patch.object(Manifest, 'safe_mkdir') def test_cacheable_csv_json(mkdir, dictwriter, ju_read_url_get, ju_read, - ju_write, from_csv, mock_read_json): + ju_write, read_csv, mock_read_json): @cacheable() def get_hemispheres(): return RmaApi().model_query(model='Hemisphere') @@ -207,7 +207,7 @@ def get_hemispheres(): ju_read_url_get.assert_called_once_with( 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') - from_csv.assert_called_once_with('/xyz/example.csv') + read_csv.assert_called_once_with('/xyz/example.csv', parse_dates=True) dictwriter.return_value.writerow.assert_called() assert not mock_read_json.called, 'pj.read_json should not have been called' assert not ju_write.called, 'ju.write should not have been called' @@ -219,9 +219,9 @@ def get_hemispheres(): @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) -@patch.object(pd.DataFrame, "from_csv") +@patch("pandas.read_csv") @patch.object(pd.DataFrame, "to_csv") -def test_cacheable_no_save(to_csv, from_csv, ju_read_url_get, ju_read, +def test_cacheable_no_save(to_csv, read_csv, ju_read_url_get, ju_read, ju_write): @cacheable() def get_hemispheres(): @@ -234,7 +234,7 @@ def get_hemispheres(): ju_read_url_get.assert_called_once_with( 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') assert not to_csv.called, 'to_csv should not have been called' - assert not from_csv.called, 'from_csv should not have been called' + assert not read_csv.called, 'read_csv should not have been called' assert not ju_write.called, 'json write should not have been called' assert not ju_read.called, 'json read should not have been called' @@ -242,9 +242,9 @@ def get_hemispheres(): @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) -@patch.object(pd.DataFrame, "from_csv", return_value=_csv_msg) +@patch("pandas.read_csv", return_value=_csv_msg) @patch.object(pd.DataFrame, "to_csv") -def test_cacheable_no_save_dataframe(to_csv, from_csv, ju_read_url_get, +def test_cacheable_no_save_dataframe(to_csv, read_csv, ju_read_url_get, ju_read, ju_write): @cacheable() def get_hemispheres(): @@ -257,19 +257,19 @@ def get_hemispheres(): ju_read_url_get.assert_called_once_with( 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') assert not to_csv.called, 'to_csv should not have been called' - assert not from_csv.called, 'from_csv should not have been called' + assert not read_csv.called, 'read_csv should not have been called' assert not ju_write.called, 'json write should not have been called' assert not ju_read.called, 'json read should not have been called' -@patch.object(pd.DataFrame, "from_csv", return_value=_csv_msg) +@patch("pandas.read_csv", return_value=_csv_msg) @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) @patch('csv.DictWriter') @patch.object(Manifest, 'safe_mkdir') def test_cacheable_lazy_csv_no_file(mkdir, dictwriter, ju_read_url_get, - ju_read, ju_write, from_csv): + ju_read, ju_write, read_csv): @cacheable() def get_hemispheres(): return RmaApi().model_query(model='Hemisphere') @@ -289,7 +289,7 @@ def get_hemispheres(): 'http://api.brain-map.org/api/v2/data/query.json?q=model::Hemisphere') open_mock.assert_called_once_with('/xyz/abc/example.csv', 'w') dictwriter.return_value.writerow.assert_called() - from_csv.assert_called_once_with('/xyz/abc/example.csv') + read_csv.assert_called_once_with('/xyz/abc/example.csv', parse_dates=True) assert not ju_write.called, 'json write should not have been called' assert not ju_read.called, 'json read should not have been called' @@ -297,8 +297,8 @@ def get_hemispheres(): @patch("allensdk.core.json_utilities.write") @patch("allensdk.core.json_utilities.read", return_value=_msg) @patch("allensdk.core.json_utilities.read_url_get", return_value={'msg': _msg}) -@patch.object(pd.DataFrame, "from_csv", return_value=_csv_msg) -def test_cacheable_lazy_csv_file_exists(from_csv, ju_read_url_get, ju_read, +@patch("pandas.read_csv", return_value=_csv_msg) +def test_cacheable_lazy_csv_file_exists(read_csv, ju_read_url_get, ju_read, ju_write): @cacheable() def get_hemispheres(): @@ -312,6 +312,6 @@ def get_hemispheres(): assert df.loc[:, 'whatever'][0] assert not ju_read_url_get.called - from_csv.assert_called_once_with('/xyz/abc/example.csv') + read_csv.assert_called_once_with('/xyz/abc/example.csv', parse_dates=True) assert not ju_write.called, 'json write should not have been called' assert not ju_read.called, 'json read should not have been called' \ No newline at end of file diff --git a/allensdk/test/api/test_pager.py b/allensdk/test/api/test_pager.py index 3827fada9..a657a2ad1 100644 --- a/allensdk/test/api/test_pager.py +++ b/allensdk/test/api/test_pager.py @@ -58,9 +58,9 @@ def pager(): _msg = [{'whatever': True}] _pd_msg = pd.DataFrame(_msg) -_csv_msg = pd.DataFrame.from_csv(StringIO.StringIO(""",whatever +_csv_msg = pd.read_csv(StringIO.StringIO(""",whatever 0,True -""")) +"""), index_col=0) _read_url_get_msg5 = [{'msg': _msg}, {'msg': _msg}, @@ -145,11 +145,11 @@ def get_genes(**kwargs): (Cache.cache_csv, Cache.cache_csv_json, Cache.cache_csv_dataframe)) -@patch.object(pd.DataFrame, "from_csv", return_value=_csv_msg) +@patch("pandas.read_csv", return_value=_csv_msg) @patch("allensdk.core.json_utilities.read_url_get", side_effect=_read_url_get_msg5) @patch("os.makedirs") -def test_cacheable_pageable_csv(os_makedirs, ju_read_url_get, from_csv, +def test_cacheable_pageable_csv(os_makedirs, ju_read_url_get, read_csv, cache_style): archive_templates = \ {"cam_cell_queries": [ @@ -193,7 +193,7 @@ def get_cam_cell_metrics(*args, [0, 1, 2, 3, 4, 5]) assert ju_read_url_get.call_args_list == list(expected_calls) - from_csv.assert_called_once_with('/path/to/cam_cell_metrics.csv') + read_csv.assert_called_once_with('/path/to/cam_cell_metrics.csv', parse_dates=True) assert csv_writerow.call_args_list == [call({'whatever': 'whatever'}), call({'whatever': True}), diff --git a/allensdk/test/core/test_cell_types_cache_unit.py b/allensdk/test/core/test_cell_types_cache_unit.py index 18162a321..685839628 100644 --- a/allensdk/test/core/test_cell_types_cache_unit.py +++ b/allensdk/test/core/test_cell_types_cache_unit.py @@ -43,6 +43,7 @@ import itertools as it import allensdk.core.json_utilities as ju import pandas.io.json as pj +import pandas as pd import os _MOCK_PATH = '/path/to/xyz.txt' @@ -60,11 +61,17 @@ def cell_id(): return cell_id +@pytest.fixture +def cached_csv(tmpdir_factory): + csv = str(tmpdir_factory.mktemp("cache_test").join("data.csv")) + return csv + + @pytest.fixture def cache_fixture(tmpdir_factory): # Instantiate the CellTypesCache instance. The manifest_file argument # tells it where to store the manifest, which is a JSON file that tracks - # file paths. If you supply a relative path (like this), it will go + # file paths. If you supply a relative path, it will go # into your current working directory manifest_file = str(tmpdir_factory.mktemp("ctc").join("manifest.json")) ctc = CTC.CellTypesCache(manifest_file=manifest_file) @@ -250,9 +257,7 @@ def test_get_reconstruction(cache_fixture, @pytest.mark.parametrize('path_exists', (False, True)) @patch.object(DataFrame, "to_csv") -@patch.object(DataFrame, "from_csv") -def test_get_reconstruction_with_api(from_csv, - to_csv, +def test_get_reconstruction_with_api(to_csv, cache_fixture, cell_id, path_exists): @@ -279,9 +284,7 @@ def test_get_reconstruction_with_api(from_csv, @patch.object(DataFrame, "to_csv") -@patch.object(DataFrame, "from_csv") -def test_get_reconstruction_exception(from_csv, - to_csv, +def test_get_reconstruction_exception(to_csv, cache_fixture, cell_id): ctc = cache_fixture @@ -410,8 +413,8 @@ def test_get_ephys_features(cache_fixture, it.product((False,True), (False,True))) @patch.object(DataFrame, "to_csv") -@patch.object(DataFrame, "from_csv") -def test_get_ephys_features_with_api(from_csv, +@patch("pandas.read_csv") +def test_get_ephys_features_with_api(read_csv, to_csv, cache_fixture, df, @@ -435,20 +438,42 @@ def test_get_ephys_features_with_api(from_csv, _ = ctc.get_ephys_features(dataframe=df) if path_exists: - from_csv.assert_called_once_with(_MOCK_PATH) + read_csv.assert_called_once_with(_MOCK_PATH, parse_dates=True) else: mkd.assert_called_once_with(_MOCK_PATH) assert query_mock.called +@pytest.mark.parametrize('df', (False, True)) +def test_get_ephys_features_cache_roundtrip(cached_csv, + cache_fixture, + df): + ctc = cache_fixture + + mock_data = [{'lorem': 1, + 'ipsum': 2 }, + {'lorem': 3, + 'ipsum': 4 }] + + with patch.object(ctc, "get_cache_path", return_value=cached_csv): + with patch('allensdk.api.queries.cell_types_api.CellTypesApi.model_query', + MagicMock(name='model query', + return_value=mock_data)) as query_mock: + data = ctc.get_ephys_features() + pandas_data = pd.read_csv(cached_csv, parse_dates=True) + + assert len(data) == 2 + assert sorted(data[0].keys()) == sorted(pandas_data.columns) + + @pytest.mark.parametrize('path_exists,df', it.product((False, True), (False, True))) @patch.object(DataFrame, "to_csv") -@patch.object(DataFrame, "from_csv", +@patch("pandas.read_csv", return_value=DataFrame([{ 'stuff': 'whatever'}, { 'stuff': 'nonsense'}])) -def test_get_morphology_features(from_csv, +def test_get_morphology_features(read_csv, to_csv, cache_fixture, path_exists, @@ -467,8 +492,7 @@ def test_get_morphology_features(from_csv, with patch('allensdk.api.queries.cell_types_api.CellTypesApi.model_query', MagicMock(name='model query', return_value=json_data)) as query_mock: - data = ctc.get_morphology_features(df, - _MOCK_PATH) + data = ctc.get_morphology_features(df, _MOCK_PATH) if df: assert ('stuff' in data) == True @@ -478,7 +502,7 @@ def test_get_morphology_features(from_csv, if path_exists: if df: - from_csv.assert_called_once_with(_MOCK_PATH) + read_csv.assert_called_once_with(_MOCK_PATH, parse_dates=True) else: assert True assert not mkd.called @@ -546,10 +570,10 @@ def test_get_ephys_sweeps_with_api(cache_fixture, (False, True))) @patch('pandas.DataFrame.merge') @patch.object(DataFrame, "to_csv") -@patch.object(DataFrame, "from_csv", +@patch("pandas.read_csv", return_value=DataFrame([{ 'stuff': 'whatever'}, { 'stuff': 'nonsense'}])) -def test_get_all_features(from_csv, +def test_get_all_features(read_csv, to_csv, mock_merge, cache_fixture, @@ -577,7 +601,7 @@ def test_get_all_features(from_csv, require_reconstruction=require_reconstruction) if path_exists: - assert from_csv.called + assert read_csv.called else: assert query_mock.called diff --git a/allensdk/test/core/test_mouse_connectivity_cache.py b/allensdk/test/core/test_mouse_connectivity_cache.py index 8fccb2b5e..cbc0bc113 100755 --- a/allensdk/test/core/test_mouse_connectivity_cache.py +++ b/allensdk/test/core/test_mouse_connectivity_cache.py @@ -46,6 +46,12 @@ from allensdk.core.structure_tree import StructureTree +@pytest.fixture +def cached_csv(tmpdir_factory): + csv = str(tmpdir_factory.mktemp("cache_test").join("data.csv")) + return csv + + @pytest.fixture(scope='function') def mcc(tmpdir_factory): manifest_file = tmpdir_factory.mktemp("mcc").join('manifest.json') @@ -343,6 +349,21 @@ def test_get_experiment_structure_unionizes(mcc, unionizes): assert os.path.exists(path) +def test_get_experiment_structure_unionizes_cache_roundtrip(mcc, unionizes, + cached_csv): + + eid = 166218353 + + with mock.patch.object(mcc.api, "model_query", + new=lambda *args, **kwargs: unionizes): + obtained = mcc.get_experiment_structure_unionizes( + eid, file_name=cached_csv) + pandas_data = pd.read_csv(cached_csv, index_col=0, parse_dates=True) + + assert obtained.loc[0, 'projection_intensity'] == 263.231 + assert(sorted(obtained.keys()) == sorted(pandas_data.columns)) + + def test_filter_structure_unionizes(mcc, unionizes): obtained = mcc.filter_structure_unionizes(pd.DataFrame(unionizes), diff --git a/doc_template/examples_root/examples/data_api_client_ex.py b/doc_template/examples_root/examples/data_api_client_ex.py index b6335e0bf..071606b92 100644 --- a/doc_template/examples_root/examples/data_api_client_ex.py +++ b/doc_template/examples_root/examples/data_api_client_ex.py @@ -105,7 +105,7 @@ 'parent_structure_id', 'acronym']].to_csv('summary_structures.csv', index_label='structure_id') -reread = pd.DataFrame.from_csv('summary_structures.csv') +reread = pd.read_csv('summary_structures.csv') #=============================================================================== # example 10 @@ -113,7 +113,7 @@ for id, name, parent_structure_id in summary_structures[['name', 'parent_structure_id']].itertuples(): - print("%d %s %d" % (id, name, parent_structure_id)) + print("%d %s %d" % (id, name, parent_structure_id)) #=============================================================================== # example 11