diff --git a/README.md b/README.md index 54cd4f2..5ec1725 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,35 @@ handle.get_captures() # returns list of 'captures' dictionaries handle.get_annotations() # returns list of all annotations ``` +### Load a SigMF archive with multiple recordings +There are different ways to read an archive: the `SigMFArchiveReader` +class, the `sigmffile.fromarchive()` method, and the `sigmffile.fromfile()` +method. + +```python +import numpy as np +from sigmf.archivereader import SigMFArchiveReader + +from sigmf.sigmffile import (fromarchive, + fromfile) + +# read multirecording archive using fromarchive +sigmffiles = fromarchive("multi_recording_archive.sigmf") +# length should be equal to the number of recordings in the archive +print(len(sigmffiles)) + +# read multirecording archive using fromfile +sigmffiles = fromfile("multi_recording_archive.sigmf") +# length should be equal to the number of recordings in the archive +print(len(sigmffiles)) + +# read multirecording archive using SigMFArchiveReader +reader = SigMFArchiveReader("multi_recording_archive.sigmf") +# length of reader and reader.sigmffiles should be equal to the number of recordings in the archive +print(len(reader)) +print(len(reader.sigmffiles)) +``` + ### Verify SigMF dataset integrity & compliance ```bash @@ -107,6 +136,7 @@ data.tofile('example_cf32.sigmf-data') # create the metadata meta = SigMFFile( + name='example_cf32', data_file='example_cf32.sigmf-data', # extension is optional global_info = { SigMFFile.DATATYPE_KEY: get_data_type_str(data), # in this case, 'cf32_le' @@ -147,6 +177,7 @@ data_ci16.view(np.float32).astype(np.int16).tofile('example_ci16.sigmf-data') # create the metadata for the second file meta_ci16 = SigMFFile( + name='example_ci16', data_file='example_ci16.sigmf-data', # extension is optional global_info = { SigMFFile.DATATYPE_KEY: 'ci16_le', # get_data_type_str() is only valid for numpy types @@ -180,6 +211,80 @@ ci16_sigmffile = 
collection.get_SigMFFile(stream_name='example_ci16') cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32') ``` +### Create a SigMF Archive +The `SigMFArchive` class, the `SigMFFile.archive()` method, and the +`SigMFFile.tofile()` method can all be used to create an archive. + +```python +import numpy as np + +from sigmf.sigmffile import (SigMFFile, + SigMFArchive) + + +# create data file +random_data = np.random.rand(128) +data_path = "recording.sigmf-data" +random_data.tofile(data_path) + +# create metadata +sigmf_file = SigMFFile(name='recording') +sigmf_file.set_global_field("core:datatype", "rf32_le") +sigmf_file.add_annotation(start_index=0, length=len(random_data)) +sigmf_file.add_capture(start_index=0) +sigmf_file.set_data_file(data_path) + +# create archive using SigMFArchive +archive = SigMFArchive(sigmffiles=sigmf_file, + path="single_recording_archive1.sigmf") + +# create archive using SigMFFile archive() +archive_path = sigmf_file.archive(file_path="single_recording_archive2.sigmf") + +# create archive using tofile +sigmf_file.tofile(file_path="single_recording_archive3.sigmf", + toarchive=True) +``` + +### Create SigMF Archives with Multiple Recordings +Archives with multiple recordings can be created using the `SigMFArchive` class. 
+ +```python +import numpy as np + +from sigmf.sigmffile import (SigMFFile, + SigMFArchive) + + +# create data files +random_data1 = np.random.rand(128) +data1_path = "recording1.sigmf-data" +random_data1.tofile(data1_path) + +random_data2 = np.random.rand(128) +data2_path = "recording2.sigmf-data" +random_data2.tofile(data2_path) + +# create metadata +sigmf_file_1 = SigMFFile(name='recording1') +sigmf_file_1.set_global_field("core:datatype", "rf32_le") +sigmf_file_1.add_annotation(start_index=0, length=len(random_data1)) +sigmf_file_1.add_capture(start_index=0) +sigmf_file_1.set_data_file(data1_path) + +sigmf_file_2 = SigMFFile(name='recording2') +sigmf_file_2.set_global_field("core:datatype", "rf32_le") +sigmf_file_2.add_annotation(start_index=0, length=len(random_data2)) +sigmf_file_2.add_capture(start_index=0) +sigmf_file_2.set_data_file(data2_path) + + +# create archive using SigMFArchive +sigmffiles = [sigmf_file_1, sigmf_file_2] +archive = SigMFArchive(sigmffiles=sigmffiles, + path="multi_recording_archive.sigmf") +``` + ### Load a SigMF Archive and slice its data without untaring it Since an *archive* is merely a tarball (uncompressed), and since there any many @@ -196,13 +301,13 @@ In [1]: import sigmf In [2]: arc = sigmf.SigMFArchiveReader('/src/LTE.sigmf') -In [3]: arc.shape +In [3]: arc.sigmffiles[0].shape Out[3]: (15379532,) -In [4]: arc.ndim +In [4]: arc.sigmffiles[0].ndim Out[4]: 1 -In [5]: arc[:10] +In [5]: arc.sigmffiles[0][:10] Out[5]: array([-20.+11.j, -21. -6.j, -17.-20.j, -13.-52.j, 0.-75.j, 22.-58.j, 48.-44.j, 49.-60.j, 31.-56.j, 23.-47.j], dtype=complex64) @@ -215,13 +320,13 @@ However, the `.sigmffile` member keeps track of this, and converts the data to `numpy.complex64` *after* slicing it, that is, after reading it from disk. 
```python -In [6]: arc.sigmffile.get_global_field(sigmf.SigMFFile.DATATYPE_KEY) +In [6]: arc.sigmffiles[0].get_global_field(sigmf.SigMFFile.DATATYPE_KEY) Out[6]: 'ci16_le' -In [7]: arc.sigmffile._memmap.dtype +In [7]: arc.sigmffiles[0]._memmap.dtype Out[7]: dtype('int16') -In [8]: arc.sigmffile._return_type +In [8]: arc.sigmffiles[0]._return_type Out[8]: ' None: + self.metadata = None + self.data = None + + class SigMFArchiveReader(): - """Access data within SigMF archive `tar` in-place without extracting. + """Access data within SigMF archive `tar` in-place without extracting. This + class can be used to iterate through multiple SigMFFiles in the archive. Parameters: - name -- path to archive file to access. If file does not exist, - or if `name` doesn't end in .sigmf, SigMFFileError is raised. - """ - def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): - self.name = name - if self.name is not None: - if not name.endswith(SIGMF_ARCHIVE_EXT): - err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) - raise SigMFFileError(err) - - tar_obj = tarfile.open(self.name) - - elif archive_buffer is not None: - tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') - - else: - raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None') - - json_contents = None - data_offset_size = None - - for memb in tar_obj.getmembers(): - if memb.isdir(): # memb.type == tarfile.DIRTYPE: - # the directory structure will be reflected in the member name - continue - - elif memb.isfile(): # memb.type == tarfile.REGTYPE: - if memb.name.endswith(SIGMF_METADATA_EXT): - json_contents = memb.name - if data_offset_size is None: - # consider a warnings.warn() here; the datafile should be earlier in the - # archive than the metadata, so that updating it (like, adding an annotation) - # is fast. 
- pass - with tar_obj.extractfile(memb) as memb_fid: - json_contents = memb_fid.read() + path -- path to archive file to access. If file does not exist, + or if `path` doesn't end in .sigmf, SigMFFileError is raised. - elif memb.name.endswith(SIGMF_DATASET_EXT): - data_offset_size = memb.offset_data, memb.size + self.sigmffiles will contain the SigMFFile(s) (metadata/data) found in the + archive. + """ + def __init__(self, + path=None, + skip_checksum=False, + map_readonly=True, + archive_buffer=None): + self.path = path + tar_obj = None + try: + if self.path is not None: + if not self.path.endswith(SIGMF_ARCHIVE_EXT): + err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) + raise SigMFFileError(err) + + tar_obj = tarfile.open(self.path) + + elif archive_buffer is not None: + tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') - else: - print('A regular file', memb.name, 'was found but ignored in the archive') else: - print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.') - - if data_offset_size is None: - raise SigMFFileError('No .sigmf-data file found in archive!') - - self.sigmffile = SigMFFile(metadata=json_contents) - valid_md = self.sigmffile.validate() - - self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0], - size_bytes=data_offset_size[1], map_readonly=map_readonly) - - self.ndim = self.sigmffile.ndim - self.shape = self.sigmffile.shape - - tar_obj.close() + raise ValueError('In sigmf.archivereader.__init__(), either ' + '`path` or `archive_buffer` must be not None') + + json_contents = None + data_offset_size = None + sigmffile_name = None + self.sigmffiles = [] + recordings = [] + + recording = ArchiveRecording() + for memb in tar_obj.getmembers(): + if memb.isdir(): # memb.type == tarfile.DIRTYPE: + # the directory structure will be reflected in the member + # name + continue + + elif memb.isfile(): # memb.type == 
tarfile.REGTYPE: + if memb.name.endswith(SIGMF_METADATA_EXT): + if recording.metadata: + # save previous recording + recordings.append(recording) + recording = ArchiveRecording() + recording.metadata = memb + elif memb.name.endswith(SIGMF_DATASET_EXT): + recording.data = memb + elif memb.name.endswith(SIGMF_COLLECTION_EXT): + print('A SigMF Collection file ', + memb.name, + 'was found but not handled.') + else: + print('A regular file', + memb.name, + 'was found but ignored in the archive') + else: + print('A member of type', + memb.type, + 'and name', + memb.name, + 'was found but not handled, just FYI.') + + if recording.metadata: + recordings.append(recording) # save final recording + + if recordings: + for recording in recordings: + metadata = recording.metadata + sigmffile_name, _ = os.path.splitext(metadata.name) + sigmffile_name = os.path.dirname(sigmffile_name) + with tar_obj.extractfile(metadata) as memb_fid: + json_contents = memb_fid.read() + sigmffile = SigMFFile(sigmffile_name, + metadata=json_contents) + + sigmffile.validate() + data = recording.data + if data: + data_offset_size = data.offset_data, data.size + sigmffile.set_data_file(self.path, + data_buffer=archive_buffer, + skip_checksum=skip_checksum, + offset=data_offset_size[0], + size_bytes=data_offset_size[1], + map_readonly=map_readonly) + + self.sigmffiles.append(sigmffile) + + if not any([r.data for r in recordings]): + warnings.warn(f"No file with {SIGMF_DATASET_EXT} extension" + " found in archive!") + finally: + if tar_obj: + tar_obj.close() def __len__(self): - return self.sigmffile.__len__() + return len(self.sigmffiles) def __iter__(self): - return self.sigmffile.__iter__() + return self.sigmffiles.__iter__() def __getitem__(self, sli): - return self.sigmffile.__getitem__(sli) + return self.sigmffiles.__getitem__(sli) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 423ab49..357064f 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -9,8 +9,6 @@ from collections 
import OrderedDict import codecs import json -import tarfile -import tempfile from os import path import warnings import numpy as np @@ -78,6 +76,7 @@ def dump(self, filep, pretty=True): indent=4 if pretty else None, separators=(',', ': ') if pretty else None, ) + filep.write("\n") def dumps(self, pretty=True): ''' @@ -97,7 +96,7 @@ def dumps(self, pretty=True): self.ordered_metadata(), indent=4 if pretty else None, separators=(',', ': ') if pretty else None, - ) + ) + "\n" class SigMFFile(SigMFMetafile): START_INDEX_KEY = "core:sample_start" @@ -148,12 +147,34 @@ class SigMFFile(SigMFMetafile): ] VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} - def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): + def __init__(self, + name, + metadata=None, + data_file=None, + global_info=None, + skip_checksum=False, + map_readonly=True): ''' API for SigMF I/O Parameters ---------- + name: str, required + Name used for directory and filenames if archived. + For example, given `name=recording1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - recording1/ + - recording1.sigmf-meta + - recording1.sigmf-data + Folders can also be added in the name. For example, given + `name=folder1/recording1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - folder1/ + - recording1/ + - recording1.sigmf-meta + - recording1.sigmf-data metadata: str or dict, optional Metadata for associated dataset. 
data_file: str, optional @@ -183,6 +204,7 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu self.set_global_info(global_info) if data_file is not None: self.set_data_file(data_file, skip_checksum=skip_checksum, map_readonly=map_readonly) + self.name = name def __len__(self): return self._memmap.shape[0] @@ -213,6 +235,20 @@ def __getitem__(self, sli): raise ValueError("unhandled ndim in SigMFFile.__getitem__(); this shouldn't happen") return a + def __eq__(self, other): + """Define equality between two `SigMFFile`s. + + Rely on the `core:sha512` value in the metadata to decide whether + `data_file` is the same since the same sigmf archive could be extracted + twice to two different temp directories and the SigMFFiles should still + be equivalent. + + """ + if isinstance(other, SigMFFile): + return self._metadata == other._metadata + + return False + def _get_start_offset(self): """ Return the offset of the first sample. @@ -511,13 +547,33 @@ def validate(self): version = self.get_global_field(self.VERSION_KEY) validate.validate(self._metadata, self.get_schema()) - def archive(self, name=None, fileobj=None): + def archive(self, file_path=None, fileobj=None, pretty=True): """Dump contents to SigMF archive format. - `name` and `fileobj` are passed to SigMFArchive and are defined there. - + Keyword arguments: + file_path -- passed to SigMFArchive`path`. Path to archive file to + create. If file exists, overwrite. If `path` doesn't end + in .sigmf, it will be appended. If not given, `file_path` + will be set to self.name. (default None) + fileobj -- passed to SigMFArchive `fileobj`. If `fileobj` is + specified, it is used as an alternative to a file object + opened in binary mode for `file_path`. If `fileobj` is an + open tarfile, it will be appended to. It is supposed to + be at position 0. `fileobj` won't be closed. If `fileobj` + is given, `file_path` has no effect. (default None) + pretty -- passed to SigMFArchive `pretty`. 
If True, pretty print + JSON when creating the metadata and collection files in + the archive. (default True). + + Returns the path to the created archive. """ - archive = SigMFArchive(self, name, fileobj) + if file_path is None: + file_path = self.name + + archive = SigMFArchive(self, + path=file_path, + fileobj=fileobj, + pretty=pretty) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -538,11 +594,10 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): self.validate() fns = get_sigmf_filenames(file_path) if toarchive: - self.archive(fns['archive_fn']) + self.archive(fns['archive_fn'], pretty=pretty) else: with open(fns['meta_fn'], 'w') as fp: self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return def read_samples_in_capture(self, index=0, autoscale=True): ''' @@ -771,7 +826,6 @@ def tofile(self, file_path, pretty=True): fns = get_sigmf_filenames(file_path) with open(fns['collection_fn'], 'w') as fp: self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return def get_SigMFFile(self, stream_name=None, stream_index=None): ''' @@ -890,14 +944,23 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None): return None -def fromarchive(archive_path, dir=None): - """Extract an archive and return a SigMFFile. +def fromarchive(archive_path): + """Read an archive and return the SigMFFile(s) it contains. - The `dir` parameter is no longer used as this function has been changed to - access SigMF archives without extracting them. + If the archive contains a single recording, a single SigMFFile object will + be returned. If the archive contains multiple recordings a list of + SigMFFile objects will be returned. 
""" from .archivereader import SigMFArchiveReader - return SigMFArchiveReader(archive_path).sigmffile + reader = SigMFArchiveReader(archive_path) + sigmffiles = reader.sigmffiles + ret = None + if len(sigmffiles) == 1: + ret = sigmffiles[0] + else: + ret = sigmffiles + + return ret def fromfile(filename, skip_checksum=False): @@ -917,7 +980,8 @@ def fromfile(filename, skip_checksum=False): Returns ------- object - SigMFFile object with dataset & metadata or a SigMFCollection depending on the type of file + SigMFFile object(s) with dataset & metadata or a SigMFCollection + depending on the type of file ''' fns = get_sigmf_filenames(filename) meta_fn = fns['meta_fn'] @@ -944,7 +1008,10 @@ def fromfile(filename, skip_checksum=False): meta_fp.close() data_fn = get_dataset_filename_from_metadata(meta_fn, metadata) - return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum) + return SigMFFile(name=fns['base_fn'], + metadata=metadata, + data_file=data_fn, + skip_checksum=skip_checksum) def get_sigmf_filenames(filename): diff --git a/tests/conftest.py b/tests/conftest.py index 9a8aa64..60f0be4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,22 +24,63 @@ from sigmf.sigmffile import SigMFFile -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import (TEST_FLOAT32_DATA_1, + TEST_METADATA_1, + TEST_FLOAT32_DATA_2, + TEST_METADATA_2, + TEST_FLOAT32_DATA_3, + TEST_METADATA_3) @pytest.fixture -def test_data_file(): +def test_data_file_1(): with tempfile.NamedTemporaryFile() as temp: - TEST_FLOAT32_DATA.tofile(temp.name) + TEST_FLOAT32_DATA_1.tofile(temp.name) yield temp @pytest.fixture -def test_sigmffile(test_data_file): - sigf = SigMFFile() - sigf.set_global_field("core:datatype", "rf32_le") - sigf.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA)) - sigf.add_capture(start_index=0) - sigf.set_data_file(test_data_file.name) - assert sigf._metadata == TEST_METADATA - return sigf +def test_data_file_2(): + with 
tempfile.NamedTemporaryFile() as t: + TEST_FLOAT32_DATA_2.tofile(t.name) + yield t + + +@pytest.fixture +def test_data_file_3(): + with tempfile.NamedTemporaryFile() as t: + TEST_FLOAT32_DATA_3.tofile(t.name) + yield t + + +@pytest.fixture +def test_sigmffile(test_data_file_1): + f = SigMFFile(name='test1') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_1)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_1.name) + assert f._metadata == TEST_METADATA_1 + return f + + +@pytest.fixture +def test_alternate_sigmffile(test_data_file_2): + f = SigMFFile(name='test2') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_2)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_2.name) + assert f._metadata == TEST_METADATA_2 + return f + + +@pytest.fixture +def test_alternate_sigmffile_2(test_data_file_3): + f = SigMFFile(name='test3') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_3)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_3.name) + assert f._metadata == TEST_METADATA_3 + return f diff --git a/tests/test_archive.py b/tests/test_archive.py index 5c3d67b..900abc8 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,5 +1,7 @@ import codecs import json +import os +from pathlib import Path import tarfile import tempfile from os import path @@ -8,10 +10,13 @@ import pytest import jsonschema -from sigmf import error -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf import error, sigmffile +from sigmf.archive import (SIGMF_DATASET_EXT, + SIGMF_METADATA_EXT, + SigMFArchive) +from sigmf.archivereader import SigMFArchiveReader -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 def create_test_archive(test_sigmffile, tmpfile): @@ -24,20 +29,20 
@@ def test_without_data_file_throws_fileerror(test_sigmffile): test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=temp.name) + test_sigmffile.archive(file_path=temp.name) def test_invalid_md_throws_validationerror(test_sigmffile): del test_sigmffile._metadata["global"]["core:datatype"] # required field with tempfile.NamedTemporaryFile() as temp: with pytest.raises(jsonschema.exceptions.ValidationError): - test_sigmffile.archive(name=temp.name) + test_sigmffile.archive(file_path=temp.name) def test_name_wrong_extension_throws_fileerror(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=temp.name + ".zip") + test_sigmffile.archive(file_path=temp.name + ".zip") def test_fileobj_extension_ignored(test_sigmffile): @@ -47,17 +52,19 @@ def test_fileobj_extension_ignored(test_sigmffile): def test_name_used_in_fileobj(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: - sigmf_archive = test_sigmffile.archive(name="testarchive", fileobj=temp) + sigmf_archive = test_sigmffile.archive(file_path="testarchive", + fileobj=temp) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") basedir, file1, file2 = sigmf_tarfile.getmembers() - assert basedir.name == "testarchive" + assert basedir.name == test_sigmffile.name + assert sigmf_tarfile.name == temp.name def filename(tarinfo): path_root, _ = path.splitext(tarinfo.name) return path.split(path_root)[-1] - assert filename(file1) == "testarchive" - assert filename(file2) == "testarchive" + assert filename(file1) == test_sigmffile.name + assert filename(file2) == test_sigmffile.name def test_fileobj_not_closed(test_sigmffile): @@ -77,7 +84,7 @@ def test_unwritable_name_throws_fileerror(test_sigmffile): # so use invalid filename unwritable_file = '/bad_name/' with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=unwritable_file) + 
test_sigmffile.archive(file_path=unwritable_file) def test_tarfile_layout(test_sigmffile): @@ -93,21 +100,59 @@ def test_tarfile_names_and_extensions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) basedir, file1, file2 = sigmf_tarfile.getmembers() - archive_name = basedir.name - assert archive_name == path.split(temp.name)[-1] + assert basedir.name == test_sigmffile.name + archive_name = sigmf_tarfile.name + assert archive_name == temp.name + path.split(temp.name)[-1] file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT} file1_name, file1_ext = path.splitext(file1.name) assert file1_ext in file_extensions - assert path.split(file1_name)[-1] == archive_name + assert path.split(file1_name)[-1] == test_sigmffile.name file_extensions.remove(file1_ext) file2_name, file2_ext = path.splitext(file2.name) - assert path.split(file2_name)[-1] == archive_name + assert path.split(file2_name)[-1] == test_sigmffile.name assert file2_ext in file_extensions +def test_tarfile_names_and_extensions_with_paths(test_sigmffile): + with tempfile.NamedTemporaryFile() as temp: + test_sigmffile.name = os.path.join("test_folder", "test") + sigmf_tarfile = create_test_archive(test_sigmffile, temp) + basedir, subdir, file1, file2 = sigmf_tarfile.getmembers() + assert basedir.name == path.split(test_sigmffile.name)[0] + assert subdir.name == test_sigmffile.name + archive_name = sigmf_tarfile.name + assert archive_name == temp.name + path.split(temp.name)[-1] + file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT} + + file1_name, file1_ext = path.splitext(file1.name) + assert file1_ext in file_extensions + assert path.dirname(file1_name) == test_sigmffile.name + assert path.basename(file1_name) == path.basename(test_sigmffile.name) + + file_extensions.remove(file1_ext) + + file2_name, file2_ext = path.splitext(file2.name) + assert path.dirname(file2_name) == test_sigmffile.name + assert path.basename(file2_name) == 
path.basename(test_sigmffile.name) + assert file2_ext in file_extensions + + +def test_multirec_archive_into_fileobj(test_sigmffile, + test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as t: + # add first sigmffile to the fileobj t + create_test_archive(test_sigmffile, t) + # add a second one to the same fileobj + multirec_tar = create_test_archive(test_alternate_sigmffile, t) + members = multirec_tar.getmembers() + assert len(members) == 6 # 2 folders + 2 metadata files + 2 data files + + def test_tarfile_persmissions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) @@ -130,17 +175,104 @@ def test_contents(test_sigmffile): bytestream_reader = codecs.getreader("utf-8") # bytes -> str mdfile_reader = bytestream_reader(sigmf_tarfile.extractfile(mdfile)) - assert json.load(mdfile_reader) == TEST_METADATA + assert json.load(mdfile_reader) == TEST_METADATA_1 datfile_reader = sigmf_tarfile.extractfile(datfile) # calling `fileno` on `tarfile.ExFileObject` throws error (?), but # np.fromfile requires it, so we need this extra step data = np.frombuffer(datfile_reader.read(), dtype=np.float32) - assert np.array_equal(data, TEST_FLOAT32_DATA) + assert np.array_equal(data, TEST_FLOAT32_DATA_1) def test_tarfile_type(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) assert sigmf_tarfile.format == tarfile.PAX_FORMAT + + +def test_create_archive_pathlike(test_sigmffile, test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as t: + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=Path(t.name)) + output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) + assert len(output_sigmf_files) == 2 + assert input_sigmffiles == output_sigmf_files + + +def test_archive_names(test_sigmffile): + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + a = 
SigMFArchive(sigmffiles=test_sigmffile, path=t.name) + assert a.path == t.name + observed_sigmffile = sigmffile.fromarchive(t.name) + assert observed_sigmffile.name == test_sigmffile.name + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + archive_path = test_sigmffile.archive(t.name) + assert archive_path == t.name + observed_sigmffile = sigmffile.fromarchive(t.name) + assert observed_sigmffile.name == test_sigmffile.name + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + test_sigmffile.tofile(t.name, toarchive=True) + observed_sigmffile = sigmffile.fromarchive(t.name) + assert observed_sigmffile.name == test_sigmffile.name + + +def test_archive_no_path_or_fileobj(test_sigmffile): + """Error should be raised when no path or fileobj given.""" + with pytest.raises(error.SigMFFileError): + SigMFArchive(test_sigmffile) + + +def test_fromfile_name_to_archive(test_sigmffile): + """make sure creating an archive works when reading a sigmf-meta file with + absolute path + """ + try: + with open('/tmp/test_sigmf.sigmf-meta', 'w') as test_sigmf_meta_f: + test_sigmffile.dump(test_sigmf_meta_f) + read_sigmffile = sigmffile.fromfile('/tmp/test_sigmf.sigmf-meta') + assert read_sigmffile.name == '/tmp/test_sigmf' + read_sigmffile.set_data_file(data_file=test_sigmffile.data_file) + read_sigmffile.archive('/tmp/testarchive.sigmf') + sigmf_tar = tarfile.open('/tmp/testarchive.sigmf') + basedir, subdir, file1, file2 = sigmf_tar.getmembers() + assert basedir.name == 'tmp' + assert subdir.name == 'tmp/test_sigmf' + if file1.name.endswith(SIGMF_DATASET_EXT): + sigmf_data = file1 + sigmf_meta = file2 + else: + sigmf_data = file2 + sigmf_meta = file1 + + assert sigmf_data.name == 'tmp/test_sigmf/test_sigmf.sigmf-data' + assert sigmf_meta.name == 'tmp/test_sigmf/test_sigmf.sigmf-meta' + finally: + if os.path.exists('/tmp/test_sigmf.sigmf-meta'): + os.remove('/tmp/test_sigmf.sigmf-meta') + if os.path.exists('/tmp/testarchive.sigmf'): + os.remove('/tmp/testarchive.sigmf') + + +def 
test_create_archive_from_archive_reader(test_sigmffile, + test_alternate_sigmffile): + """ This test is to ensure that SigMFArchive will correctly create archive + using SigMFFile offset_and_size which is set when using SigMFArchiveReader + """ + original_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + with tempfile.TemporaryDirectory() as temp_dir: + archive_path1 = os.path.join(temp_dir, "original_archive.sigmf") + SigMFArchive(sigmffiles=original_sigmffiles, path=archive_path1) + reader = SigMFArchiveReader(path=archive_path1) + archive_path2 = os.path.join(temp_dir, "archive_from_reader.sigmf") + SigMFArchive(sigmffiles=reader.sigmffiles, path=archive_path2) + read_archive_from_reader = SigMFArchiveReader(path=archive_path2) + # SigMFFile.__eq__() method will check metadata + # which includes datafile hash + assert original_sigmffiles == read_archive_from_reader.sigmffiles + observed_sigmffile0 = read_archive_from_reader.sigmffiles[0] + observed_sigmffile1 = read_archive_from_reader.sigmffiles[1] + assert test_sigmffile.name == observed_sigmffile0.name + assert test_alternate_sigmffile.name == observed_sigmffile1.name diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 2b5b449..187f809 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -1,15 +1,12 @@ -import codecs -import json -import tarfile +import os +import shutil import tempfile -from os import path import numpy as np -import pytest -from sigmf import error from sigmf import SigMFFile, SigMFArchiveReader -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf.archive import SIGMF_METADATA_EXT, SigMFArchive + def test_access_data_without_untar(test_sigmffile): global_info = { @@ -24,7 +21,7 @@ def test_access_data_without_untar(test_sigmffile): "core:datetime": "2021-06-18T23:17:51.163959Z", "core:sample_start": 0 } - + NUM_ROWS = 5 for dt in "ri16_le", "ci16_le", "rf32_le", "rf64_le", "cf32_le", "cf64_le": @@ -33,7 +30,7 @@ 
def test_access_data_without_untar(test_sigmffile): global_info["core:num_channels"] = num_chan base_filename = dt + '_' + str(num_chan) archive_filename = base_filename + '.sigmf' - + a = np.arange(NUM_ROWS * num_chan * (2 if 'c' in dt else 1)) if 'i16' in dt: b = a.astype(np.int16) @@ -43,12 +40,102 @@ def test_access_data_without_untar(test_sigmffile): b = a.astype(np.float64) else: raise ValueError('whoops') - + test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: b.tofile(temp.name) - meta = SigMFFile(data_file=temp.name, global_info=global_info) + meta = SigMFFile("test", + data_file=temp.name, + global_info=global_info) meta.add_capture(0, metadata=capture_info) meta.tofile(archive_filename, toarchive=True) archi = SigMFArchiveReader(archive_filename, skip_checksum=True) + + +def test_extract_single_recording(test_sigmffile): + with tempfile.NamedTemporaryFile() as tf: + expected_sigmffile = test_sigmffile + arch = SigMFArchive(expected_sigmffile, path=tf.name) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 1 + actual_sigmffile = reader[0] + assert expected_sigmffile == actual_sigmffile + + +def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as tf: + # Create a multi-recording archive + expected_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(expected_sigmffiles, path=tf.name) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 + for expected in expected_sigmffiles: + assert expected in reader.sigmffiles + + +def test_archivereader_different_folder(test_sigmffile, + test_alternate_sigmffile): + try: + os.makedirs("folder1", exist_ok=True) + test_sigmffile.name = os.path.join("folder1", "test1") + os.makedirs("folder2", exist_ok=True) + test_alternate_sigmffile.name = os.path.join("folder2", "test2") + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + with open(meta1_filepath, "w") as meta_fd: + 
test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + + os.makedirs("archive_folder", exist_ok=True) + archive_path = os.path.join("archive_folder", "test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) + if os.path.exists("folder1"): + shutil.rmtree("folder1") + if os.path.exists("folder2"): + shutil.rmtree("folder2") + if os.path.exists("archive_folder"): + shutil.rmtree("archive_folder") + + +def test_archivereader_same_folder(test_sigmffile, + test_alternate_sigmffile): + try: + os.makedirs("folder1", exist_ok=True) + test_sigmffile.name = os.path.join("folder1", "test1") + test_alternate_sigmffile.name = os.path.join("folder1", "test2") + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + with open(meta1_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + archive_path = os.path.join("folder1", "test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if 
os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) + if os.path.exists("folder1"): + shutil.rmtree("folder1") diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index e371964..f20ef5c 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -26,7 +26,8 @@ import unittest from sigmf import sigmffile, utils -from sigmf.sigmffile import SigMFFile +from sigmf.sigmffile import SigMFFile, fromarchive +from sigmf.archive import SigMFArchive from .testdata import * @@ -35,8 +36,10 @@ class TestClassMethods(unittest.TestCase): def setUp(self): '''assure tests have a valid SigMF object to work with''' _, temp_path = tempfile.mkstemp() - TEST_FLOAT32_DATA.tofile(temp_path) - self.sigmf_object = SigMFFile(TEST_METADATA, data_file=temp_path) + TEST_FLOAT32_DATA_1.tofile(temp_path) + self.sigmf_object = SigMFFile("test", + TEST_METADATA_1, + data_file=temp_path) def test_iterator_basic(self): '''make sure default batch_size works''' @@ -64,39 +67,86 @@ def simulate_capture(sigmf_md, n, capture_len): def test_default_constructor(): - SigMFFile() + SigMFFile(name="test") def test_set_non_required_global_field(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.set_global_field('this_is:not_in_the_schema', None) def test_add_capture(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.add_capture(start_index=0, metadata={}) def test_add_annotation(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.add_capture(start_index=0) meta = {"latitude": 40.0, "longitude": -105.0} sigf.add_annotation(start_index=0, length=128, metadata=meta) +def test_add_annotation_with_duplicate_key(): + f = SigMFFile(name="test") + f.add_capture(start_index=0) + m1 = {"test_name_1": "test_value_1", "test_name_2": "test_value_2"} + f.add_annotation(start_index=0, length=128, metadata=m1) + m2 = {"test_name_1": "test_value_3", "test_name_2": "test_value_4"} + 
f.add_annotation(start_index=0, length=128, metadata=m2) + assert len(f.get_annotations(64)) == 2 + + def test_fromarchive(test_sigmffile): print("test_sigmffile is:\n", test_sigmffile) tf = tempfile.mkstemp()[1] - td = tempfile.mkdtemp() - archive_path = test_sigmffile.archive(name=tf) - result = sigmffile.fromarchive(archive_path=archive_path, dir=td) - assert result._metadata == test_sigmffile._metadata == TEST_METADATA + archive_path = test_sigmffile.archive(file_path=tf) + result = sigmffile.fromarchive(archive_path=archive_path) + assert result == test_sigmffile os.remove(tf) - shutil.rmtree(td) + + +def test_fromarchive_multi_recording(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + # single recording + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + test_sigmffile.archive(fileobj=t_file) + single_sigmffile = fromarchive(path) + assert isinstance(single_sigmffile, SigMFFile) + assert single_sigmffile == test_sigmffile + + # 2 recordings + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + SigMFArchive(input_sigmffiles, fileobj=t_file) + sigmffile_one, sigmffile_two = fromarchive(path) + assert isinstance(sigmffile_one, SigMFFile) + assert sigmffile_one == test_sigmffile + assert isinstance(sigmffile_two, SigMFFile) + assert sigmffile_two == test_alternate_sigmffile + + # 3 recordings + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + input_sigmffiles = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + SigMFArchive(input_sigmffiles, fileobj=t_file) + list_of_sigmffiles = fromarchive(path) + assert len(list_of_sigmffiles) == 3 + assert isinstance(list_of_sigmffiles[0], SigMFFile) + assert list_of_sigmffiles[0] == test_sigmffile + assert isinstance(list_of_sigmffiles[1], SigMFFile) + assert list_of_sigmffiles[1] == test_alternate_sigmffile + 
assert isinstance(list_of_sigmffiles[2], SigMFFile) + assert list_of_sigmffiles[2] == test_alternate_sigmffile_2 def test_add_multiple_captures_and_annotations(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") for idx in range(3): simulate_capture(sigf, idx, 1024) @@ -124,6 +174,7 @@ def test_multichannel_types(): # for real or complex check_count = raw_count * 1 # deepcopy temp_signal = SigMFFile( + name="test", data_file=temp_path, global_info={ SigMFFile.DATATYPE_KEY: f'{complex_prefix}{key}_le', @@ -149,6 +200,7 @@ def test_multichannel_seek(): # write some dummy data and read back np.arange(18, dtype=np.uint16).tofile(temp_path) temp_signal = SigMFFile( + name="test", data_file=temp_path, global_info={ SigMFFile.DATATYPE_KEY: 'cu16_le', @@ -163,7 +215,7 @@ def test_multichannel_seek(): def test_key_validity(): '''assure the keys in test metadata are valid''' - for top_key, top_val in TEST_METADATA.items(): + for top_key, top_val in TEST_METADATA_1.items(): if type(top_val) is dict: for core_key in top_val.keys(): assert core_key in vars(SigMFFile)[f'VALID_{top_key.upper()}_KEYS'] @@ -178,7 +230,7 @@ def test_key_validity(): def test_ordered_metadata(): '''check to make sure the metadata is sorted as expected''' - sigf = SigMFFile() + sigf = SigMFFile(name="test") top_sort_order = ['global', 'captures', 'annotations'] for kdx, key in enumerate(sigf.ordered_metadata()): assert kdx == top_sort_order.index(key) diff --git a/tests/test_validation.py b/tests/test_validation.py index 75cf048..57a186c 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -26,18 +26,19 @@ from jsonschema.exceptions import ValidationError -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 def test_valid_data(): '''assure the supplied metadata is OK''' - invalid_metadata = dict(TEST_METADATA) - SigMFFile(TEST_METADATA).validate() + invalid_metadata = dict(TEST_METADATA_1) + SigMFFile("test", 
TEST_METADATA_1).validate() + class FailingCases(unittest.TestCase): '''Cases where the validator should throw an exception.''' def setUp(self): - self.metadata = dict(TEST_METADATA) + self.metadata = dict(TEST_METADATA_1) def test_extra_top_level_key(self): '''no extra keys allowed on the top level''' @@ -45,7 +46,7 @@ def test_extra_top_level_key(self): with self.assertRaises(ValidationError): SigMFFile(self.metadata).validate() - def test_extra_top_level_key(self): + def test_invalid_label(self): '''label must be less than 20 chars''' self.metadata[SigMFFile.ANNOTATION_KEY][0][SigMFFile.LABEL_KEY] = 'a' * 21 with self.assertRaises(ValidationError): @@ -83,7 +84,7 @@ def test_invalid_annotation_order(self): def test_invalid_hash(self): _, temp_path = tempfile.mkstemp() - TEST_FLOAT32_DATA.tofile(temp_path) + TEST_FLOAT32_DATA_1.tofile(temp_path) self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = 'derp' with self.assertRaises(sigmf.error.SigMFFileError): - SigMFFile(metadata=self.metadata, data_file=temp_path) + SigMFFile(name="test", metadata=self.metadata, data_file=temp_path) diff --git a/tests/testdata.py b/tests/testdata.py index 0a0d5ed..db55c56 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -25,9 +25,9 @@ from sigmf import __version__ from sigmf import SigMFFile -TEST_FLOAT32_DATA = np.arange(16, dtype=np.float32) +TEST_FLOAT32_DATA_1 = np.arange(16, dtype=np.float32) -TEST_METADATA = { +TEST_METADATA_1 = { SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], SigMFFile.GLOBAL_KEY: { @@ -38,6 +38,32 @@ } } +TEST_FLOAT32_DATA_2 = np.arange(16, 32, dtype=np.float32) + +TEST_METADATA_2 = { + SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.GLOBAL_KEY: { + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.HASH_KEY: 
'a85018cf117a4704596c0f360dbc3fce2d0d561966d865b9b8a356634161bde6a528c5181837890a9f4d54243e2e8eaf7e19bd535e54e3e34aabf76793723d03', + SigMFFile.NUM_CHANNELS_KEY: 1, + SigMFFile.VERSION_KEY: __version__ + } +} + +TEST_FLOAT32_DATA_3 = np.arange(32, 48, dtype=np.float32) + +TEST_METADATA_3 = { + SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.GLOBAL_KEY: { + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.HASH_KEY: '089753bd48a1724c485e822eaf4d510491e4e54faa83cc3e7b3f18a9f651813190862aa97c922278454c66f20a741050762e008cbe4f96f3bd0dcdb7d720179d', + SigMFFile.NUM_CHANNELS_KEY: 1, + SigMFFile.VERSION_KEY: __version__ + } +} + # Data0 is a test of a compliant two capture recording TEST_U8_DATA0 = list(range(256)) TEST_U8_META0 = {