From 28368f2ca1f572c62282ec9a814c8fb68c66747d Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 9 Jun 2023 13:57:39 -0600 Subject: [PATCH 01/14] support for multiple recordings in archives --- README.md | 101 +++++++++++++++++++ sigmf/archive.py | 190 ++++++++++++++++++++++++------------ sigmf/archivereader.py | 156 +++++++++++++++++------------ sigmf/sigmffile.py | 91 ++++++++++++++--- tests/conftest.py | 63 +++++++++--- tests/test_archive.py | 115 ++++++++++++++++++---- tests/test_archivereader.py | 109 ++++++++++++++++++--- tests/test_sigmffile.py | 78 ++++++++++++--- tests/test_validation.py | 15 +-- tests/testdata.py | 30 +++++- 10 files changed, 746 insertions(+), 202 deletions(-) diff --git a/README.md b/README.md index 54cd4f2..4f2832c 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,33 @@ handle.get_captures() # returns list of 'captures' dictionaries handle.get_annotations() # returns list of all annotations ``` +### Load a SigMF archive with multiple recordings +There are different ways to read an archive using `SigMFArchiveReader` +class, the `sigmffile.fromarchive()` method, and the `sigmffile.fromfile()` +method. + +```python +import numpy as np +from sigmf.archivereader import SigMFArchiveReader + +from sigmf.sigmffile import (fromarchive, + fromfile) + +# read multirecording archive using fromarchive +sigmffiles = fromarchive("multi_recording_archive1.sigmf") +print(len(sigmffiles)) + +# read multirecording archive using fromfile +sigmffiles = fromfile("multi_recording_archive1.sigmf") +print(len(sigmffiles)) + +# # read multirecording archive using SigMFArchiveReader +reader = SigMFArchiveReader("multi_recording_archive1.sigmf") +# length of reader and reader.sigmffiles should be the same +print(len(reader)) +print(len(reader.sigmffiles)) +``` + ### Verify SigMF dataset integrity & compliance ```bash @@ -180,6 +207,80 @@ ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16') cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32') ``` +### Create a SigMF Archive +The `SigMFArchive` class, the `SigMFFile.archive()` method, and the +`SigMFFile.tofile()` method can all be used to create an archive. + +```python +import numpy as np + +from sigmf.sigmffile import (SigMFFile, + SigMFArchive) + + +# create data file +random_data1 = np.random.rand(128) +data1_path = "recording1.sigmf-data" +random_data1.tofile(data1_path) + +# create metadata +sigmf_file_1 = SigMFFile(name='recording1') +sigmf_file_1.set_global_field("core:datatype", "rf32_le") +sigmf_file_1.add_annotation(start_index=0, length=len(random_data1)) +sigmf_file_1.add_capture(start_index=0) +sigmf_file_1.set_data_file(data1_path) + +# create archive using SigMFArchive +archive1 = SigMFArchive(sigmffiles=sigmf_file_1, + path="single_recording_archive1.sigmf") + +# create archive using SigMFFile archive() +archive1_path = sigmf_file_1.archive(file_path="single_recording_archive2.sigmf") + +# create archive using tofile +sigmf_file_1.tofile(file_path="single_recording_archive3.sigmf", + toarchive=True) +``` + +### Create SigMF Archives with Multiple Recordings +Archives with multiple recordings can be created using `SigMFArchive` class. 
+ +```python +import numpy as np + +from sigmf.sigmffile import (SigMFFile, + SigMFArchive) + + +# create data files +random_data1 = np.random.rand(128) +data1_path = "recording1.sigmf-data" +random_data1.tofile(data1_path) + +random_data2 = np.random.rand(128) +data2_path = "recording2.sigmf-data" +random_data2.tofile(data2_path) + +# create metadata +sigmf_file_1 = SigMFFile(name='recording1') +sigmf_file_1.set_global_field("core:datatype", "rf32_le") +sigmf_file_1.add_annotation(start_index=0, length=len(random_data1)) +sigmf_file_1.add_capture(start_index=0) +sigmf_file_1.set_data_file(data1_path) + +sigmf_file_2 = SigMFFile(name='recording2') +sigmf_file_2.set_global_field("core:datatype", "rf32_le") +sigmf_file_2.add_annotation(start_index=0, length=len(random_data2)) +sigmf_file_2.add_capture(start_index=0) +sigmf_file_2.set_data_file(data2_path) + + +# create archive using SigMFArchive +sigmffiles = [sigmf_file_1, sigmf_file_2] +archive3 = SigMFArchive(sigmffiles=sigmffiles, + path="multi_recording_archive1.sigmf") +``` + ### Load a SigMF Archive and slice its data without untaring it Since an *archive* is merely a tarball (uncompressed), and since there any many diff --git a/sigmf/archive.py b/sigmf/archive.py index de6bd50..287651c 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -6,10 +6,16 @@ """Create and extract SigMF archives.""" +import collections +from io import BytesIO import os -import shutil import tarfile import tempfile +import time +from typing import BinaryIO, Iterable, Union + +import sigmf + from .error import SigMFFileError @@ -21,7 +27,7 @@ class SigMFArchive(): - """Archive a SigMFFile. + """Archive one or more `SigMFFile`s. A `.sigmf` file must include both valid metadata and data. If `self.data_file` is not set or the requested output file @@ -29,51 +35,65 @@ class SigMFArchive(): Parameters: - sigmffile -- A SigMFFile object with valid metadata and data_file - - name -- path to archive file to create. If file exists, overwrite. - If `name` doesn't end in .sigmf, it will be appended. - For example: if `name` == "/tmp/archive1", then the - following archive will be created: - /tmp/archive1.sigmf - - archive1/ - - archive1.sigmf-meta - - archive1.sigmf-data - - fileobj -- If `fileobj` is specified, it is used as an alternative to - a file object opened in binary mode for `name`. It is - supposed to be at position 0. `name` is not required, but - if specified will be used to determine the directory and - file names within the archive. `fileobj` won't be closed. - For example: if `name` == "archive1" and fileobj is given, - a tar archive will be written to fileobj with the - following structure: - - archive1/ - - archive1.sigmf-meta - - archive1.sigmf-data + sigmffiles -- A single SigMFFile or an iterable of SigMFFile objects with + valid metadata and data_files + + path -- Path to archive file to create. If file exists, overwrite. + If `path` doesn't end in .sigmf, it will be appended. The + `self.path` instance variable will be updated upon + successful writing of the archive to point to the final + archive path. + + + fileobj -- If `fileobj` is specified, it is used as an alternative to + a file object opened in binary mode for `path`. If + `fileobj` is an open tarfile, it will be appended to. It is + supposed to be at position 0. `fileobj` won't be closed. If + `fileobj` is given, `path` has no effect. + + pretty -- If True, pretty print JSON when creating the metadata + files in the archive. Defaults to True. 
""" - def __init__(self, sigmffile, name=None, fileobj=None): - self.sigmffile = sigmffile - self.name = name + def __init__(self, + sigmffiles: Union["sigmf.sigmffile.SigMFFile", + Iterable["sigmf.sigmffile.SigMFFile"]], + path: Union[str, os.PathLike] = None, + fileobj: BinaryIO = None, + pretty=True): + + if (not path) and (not fileobj): + raise SigMFFileError("'path' or 'fileobj' required for creating " + "SigMF archive!") + + if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile): + self.sigmffiles = [sigmffiles] + elif (hasattr(collections, "Iterable") and + isinstance(sigmffiles, collections.Iterable)): + self.sigmffiles = sigmffiles + elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10 + self.sigmffiles = sigmffiles + else: + raise SigMFFileError("Unknown type for sigmffiles argument!") + + if path: + self.path = str(path) + else: + self.path = None self.fileobj = fileobj self._check_input() - archive_name = self._get_archive_name() + mode = "a" if fileobj is not None else "w" sigmf_fileobj = self._get_output_fileobj() - sigmf_archive = tarfile.TarFile(mode="w", - fileobj=sigmf_fileobj, - format=tarfile.PAX_FORMAT) - tmpdir = tempfile.mkdtemp() - sigmf_md_filename = archive_name + SIGMF_METADATA_EXT - sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename) - sigmf_data_filename = archive_name + SIGMF_DATASET_EXT - sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename) - - with open(sigmf_md_path, "w") as mdfile: - self.sigmffile.dump(mdfile, pretty=True) - - shutil.copy(self.sigmffile.data_file, sigmf_data_path) + try: + sigmf_archive = tarfile.TarFile(mode=mode, + fileobj=sigmf_fileobj, + format=tarfile.PAX_FORMAT) + except tarfile.ReadError: + # fileobj doesn't contain any archives yet, so reopen in 'w' mode + sigmf_archive = tarfile.TarFile(mode='w', + fileobj=sigmf_fileobj, + format=tarfile.PAX_FORMAT) def chmod(tarinfo): if tarinfo.isdir(): @@ -82,47 +102,91 @@ def chmod(tarinfo): tarinfo.mode = 0o644 # -wr-r--r-- return tarinfo - sigmf_archive.add(tmpdir, arcname=archive_name, filter=chmod) + for sigmffile in self.sigmffiles: + self._create_parent_dirs(sigmf_archive, sigmffile.name, chmod) + file_path = os.path.join(sigmffile.name, + os.path.basename(sigmffile.name)) + sf_md_filename = file_path + SIGMF_METADATA_EXT + sf_data_filename = file_path + SIGMF_DATASET_EXT + metadata = sigmffile.dumps(pretty=pretty) + metadata_tarinfo = tarfile.TarInfo(sf_md_filename) + metadata_tarinfo.size = len(metadata) + metadata_tarinfo.mtime = time.time() + metadata_tarinfo = chmod(metadata_tarinfo) + metadata_buffer = BytesIO(metadata.encode("utf-8")) + sigmf_archive.addfile(metadata_tarinfo, fileobj=metadata_buffer) + data_tarinfo = sigmf_archive.gettarinfo(name=sigmffile.data_file, + arcname=sf_data_filename) + data_tarinfo = chmod(data_tarinfo) + with open(sigmffile.data_file, "rb") as data_file: + sigmf_archive.addfile(data_tarinfo, fileobj=data_file) + sigmf_archive.close() if not fileobj: sigmf_fileobj.close() - - shutil.rmtree(tmpdir) + else: + sigmf_fileobj.seek(0) # ensure next open can read this as a tar self.path = sigmf_archive.name - def _check_input(self): - self._ensure_name_has_correct_extension() - self._ensure_data_file_set() - self._validate_sigmffile_metadata() + def _create_parent_dirs(self, _tarfile, sigmffile_name, set_permission): + path_components = sigmffile_name.split(os.path.sep) + current_path = "" + for path in path_components: + current_path = os.path.join(current_path, path) + path_found = False + for member in _tarfile.getmembers(): + if 
member.name == current_path: + path_found = True + break + if not path_found: + tarinfo = tarfile.TarInfo(current_path) + tarinfo.type = tarfile.DIRTYPE + tarinfo = set_permission(tarinfo) + _tarfile.addfile(tarinfo) - def _ensure_name_has_correct_extension(self): - name = self.name - if name is None: + def _check_input(self): + self._ensure_path_has_correct_extension() + for sigmffile in self.sigmffiles: + self._ensure_sigmffile_name_set(sigmffile) + self._ensure_data_file_set(sigmffile) + self._validate_sigmffile_metadata(sigmffile) + + def _ensure_path_has_correct_extension(self): + path = self.path + if path is None: return - has_extension = "." in name - has_correct_extension = name.endswith(SIGMF_ARCHIVE_EXT) + has_extension = "." in path + has_correct_extension = path.endswith(SIGMF_ARCHIVE_EXT) if has_extension and not has_correct_extension: - apparent_ext = os.path.splitext(name)[-1] + apparent_ext = os.path.splitext(path)[-1] err = "extension {} != {}".format(apparent_ext, SIGMF_ARCHIVE_EXT) raise SigMFFileError(err) - self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT + self.path = path if has_correct_extension else path + SIGMF_ARCHIVE_EXT + + @staticmethod + def _ensure_sigmffile_name_set(sigmffile): + if not sigmffile.name: + err = "the `name` attribute must be set to pass to `SigMFArchive`" + raise SigMFFileError(err) - def _ensure_data_file_set(self): - if not self.sigmffile.data_file: + @staticmethod + def _ensure_data_file_set(sigmffile): + if not sigmffile.data_file: err = "no data file - use `set_data_file`" raise SigMFFileError(err) - def _validate_sigmffile_metadata(self): - self.sigmffile.validate() + @staticmethod + def _validate_sigmffile_metadata(sigmffile): + sigmffile.validate() def _get_archive_name(self): - if self.fileobj and not self.name: + if self.fileobj and not self.path: pathname = self.fileobj.name else: - pathname = self.name + pathname = self.path filename = os.path.split(pathname)[-1] archive_name, archive_ext = os.path.splitext(filename) @@ -135,7 +199,7 @@ def _get_output_fileobj(self): if self.fileobj: err = "fileobj {!r} is not byte-writable".format(self.fileobj) else: - err = "can't open {!r} for writing".format(self.name) + err = "can't open {!r} for writing".format(self.path) raise SigMFFileError(err) @@ -146,6 +210,6 @@ def _get_open_fileobj(self): fileobj = self.fileobj fileobj.write(bytes()) # force exception if not byte-writable else: - fileobj = open(self.name, "wb") + fileobj = open(self.path, "wb") return fileobj diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index 5759b74..a957a46 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -7,86 +7,114 @@ """Access SigMF archives without extracting them.""" import os -import shutil import tarfile -import tempfile -from . import __version__ #, schema, sigmf_hash, validate from .sigmffile import SigMFFile -from .archive import SigMFArchive, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT -from .utils import dict_merge +from .archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT from .error import SigMFFileError class SigMFArchiveReader(): - """Access data within SigMF archive `tar` in-place without extracting. + """Access data within SigMF archive `tar` in-place without extracting. This + class can be used to iterate through multiple SigMFFiles in the archive. Parameters: - name -- path to archive file to access. If file does not exist, - or if `name` doesn't end in .sigmf, SigMFFileError is raised. 
+ path -- path to archive file to access. If file does not exist, + or if `path` doesn't end in .sigmf, SigMFFileError is raised. + + self.sigmffiles will contain the SigMFFile(s) (metadata/data) found in the + archive. """ - def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): - self.name = name - if self.name is not None: - if not name.endswith(SIGMF_ARCHIVE_EXT): - err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) - raise SigMFFileError(err) - - tar_obj = tarfile.open(self.name) - - elif archive_buffer is not None: - tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') - - else: - raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None') - - json_contents = None - data_offset_size = None - - for memb in tar_obj.getmembers(): - if memb.isdir(): # memb.type == tarfile.DIRTYPE: - # the directory structure will be reflected in the member name - continue - - elif memb.isfile(): # memb.type == tarfile.REGTYPE: - if memb.name.endswith(SIGMF_METADATA_EXT): - json_contents = memb.name - if data_offset_size is None: - # consider a warnings.warn() here; the datafile should be earlier in the - # archive than the metadata, so that updating it (like, adding an annotation) - # is fast. - pass - with tar_obj.extractfile(memb) as memb_fid: - json_contents = memb_fid.read() - - elif memb.name.endswith(SIGMF_DATASET_EXT): - data_offset_size = memb.offset_data, memb.size + def __init__(self, + path=None, + skip_checksum=False, + map_readonly=True, + archive_buffer=None): + self.path = path + tar_obj = None + try: + if self.path is not None: + if not self.path.endswith(SIGMF_ARCHIVE_EXT): + err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) + raise SigMFFileError(err) + + tar_obj = tarfile.open(self.path) + + elif archive_buffer is not None: + tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:') - else: - print('A regular file', memb.name, 'was found but ignored in the archive') else: - print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.') - - if data_offset_size is None: - raise SigMFFileError('No .sigmf-data file found in archive!') - - self.sigmffile = SigMFFile(metadata=json_contents) - valid_md = self.sigmffile.validate() - - self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0], - size_bytes=data_offset_size[1], map_readonly=map_readonly) - - self.ndim = self.sigmffile.ndim - self.shape = self.sigmffile.shape - - tar_obj.close() + raise ValueError('In sigmf.archivereader.__init__(), either ' + '`path` or `archive_buffer` must be not None') + + json_contents = None + data_offset_size = None + sigmffile_name = None + self.sigmffiles = [] + data_found = False + + for memb in tar_obj.getmembers(): + if memb.isdir(): # memb.type == tarfile.DIRTYPE: + # the directory structure will be reflected in the member + # name + continue + + elif memb.isfile(): # memb.type == tarfile.REGTYPE: + if memb.name.endswith(SIGMF_METADATA_EXT): + json_contents = memb.name + if data_offset_size is None: + # consider a warnings.warn() here; the datafile + # should be earlier in the archive than the + # metadata, so that updating it (like, adding an + # annotation) is fast. 
+ pass + with tar_obj.extractfile(memb) as memb_fid: + json_contents = memb_fid.read() + + sigmffile_name, _ = os.path.splitext(memb.name) + elif memb.name.endswith(SIGMF_DATASET_EXT): + data_offset_size = memb.offset_data, memb.size + data_found = True + else: + print('A regular file', + memb.name, + 'was found but ignored in the archive') + else: + print('A member of type', + memb.type, + 'and name', + memb.name, + 'was found but not handled, just FYI.') + + if data_offset_size is not None and json_contents is not None: + sigmffile = SigMFFile(sigmffile_name, + metadata=json_contents) + sigmffile.validate() + + sigmffile.set_data_file(self.path, + data_buffer=archive_buffer, + skip_checksum=skip_checksum, + offset=data_offset_size[0], + size_bytes=data_offset_size[1], + map_readonly=map_readonly) + + self.sigmffiles.append(sigmffile) + data_offset_size = None + json_contents = None + sigmffile_name = None + + if not data_found: + raise SigMFFileError('No .sigmf-data file found in archive!') + finally: + if tar_obj: + tar_obj.close() def __len__(self): - return self.sigmffile.__len__() + return len(self.sigmffiles) def __iter__(self): - return self.sigmffile.__iter__() + return self.sigmffiles.__iter__() def __getitem__(self, sli): - return self.sigmffile.__getitem__(sli) + return self.sigmffiles.__getitem__(sli) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 423ab49..58fade2 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -9,8 +9,6 @@ from collections import OrderedDict import codecs import json -import tarfile -import tempfile from os import path import warnings import numpy as np @@ -78,6 +76,7 @@ def dump(self, filep, pretty=True): indent=4 if pretty else None, separators=(',', ': ') if pretty else None, ) + filep.write("\n") def dumps(self, pretty=True): ''' @@ -97,7 +96,7 @@ def dumps(self, pretty=True): self.ordered_metadata(), indent=4 if pretty else None, separators=(',', ': ') if pretty else None, - ) + ) + "\n" class SigMFFile(SigMFMetafile): START_INDEX_KEY = "core:sample_start" @@ -148,12 +147,25 @@ class SigMFFile(SigMFMetafile): ] VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} - def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): + def __init__(self, + name, + metadata=None, + data_file=None, + global_info=None, + skip_checksum=False, + map_readonly=True): ''' API for SigMF I/O Parameters ---------- + name: Name used for directory and filenames if archived. + For example, given `name=archive1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - archive1/ + - archive1.sigmf-meta + - archive1.sigmf-data metadata: str or dict, optional Metadata for associated dataset. data_file: str, optional @@ -183,6 +195,7 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu self.set_global_info(global_info) if data_file is not None: self.set_data_file(data_file, skip_checksum=skip_checksum, map_readonly=map_readonly) + self.name = name def __len__(self): return self._memmap.shape[0] @@ -213,6 +226,20 @@ def __getitem__(self, sli): raise ValueError("unhandled ndim in SigMFFile.__getitem__(); this shouldn't happen") return a + def __eq__(self, other): + """Define equality between two `SigMFFile`s. 
+ + Rely on the `core:sha512` value in the metadata to decide whether + `data_file` is the same since the same sigmf archive could be extracted + twice to two different temp directories and the SigMFFiles should still + be equivalent. + + """ + if isinstance(other, SigMFFile): + return self._metadata == other._metadata + + return False + def _get_start_offset(self): """ Return the offset of the first sample. @@ -511,13 +538,33 @@ def validate(self): version = self.get_global_field(self.VERSION_KEY) validate.validate(self._metadata, self.get_schema()) - def archive(self, name=None, fileobj=None): + def archive(self, file_path=None, fileobj=None, pretty=True): """Dump contents to SigMF archive format. - `name` and `fileobj` are passed to SigMFArchive and are defined there. - + Keyword arguments: + file_path -- passed to SigMFArchive`path`. Path to archive file to + create. If file exists, overwrite. If `path` doesn't end + in .sigmf, it will be appended. If not given, `file_path` + will be set to self.name. (default None) + fileobj -- passed to SigMFArchive `fileobj`. If `fileobj` is + specified, it is used as an alternative to a file object + opened in binary mode for `file_path`. If `fileobj` is an + open tarfile, it will be appended to. It is supposed to + be at position 0. `fileobj` won't be closed. If `fileobj` + is given, `file_path` has no effect. (default None) + pretty -- passed to SigMFArchive `pretty`. If True, pretty print + JSON when creating the metadata and collection files in + the archive. (default True). + + Returns the path to the created archive. """ - archive = SigMFArchive(self, name, fileobj) + if file_path is None: + file_path = self.name + + archive = SigMFArchive(self, + path=file_path, + fileobj=fileobj, + pretty=pretty) return archive.path def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): @@ -538,11 +585,10 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): self.validate() fns = get_sigmf_filenames(file_path) if toarchive: - self.archive(fns['archive_fn']) + self.archive(fns['archive_fn'], pretty=pretty) else: with open(fns['meta_fn'], 'w') as fp: self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return def read_samples_in_capture(self, index=0, autoscale=True): ''' @@ -771,7 +817,6 @@ def tofile(self, file_path, pretty=True): fns = get_sigmf_filenames(file_path) with open(fns['collection_fn'], 'w') as fp: self.dump(fp, pretty=pretty) - fp.write('\n') # text files should end in carriage return def get_SigMFFile(self, stream_name=None, stream_index=None): ''' @@ -891,13 +936,25 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None): def fromarchive(archive_path, dir=None): - """Extract an archive and return a SigMFFile. + """Extract an archive and return containing SigMFFiles. The `dir` parameter is no longer used as this function has been changed to access SigMF archives without extracting them. + + If the archive contains a single recording, a single SigMFFile object will + be returned. If the archive contains multiple recordings a list of + SigMFFile objects will be returned. 
""" from .archivereader import SigMFArchiveReader - return SigMFArchiveReader(archive_path).sigmffile + reader = SigMFArchiveReader(archive_path) + sigmffiles = reader.sigmffiles + ret = None + if len(sigmffiles) == 1: + ret = sigmffiles[0] + else: + ret = sigmffiles + + return ret def fromfile(filename, skip_checksum=False): @@ -917,7 +974,8 @@ def fromfile(filename, skip_checksum=False): Returns ------- object - SigMFFile object with dataset & metadata or a SigMFCollection depending on the type of file + SigMFFile object(s) with dataset & metadata or a SigMFCollection + depending on the type of file ''' fns = get_sigmf_filenames(filename) meta_fn = fns['meta_fn'] @@ -944,7 +1002,10 @@ def fromfile(filename, skip_checksum=False): meta_fp.close() data_fn = get_dataset_filename_from_metadata(meta_fn, metadata) - return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum) + return SigMFFile(name=fns['base_fn'], + metadata=metadata, + data_file=data_fn, + skip_checksum=skip_checksum) def get_sigmf_filenames(filename): diff --git a/tests/conftest.py b/tests/conftest.py index 9a8aa64..60f0be4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,22 +24,63 @@ from sigmf.sigmffile import SigMFFile -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import (TEST_FLOAT32_DATA_1, + TEST_METADATA_1, + TEST_FLOAT32_DATA_2, + TEST_METADATA_2, + TEST_FLOAT32_DATA_3, + TEST_METADATA_3) @pytest.fixture -def test_data_file(): +def test_data_file_1(): with tempfile.NamedTemporaryFile() as temp: - TEST_FLOAT32_DATA.tofile(temp.name) + TEST_FLOAT32_DATA_1.tofile(temp.name) yield temp @pytest.fixture -def test_sigmffile(test_data_file): - sigf = SigMFFile() - sigf.set_global_field("core:datatype", "rf32_le") - sigf.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA)) - sigf.add_capture(start_index=0) - sigf.set_data_file(test_data_file.name) - assert sigf._metadata == TEST_METADATA - return sigf +def test_data_file_2(): + with tempfile.NamedTemporaryFile() as t: + TEST_FLOAT32_DATA_2.tofile(t.name) + yield t + + +@pytest.fixture +def test_data_file_3(): + with tempfile.NamedTemporaryFile() as t: + TEST_FLOAT32_DATA_3.tofile(t.name) + yield t + + +@pytest.fixture +def test_sigmffile(test_data_file_1): + f = SigMFFile(name='test1') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_1)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_1.name) + assert f._metadata == TEST_METADATA_1 + return f + + +@pytest.fixture +def test_alternate_sigmffile(test_data_file_2): + f = SigMFFile(name='test2') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_2)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_2.name) + assert f._metadata == TEST_METADATA_2 + return f + + +@pytest.fixture +def test_alternate_sigmffile_2(test_data_file_3): + f = SigMFFile(name='test3') + f.set_global_field("core:datatype", "rf32_le") + f.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA_3)) + f.add_capture(start_index=0) + f.set_data_file(test_data_file_3.name) + assert f._metadata == TEST_METADATA_3 + return f diff --git a/tests/test_archive.py b/tests/test_archive.py index 5c3d67b..2b77493 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,5 +1,7 @@ import codecs import json +import os +from pathlib import Path import tarfile import tempfile from os import path @@ -8,10 +10,12 @@ import pytest import jsonschema 
-from sigmf import error -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf import error, sigmffile +from sigmf.archive import (SIGMF_DATASET_EXT, + SIGMF_METADATA_EXT, + SigMFArchive) -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 def create_test_archive(test_sigmffile, tmpfile): @@ -24,20 +28,20 @@ def test_without_data_file_throws_fileerror(test_sigmffile): test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=temp.name) + test_sigmffile.archive(file_path=temp.name) def test_invalid_md_throws_validationerror(test_sigmffile): del test_sigmffile._metadata["global"]["core:datatype"] # required field with tempfile.NamedTemporaryFile() as temp: with pytest.raises(jsonschema.exceptions.ValidationError): - test_sigmffile.archive(name=temp.name) + test_sigmffile.archive(file_path=temp.name) def test_name_wrong_extension_throws_fileerror(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=temp.name + ".zip") + test_sigmffile.archive(file_path=temp.name + ".zip") def test_fileobj_extension_ignored(test_sigmffile): @@ -47,17 +51,19 @@ def test_fileobj_extension_ignored(test_sigmffile): def test_name_used_in_fileobj(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: - sigmf_archive = test_sigmffile.archive(name="testarchive", fileobj=temp) + sigmf_archive = test_sigmffile.archive(file_path="testarchive", + fileobj=temp) sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") basedir, file1, file2 = sigmf_tarfile.getmembers() - assert basedir.name == "testarchive" + assert basedir.name == test_sigmffile.name + assert sigmf_tarfile.name == temp.name def filename(tarinfo): path_root, _ = path.splitext(tarinfo.name) return path.split(path_root)[-1] - assert filename(file1) == "testarchive" - assert filename(file2) == "testarchive" + assert filename(file1) == test_sigmffile.name + assert filename(file2) == test_sigmffile.name def test_fileobj_not_closed(test_sigmffile): @@ -77,7 +83,7 @@ def test_unwritable_name_throws_fileerror(test_sigmffile): # so use invalid filename unwritable_file = '/bad_name/' with pytest.raises(error.SigMFFileError): - test_sigmffile.archive(name=unwritable_file) + test_sigmffile.archive(file_path=unwritable_file) def test_tarfile_layout(test_sigmffile): @@ -93,21 +99,59 @@ def test_tarfile_names_and_extensions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) basedir, file1, file2 = sigmf_tarfile.getmembers() - archive_name = basedir.name - assert archive_name == path.split(temp.name)[-1] + assert basedir.name == test_sigmffile.name + archive_name = sigmf_tarfile.name + assert archive_name == temp.name + path.split(temp.name)[-1] file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT} file1_name, file1_ext = path.splitext(file1.name) assert file1_ext in file_extensions - assert path.split(file1_name)[-1] == archive_name + assert path.split(file1_name)[-1] == test_sigmffile.name file_extensions.remove(file1_ext) file2_name, file2_ext = path.splitext(file2.name) - assert path.split(file2_name)[-1] == archive_name + assert path.split(file2_name)[-1] == test_sigmffile.name assert file2_ext in file_extensions +def test_tarfile_names_and_extensions_with_paths(test_sigmffile): + with tempfile.NamedTemporaryFile() as temp: + 
test_sigmffile.name = os.path.join("test_folder", "test") + sigmf_tarfile = create_test_archive(test_sigmffile, temp) + basedir, subdir, file1, file2 = sigmf_tarfile.getmembers() + assert basedir.name == path.split(test_sigmffile.name)[0] + assert subdir.name == test_sigmffile.name + archive_name = sigmf_tarfile.name + assert archive_name == temp.name + path.split(temp.name)[-1] + file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT} + + file1_name, file1_ext = path.splitext(file1.name) + assert file1_ext in file_extensions + assert path.dirname(file1_name) == test_sigmffile.name + assert path.basename(file1_name) == path.basename(test_sigmffile.name) + + file_extensions.remove(file1_ext) + + file2_name, file2_ext = path.splitext(file2.name) + assert path.dirname(file2_name) == test_sigmffile.name + assert path.basename(file2_name) == path.basename(test_sigmffile.name) + assert file2_ext in file_extensions + + +def test_multirec_archive_into_fileobj(test_sigmffile, + test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as t: + # add first sigmffile to the fileobj t + create_test_archive(test_sigmffile, t) + # add a second one to the same fileobj + multirec_tar = create_test_archive(test_alternate_sigmffile, t) + members = multirec_tar.getmembers() + assert len(members) == 6 # 2 folders + 2 metadata files + 2 data files + + def test_tarfile_persmissions(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) @@ -130,17 +174,54 @@ def test_contents(test_sigmffile): bytestream_reader = codecs.getreader("utf-8") # bytes -> str mdfile_reader = bytestream_reader(sigmf_tarfile.extractfile(mdfile)) - assert json.load(mdfile_reader) == TEST_METADATA + assert json.load(mdfile_reader) == TEST_METADATA_1 datfile_reader = sigmf_tarfile.extractfile(datfile) # calling `fileno` on `tarfile.ExFileObject` throws error (?), but # np.fromfile requires it, so we need this extra step data = np.frombuffer(datfile_reader.read(), dtype=np.float32) - assert np.array_equal(data, TEST_FLOAT32_DATA) + assert np.array_equal(data, TEST_FLOAT32_DATA_1) def test_tarfile_type(test_sigmffile): with tempfile.NamedTemporaryFile() as temp: sigmf_tarfile = create_test_archive(test_sigmffile, temp) assert sigmf_tarfile.format == tarfile.PAX_FORMAT + + +def test_create_archive_pathlike(test_sigmffile, test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as t: + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=Path(t.name)) + output_sigmf_files = sigmffile.fromarchive(archive_path=arch.path) + assert len(output_sigmf_files) == 2 + assert input_sigmffiles == output_sigmf_files + + +def test_archive_names(test_sigmffile): + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + a = SigMFArchive(sigmffiles=test_sigmffile, path=t.name) + assert a.path == t.name + observed_sigmffile = sigmffile.fromarchive(t.name) + assert os.path.dirname(observed_sigmffile.name) == test_sigmffile.name + assert os.path.basename(observed_sigmffile.name) == test_sigmffile.name + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + archive_path = test_sigmffile.archive(t.name) + assert archive_path == t.name + observed_sigmffile = sigmffile.fromarchive(t.name) + assert os.path.dirname(observed_sigmffile.name) == test_sigmffile.name + assert os.path.basename(observed_sigmffile.name) == test_sigmffile.name + + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t: + test_sigmffile.tofile(t.name, 
toarchive=True) + observed_sigmffile = sigmffile.fromarchive(t.name) + assert os.path.dirname(observed_sigmffile.name) == test_sigmffile.name + assert os.path.basename(observed_sigmffile.name) == test_sigmffile.name + + +def test_archive_no_path_or_fileobj(test_sigmffile): + """Error should be raised when no path or fileobj given.""" + with pytest.raises(error.SigMFFileError): + SigMFArchive(test_sigmffile) diff --git a/tests/test_archivereader.py b/tests/test_archivereader.py index 2b5b449..187f809 100644 --- a/tests/test_archivereader.py +++ b/tests/test_archivereader.py @@ -1,15 +1,12 @@ -import codecs -import json -import tarfile +import os +import shutil import tempfile -from os import path import numpy as np -import pytest -from sigmf import error from sigmf import SigMFFile, SigMFArchiveReader -from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf.archive import SIGMF_METADATA_EXT, SigMFArchive + def test_access_data_without_untar(test_sigmffile): global_info = { @@ -24,7 +21,7 @@ def test_access_data_without_untar(test_sigmffile): "core:datetime": "2021-06-18T23:17:51.163959Z", "core:sample_start": 0 } - + NUM_ROWS = 5 for dt in "ri16_le", "ci16_le", "rf32_le", "rf64_le", "cf32_le", "cf64_le": @@ -33,7 +30,7 @@ def test_access_data_without_untar(test_sigmffile): global_info["core:num_channels"] = num_chan base_filename = dt + '_' + str(num_chan) archive_filename = base_filename + '.sigmf' - + a = np.arange(NUM_ROWS * num_chan * (2 if 'c' in dt else 1)) if 'i16' in dt: b = a.astype(np.int16) @@ -43,12 +40,102 @@ def test_access_data_without_untar(test_sigmffile): b = a.astype(np.float64) else: raise ValueError('whoops') - + test_sigmffile.data_file = None with tempfile.NamedTemporaryFile() as temp: b.tofile(temp.name) - meta = SigMFFile(data_file=temp.name, global_info=global_info) + meta = SigMFFile("test", + data_file=temp.name, + global_info=global_info) meta.add_capture(0, metadata=capture_info) meta.tofile(archive_filename, toarchive=True) archi = SigMFArchiveReader(archive_filename, skip_checksum=True) + + +def test_extract_single_recording(test_sigmffile): + with tempfile.NamedTemporaryFile() as tf: + expected_sigmffile = test_sigmffile + arch = SigMFArchive(expected_sigmffile, path=tf.name) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 1 + actual_sigmffile = reader[0] + assert expected_sigmffile == actual_sigmffile + + +def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile): + with tempfile.NamedTemporaryFile() as tf: + # Create a multi-recording archive + expected_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(expected_sigmffiles, path=tf.name) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 + for expected in expected_sigmffiles: + assert expected in reader.sigmffiles + + +def test_archivereader_different_folder(test_sigmffile, + test_alternate_sigmffile): + try: + os.makedirs("folder1", exist_ok=True) + test_sigmffile.name = os.path.join("folder1", "test1") + os.makedirs("folder2", exist_ok=True) + test_alternate_sigmffile.name = os.path.join("folder2", "test2") + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + with open(meta1_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + + os.makedirs("archive_folder", exist_ok=True) + archive_path = os.path.join("archive_folder", 
"test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) + if os.path.exists("folder1"): + shutil.rmtree("folder1") + if os.path.exists("folder2"): + shutil.rmtree("folder2") + if os.path.exists("archive_folder"): + shutil.rmtree("archive_folder") + + +def test_archivereader_same_folder(test_sigmffile, + test_alternate_sigmffile): + try: + os.makedirs("folder1", exist_ok=True) + test_sigmffile.name = os.path.join("folder1", "test1") + test_alternate_sigmffile.name = os.path.join("folder1", "test2") + meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT + with open(meta1_filepath, "w") as meta_fd: + test_sigmffile.dump(meta_fd) + meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT + with open(meta2_filepath, "w") as meta_fd: + test_alternate_sigmffile.dump(meta_fd) + archive_path = os.path.join("folder1", "test_archive.sigmf") + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + arch = SigMFArchive(input_sigmffiles, path=archive_path) + reader = SigMFArchiveReader(arch.path) + assert len(reader) == 2 # number of SigMFFiles + for actual_sigmffile in reader: + assert actual_sigmffile in input_sigmffiles + finally: + if os.path.exists(meta1_filepath): + os.remove(meta1_filepath) + if os.path.exists(meta2_filepath): + os.remove(meta2_filepath) + if os.path.exists(archive_path): + os.remove(archive_path) + if os.path.exists("folder1"): + shutil.rmtree("folder1") diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index e371964..da036d2 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -26,7 +26,8 @@ import unittest from sigmf import sigmffile, utils -from sigmf.sigmffile import SigMFFile +from sigmf.sigmffile import SigMFFile, fromarchive +from sigmf.archive import SigMFArchive from .testdata import * @@ -35,8 +36,10 @@ class TestClassMethods(unittest.TestCase): def setUp(self): '''assure tests have a valid SigMF object to work with''' _, temp_path = tempfile.mkstemp() - TEST_FLOAT32_DATA.tofile(temp_path) - self.sigmf_object = SigMFFile(TEST_METADATA, data_file=temp_path) + TEST_FLOAT32_DATA_1.tofile(temp_path) + self.sigmf_object = SigMFFile("test", + TEST_METADATA_1, + data_file=temp_path) def test_iterator_basic(self): '''make sure default batch_size works''' @@ -64,39 +67,88 @@ def simulate_capture(sigmf_md, n, capture_len): def test_default_constructor(): - SigMFFile() + SigMFFile(name="test") def test_set_non_required_global_field(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.set_global_field('this_is:not_in_the_schema', None) def test_add_capture(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.add_capture(start_index=0, metadata={}) def test_add_annotation(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") sigf.add_capture(start_index=0) meta = {"latitude": 40.0, "longitude": -105.0} sigf.add_annotation(start_index=0, length=128, metadata=meta) +def test_add_annotation_with_duplicate_key(): + f = SigMFFile(name="test") + f.add_capture(start_index=0) + m1 = {"latitude": 40.0, "longitude": -105.0} + f.add_annotation(start_index=0, length=128, 
metadata=m1) + m2 = {"latitude": 50.0, "longitude": -115.0} + f.add_annotation(start_index=0, length=128, metadata=m2) + assert len(f.get_annotations(64)) == 2 + + def test_fromarchive(test_sigmffile): print("test_sigmffile is:\n", test_sigmffile) tf = tempfile.mkstemp()[1] td = tempfile.mkdtemp() - archive_path = test_sigmffile.archive(name=tf) + archive_path = test_sigmffile.archive(file_path=tf) result = sigmffile.fromarchive(archive_path=archive_path, dir=td) - assert result._metadata == test_sigmffile._metadata == TEST_METADATA + assert result == test_sigmffile os.remove(tf) shutil.rmtree(td) +def test_fromarchive_multi_recording(test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2): + # single recording + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + test_sigmffile.archive(fileobj=t_file) + single_sigmffile = fromarchive(path) + assert isinstance(single_sigmffile, SigMFFile) + assert single_sigmffile == test_sigmffile + + # 2 recordings + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + input_sigmffiles = [test_sigmffile, test_alternate_sigmffile] + SigMFArchive(input_sigmffiles, fileobj=t_file) + sigmffile_one, sigmffile_two = fromarchive(path) + assert isinstance(sigmffile_one, SigMFFile) + assert sigmffile_one == test_sigmffile + assert isinstance(sigmffile_two, SigMFFile) + assert sigmffile_two == test_alternate_sigmffile + + # 3 recordings + with tempfile.NamedTemporaryFile(suffix=".sigmf") as t_file: + path = t_file.name + input_sigmffiles = [test_sigmffile, + test_alternate_sigmffile, + test_alternate_sigmffile_2] + SigMFArchive(input_sigmffiles, fileobj=t_file) + list_of_sigmffiles = fromarchive(path) + assert len(list_of_sigmffiles) == 3 + assert isinstance(list_of_sigmffiles[0], SigMFFile) + assert list_of_sigmffiles[0] == test_sigmffile + assert isinstance(list_of_sigmffiles[1], SigMFFile) + assert list_of_sigmffiles[1] == test_alternate_sigmffile + assert isinstance(list_of_sigmffiles[2], SigMFFile) + assert list_of_sigmffiles[2] == test_alternate_sigmffile_2 + + def test_add_multiple_captures_and_annotations(): - sigf = SigMFFile() + sigf = SigMFFile(name="test") for idx in range(3): simulate_capture(sigf, idx, 1024) @@ -124,6 +176,7 @@ def test_multichannel_types(): # for real or complex check_count = raw_count * 1 # deepcopy temp_signal = SigMFFile( + name="test", data_file=temp_path, global_info={ SigMFFile.DATATYPE_KEY: f'{complex_prefix}{key}_le', @@ -149,6 +202,7 @@ def test_multichannel_seek(): # write some dummy data and read back np.arange(18, dtype=np.uint16).tofile(temp_path) temp_signal = SigMFFile( + name="test", data_file=temp_path, global_info={ SigMFFile.DATATYPE_KEY: 'cu16_le', @@ -163,7 +217,7 @@ def test_multichannel_seek(): def test_key_validity(): '''assure the keys in test metadata are valid''' - for top_key, top_val in TEST_METADATA.items(): + for top_key, top_val in TEST_METADATA_1.items(): if type(top_val) is dict: for core_key in top_val.keys(): assert core_key in vars(SigMFFile)[f'VALID_{top_key.upper()}_KEYS'] @@ -178,7 +232,7 @@ def test_key_validity(): def test_ordered_metadata(): '''check to make sure the metadata is sorted as expected''' - sigf = SigMFFile() + sigf = SigMFFile(name="test") top_sort_order = ['global', 'captures', 'annotations'] for kdx, key in enumerate(sigf.ordered_metadata()): assert kdx == top_sort_order.index(key) diff --git a/tests/test_validation.py b/tests/test_validation.py index 75cf048..57a186c 100644 --- 
a/tests/test_validation.py +++ b/tests/test_validation.py @@ -26,18 +26,19 @@ from jsonschema.exceptions import ValidationError -from .testdata import TEST_FLOAT32_DATA, TEST_METADATA +from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 def test_valid_data(): '''assure the supplied metadata is OK''' - invalid_metadata = dict(TEST_METADATA) - SigMFFile(TEST_METADATA).validate() + invalid_metadata = dict(TEST_METADATA_1) + SigMFFile("test", TEST_METADATA_1).validate() + class FailingCases(unittest.TestCase): '''Cases where the validator should throw an exception.''' def setUp(self): - self.metadata = dict(TEST_METADATA) + self.metadata = dict(TEST_METADATA_1) def test_extra_top_level_key(self): '''no extra keys allowed on the top level''' @@ -45,7 +46,7 @@ def test_extra_top_level_key(self): with self.assertRaises(ValidationError): SigMFFile(self.metadata).validate() - def test_extra_top_level_key(self): + def test_invalid_label(self): '''label must be less than 20 chars''' self.metadata[SigMFFile.ANNOTATION_KEY][0][SigMFFile.LABEL_KEY] = 'a' * 21 with self.assertRaises(ValidationError): @@ -83,7 +84,7 @@ def test_invalid_annotation_order(self): def test_invalid_hash(self): _, temp_path = tempfile.mkstemp() - TEST_FLOAT32_DATA.tofile(temp_path) + TEST_FLOAT32_DATA_1.tofile(temp_path) self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = 'derp' with self.assertRaises(sigmf.error.SigMFFileError): - SigMFFile(metadata=self.metadata, data_file=temp_path) + SigMFFile(name="test", metadata=self.metadata, data_file=temp_path) diff --git a/tests/testdata.py b/tests/testdata.py index 0a0d5ed..db55c56 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -25,9 +25,9 @@ from sigmf import __version__ from sigmf import SigMFFile -TEST_FLOAT32_DATA = np.arange(16, dtype=np.float32) +TEST_FLOAT32_DATA_1 = np.arange(16, dtype=np.float32) -TEST_METADATA = { +TEST_METADATA_1 = { SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], SigMFFile.GLOBAL_KEY: { @@ -38,6 +38,32 @@ } } +TEST_FLOAT32_DATA_2 = np.arange(16, 32, dtype=np.float32) + +TEST_METADATA_2 = { + SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.GLOBAL_KEY: { + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.HASH_KEY: 'a85018cf117a4704596c0f360dbc3fce2d0d561966d865b9b8a356634161bde6a528c5181837890a9f4d54243e2e8eaf7e19bd535e54e3e34aabf76793723d03', + SigMFFile.NUM_CHANNELS_KEY: 1, + SigMFFile.VERSION_KEY: __version__ + } +} + +TEST_FLOAT32_DATA_3 = np.arange(32, 48, dtype=np.float32) + +TEST_METADATA_3 = { + SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], + SigMFFile.GLOBAL_KEY: { + SigMFFile.DATATYPE_KEY: 'rf32_le', + SigMFFile.HASH_KEY: '089753bd48a1724c485e822eaf4d510491e4e54faa83cc3e7b3f18a9f651813190862aa97c922278454c66f20a741050762e008cbe4f96f3bd0dcdb7d720179d', + SigMFFile.NUM_CHANNELS_KEY: 1, + SigMFFile.VERSION_KEY: __version__ + } +} + # Data0 is a test of a compliant two capture recording TEST_U8_DATA0 = list(range(256)) TEST_U8_META0 = { From 43d5bd3f7fd806199822829af26fa0235bb4c1f8 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 15 Jun 2023 15:28:18 -0600 Subject: [PATCH 02/14] minor improvements --- sigmf/archive.py | 2 +- tests/test_sigmffile.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff 
--git a/sigmf/archive.py b/sigmf/archive.py index 287651c..210d28e 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -59,7 +59,7 @@ def __init__(self, Iterable["sigmf.sigmffile.SigMFFile"]], path: Union[str, os.PathLike] = None, fileobj: BinaryIO = None, - pretty=True): + pretty: bool = True): if (not path) and (not fileobj): raise SigMFFileError("'path' or 'fileobj' required for creating " diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index da036d2..1121ca3 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -90,9 +90,9 @@ def test_add_annotation(): def test_add_annotation_with_duplicate_key(): f = SigMFFile(name="test") f.add_capture(start_index=0) - m1 = {"latitude": 40.0, "longitude": -105.0} + m1 = {"test_name_1": "test_value_1", "test_name_2": "test_value_2"} f.add_annotation(start_index=0, length=128, metadata=m1) - m2 = {"latitude": 50.0, "longitude": -115.0} + m2 = {"test_name_1": "test_value_3", "test_name_2": "test_value_4"} f.add_annotation(start_index=0, length=128, metadata=m2) assert len(f.get_annotations(64)) == 2 From 2d4d183c88d3e70ea4feafea2fc631fcae547d74 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 16 Jun 2023 09:13:28 -0600 Subject: [PATCH 03/14] minor improvements to README --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 4f2832c..2f1c9c1 100644 --- a/README.md +++ b/README.md @@ -60,15 +60,15 @@ from sigmf.sigmffile import (fromarchive, fromfile) # read multirecording archive using fromarchive -sigmffiles = fromarchive("multi_recording_archive1.sigmf") +sigmffiles = fromarchive("multi_recording_archive.sigmf") print(len(sigmffiles)) # read multirecording archive using fromfile -sigmffiles = fromfile("multi_recording_archive1.sigmf") +sigmffiles = fromfile("multi_recording_archive.sigmf") print(len(sigmffiles)) # # read multirecording archive using SigMFArchiveReader -reader = SigMFArchiveReader("multi_recording_archive1.sigmf") +reader = SigMFArchiveReader("multi_recording_archive.sigmf") # length of reader and reader.sigmffiles should be the same print(len(reader)) print(len(reader.sigmffiles)) @@ -219,26 +219,26 @@ from sigmf.sigmffile import (SigMFFile, # create data file -random_data1 = np.random.rand(128) -data1_path = "recording1.sigmf-data" -random_data1.tofile(data1_path) +random_data = np.random.rand(128) +data_path = "recording.sigmf-data" +random_data.tofile(data_path) # create metadata -sigmf_file_1 = SigMFFile(name='recording1') -sigmf_file_1.set_global_field("core:datatype", "rf32_le") -sigmf_file_1.add_annotation(start_index=0, length=len(random_data1)) -sigmf_file_1.add_capture(start_index=0) -sigmf_file_1.set_data_file(data1_path) +sigmf_file = SigMFFile(name='recording') +sigmf_file.set_global_field("core:datatype", "rf32_le") +sigmf_file.add_annotation(start_index=0, length=len(random_data)) +sigmf_file.add_capture(start_index=0) +sigmf_file.set_data_file(data_path) # create archive using SigMFArchive -archive1 = SigMFArchive(sigmffiles=sigmf_file_1, +archive = SigMFArchive(sigmffiles=sigmf_file, path="single_recording_archive1.sigmf") # create archive using SigMFFile archive() -archive1_path = sigmf_file_1.archive(file_path="single_recording_archive2.sigmf") +archive_path = sigmf_file.archive(file_path="single_recording_archive2.sigmf") # create archive using tofile -sigmf_file_1.tofile(file_path="single_recording_archive3.sigmf", +sigmf_file.tofile(file_path="single_recording_archive3.sigmf", 
toarchive=True) ``` @@ -277,8 +277,8 @@ sigmf_file_2.set_data_file(data2_path) # create archive using SigMFArchive sigmffiles = [sigmf_file_1, sigmf_file_2] -archive3 = SigMFArchive(sigmffiles=sigmffiles, - path="multi_recording_archive1.sigmf") +archive = SigMFArchive(sigmffiles=sigmffiles, + path="multi_recording_archive.sigmf") ``` ### Load a SigMF Archive and slice its data without untaring it From 36217d4f9e3b4579a52e48dc8cc19b311510792b Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 16 Jun 2023 12:32:27 -0600 Subject: [PATCH 04/14] improve docstring --- sigmf/sigmffile.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 58fade2..5a40317 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -159,13 +159,22 @@ def __init__(self, Parameters ---------- - name: Name used for directory and filenames if archived. - For example, given `name=archive1`, then passing this - sigmffile to SigMFArchive will add the following files - to the archive: - - archive1/ - - archive1.sigmf-meta - - archive1.sigmf-data + name: str, required + Name used for directory and filenames if archived. + For example, given `name=recording1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - recording1/ + - recording1.sigmf-meta + - recording1.sigmf-data + Folders can also be added in the name. For example, given + `name=folder1/recording1`, then passing this + sigmffile to SigMFArchive will add the following files + to the archive: + - folder1/ + - recording1/ + - recording1.sigmf-meta + - recording1.sigmf-data metadata: str or dict, optional Metadata for associated dataset. data_file: str, optional From 43e5071de0c32c94835e7f00940773bfc057b4f4 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Tue, 20 Jun 2023 10:46:38 -0600 Subject: [PATCH 05/14] fix bug with absolute path in SigMFFile name --- sigmf/archive.py | 13 ++++++++++--- tests/test_archive.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/sigmf/archive.py b/sigmf/archive.py index 210d28e..0971f0b 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -103,9 +103,16 @@ def chmod(tarinfo): return tarinfo for sigmffile in self.sigmffiles: - self._create_parent_dirs(sigmf_archive, sigmffile.name, chmod) - file_path = os.path.join(sigmffile.name, - os.path.basename(sigmffile.name)) + sigmffile_name = os.path.normpath(sigmffile.name) + if os.path.isabs(sigmffile_name): + # remove root path component to make relative path for tarfile + sigmffile_name_split = sigmffile_name.split(os.path.sep) + sigmffile_name = os.path.sep.join(sigmffile_name_split[1:]) + if os.path.isabs(sigmffile_name): + raise SigMFFileError("Invalid SigMFFile name") + self._create_parent_dirs(sigmf_archive, sigmffile_name, chmod) + file_path = os.path.join(sigmffile_name, + os.path.basename(sigmffile_name)) sf_md_filename = file_path + SIGMF_METADATA_EXT sf_data_filename = file_path + SIGMF_DATASET_EXT metadata = sigmffile.dumps(pretty=pretty) diff --git a/tests/test_archive.py b/tests/test_archive.py index 2b77493..294a10b 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -225,3 +225,34 @@ def test_archive_no_path_or_fileobj(test_sigmffile): """Error should be raised when no path or fileobj given.""" with pytest.raises(error.SigMFFileError): SigMFArchive(test_sigmffile) + + +def test_fromfile_name_to_archive(test_sigmffile): + """make sure creating an archive works when reading a 
sigmf-meta file with + absolute path + """ + try: + with open('/tmp/test_sigmf.sigmf-meta', 'w') as test_sigmf_meta_f: + test_sigmffile.dump(test_sigmf_meta_f) + read_sigmffile = sigmffile.fromfile('/tmp/test_sigmf.sigmf-meta') + assert read_sigmffile.name == '/tmp/test_sigmf' + read_sigmffile.set_data_file(data_file=test_sigmffile.data_file) + read_sigmffile.archive('/tmp/testarchive.sigmf') + sigmf_tar = tarfile.open('/tmp/testarchive.sigmf') + basedir, subdir, file1, file2 = sigmf_tar.getmembers() + assert basedir.name == 'tmp' + assert subdir.name == 'tmp/test_sigmf' + if file1.name.endswith(SIGMF_DATASET_EXT): + sigmf_data = file1 + sigmf_meta = file2 + else: + sigmf_data = file2 + sigmf_meta = file1 + + assert sigmf_data.name == 'tmp/test_sigmf/test_sigmf.sigmf-data' + assert sigmf_meta.name == 'tmp/test_sigmf/test_sigmf.sigmf-meta' + finally: + if os.path.exists('/tmp/test_sigmf.sigmf-meta'): + os.remove('/tmp/test_sigmf.sigmf-meta') + if os.path.exists('/tmp/testarchive.sigmf'): + os.remove('/tmp/testarchive.sigmf') From f5013abe2598db61b528770427138b6303555c11 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 23 Jun 2023 08:38:29 -0600 Subject: [PATCH 06/14] remove duplicate hash symbol --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f1c9c1..998e293 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ print(len(sigmffiles)) sigmffiles = fromfile("multi_recording_archive.sigmf") print(len(sigmffiles)) -# # read multirecording archive using SigMFArchiveReader +# read multirecording archive using SigMFArchiveReader reader = SigMFArchiveReader("multi_recording_archive.sigmf") # length of reader and reader.sigmffiles should be the same print(len(reader)) From e04c461790e9afeff22e5b6b3bd6c3e0e744ec5f Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 23 Jun 2023 09:53:16 -0600 Subject: [PATCH 07/14] added comment about length when reading archive, fixed broken readme example --- README.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 998e293..c751293 100644 --- a/README.md +++ b/README.md @@ -61,15 +61,17 @@ from sigmf.sigmffile import (fromarchive, # read multirecording archive using fromarchive sigmffiles = fromarchive("multi_recording_archive.sigmf") +# length should be equal to the number of recordings in the archive print(len(sigmffiles)) # read multirecording archive using fromfile sigmffiles = fromfile("multi_recording_archive.sigmf") +# length should be equal to the number of recordings in the archive print(len(sigmffiles)) # read multirecording archive using SigMFArchiveReader reader = SigMFArchiveReader("multi_recording_archive.sigmf") -# length of reader and reader.sigmffiles should be the same +# length of reader and reader.sigmffiles should be equal to the number of recordings in the archive print(len(reader)) print(len(reader.sigmffiles)) ``` @@ -297,13 +299,13 @@ In [1]: import sigmf In [2]: arc = sigmf.SigMFArchiveReader('/src/LTE.sigmf') -In [3]: arc.shape +In [3]: arc.sigmffiles[0].shape Out[3]: (15379532,) -In [4]: arc.ndim +In [4]: arc.sigmffiles[0].ndim Out[4]: 1 -In [5]: arc[:10] +In [5]: arc.sigmffiles[0][:10] Out[5]: array([-20.+11.j, -21. -6.j, -17.-20.j, -13.-52.j, 0.-75.j, 22.-58.j, 48.-44.j, 49.-60.j, 31.-56.j, 23.-47.j], dtype=complex64) @@ -316,13 +318,13 @@ However, the `.sigmffile` member keeps track of this, and converts the data to `numpy.complex64` *after* slicing it, that is, after reading it from disk. 
```python -In [6]: arc.sigmffile.get_global_field(sigmf.SigMFFile.DATATYPE_KEY) +In [6]: arc.sigmffiles[0].get_global_field(sigmf.SigMFFile.DATATYPE_KEY) Out[6]: 'ci16_le' -In [7]: arc.sigmffile._memmap.dtype +In [7]: arc.sigmffiles[0]._memmap.dtype Out[7]: dtype('int16') -In [8]: arc.sigmffile._return_type +In [8]: arc.sigmffiles[0]._return_type Out[8]: ' Date: Fri, 23 Jun 2023 10:16:20 -0600 Subject: [PATCH 08/14] fix typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c751293..596a363 100644 --- a/README.md +++ b/README.md @@ -245,7 +245,7 @@ sigmf_file.tofile(file_path="single_recording_archive3.sigmf", ``` ### Create SigMF Archives with Multiple Recordings -Archives with multiple recordings can be created using `SigMFArchive` class. +Archives with multiple recordings can be created using the `SigMFArchive` class. ```python import numpy as np From b9a70049a917433de1d7ca5cf2a6f8406811ee91 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 23 Jun 2023 10:40:58 -0600 Subject: [PATCH 09/14] add docstring for SigMFArchive._create_parent_dirs() --- sigmf/archive.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sigmf/archive.py b/sigmf/archive.py index 0971f0b..241b41f 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -137,6 +137,10 @@ def chmod(tarinfo): self.path = sigmf_archive.name def _create_parent_dirs(self, _tarfile, sigmffile_name, set_permission): + """ Create parent directory TarInfo objects if tarfile doesn't + already contain parent directories. Then call the set_permission + function and add to the tarfile. + """ path_components = sigmffile_name.split(os.path.sep) current_path = "" for path in path_components: From ee190e306b85314958e9fd5d1a9f9f7201dc700f Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 23 Jun 2023 13:56:21 -0600 Subject: [PATCH 10/14] fix bug saving sigmf data to archive and add test --- sigmf/archive.py | 5 +++++ tests/test_archive.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/sigmf/archive.py b/sigmf/archive.py index 241b41f..407832e 100644 --- a/sigmf/archive.py +++ b/sigmf/archive.py @@ -124,8 +124,13 @@ def chmod(tarinfo): sigmf_archive.addfile(metadata_tarinfo, fileobj=metadata_buffer) data_tarinfo = sigmf_archive.gettarinfo(name=sigmffile.data_file, arcname=sf_data_filename) + if sigmffile.offset_and_size: + data_tarinfo.size = sigmffile.offset_and_size[1] + data_tarinfo = chmod(data_tarinfo) with open(sigmffile.data_file, "rb") as data_file: + if sigmffile.offset_and_size: + data_file.seek(sigmffile.offset_and_size[0]) sigmf_archive.addfile(data_tarinfo, fileobj=data_file) sigmf_archive.close() diff --git a/tests/test_archive.py b/tests/test_archive.py index 294a10b..f7c097b 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -14,6 +14,7 @@ from sigmf.archive import (SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive) +from sigmf.archivereader import SigMFArchiveReader from .testdata import TEST_FLOAT32_DATA_1, TEST_METADATA_1 @@ -256,3 +257,21 @@ def test_fromfile_name_to_archive(test_sigmffile): os.remove('/tmp/test_sigmf.sigmf-meta') if os.path.exists('/tmp/testarchive.sigmf'): os.remove('/tmp/testarchive.sigmf') + + +def test_create_archive_from_archive_reader(test_sigmffile, + test_alternate_sigmffile): + """ This test is to ensure that SigMFArchive will correctly create archive + using SigMFFile offset_and_size which is set when using SigMFArchiveReader + """ + original_sigmffiles = [test_sigmffile, 
test_alternate_sigmffile] + with tempfile.TemporaryDirectory() as temp_dir: + archive_path1 = os.path.join(temp_dir, "original_archive.sigmf") + SigMFArchive(sigmffiles=original_sigmffiles, path=archive_path1) + reader = SigMFArchiveReader(path=archive_path1) + archive_path2 = os.path.join(temp_dir, "archive_from_reader.sigmf") + SigMFArchive(sigmffiles=reader.sigmffiles, path=archive_path2) + read_archive_from_reader = SigMFArchiveReader(path=archive_path2) + # SigMFFile.__eq__() method will check metadata + # which includes datafile hash + assert original_sigmffiles == read_archive_from_reader.sigmffiles From 3214e498e0c300f3fc9c94b3defe1556316f5c5b Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 26 Jun 2023 11:04:03 -0600 Subject: [PATCH 11/14] add check for collection file in archive --- sigmf/archivereader.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index a957a46..e40d73b 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -10,7 +10,10 @@ import tarfile from .sigmffile import SigMFFile -from .archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT +from .archive import (SIGMF_COLLECTION_EXT, + SIGMF_DATASET_EXT, + SIGMF_METADATA_EXT, + SIGMF_ARCHIVE_EXT) from .error import SigMFFileError @@ -76,6 +79,10 @@ def __init__(self, elif memb.name.endswith(SIGMF_DATASET_EXT): data_offset_size = memb.offset_data, memb.size data_found = True + elif memb.name.endswith(SIGMF_COLLECTION_EXT): + print('A SigMF Collection file ', + memb.name, + 'was found but not handled.') else: print('A regular file', memb.name, From 91e577f6b69a47d4e5924e4e5fcb033a01398c8f Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Mon, 26 Jun 2023 15:08:04 -0600 Subject: [PATCH 12/14] modify archivereader to read archives without sigmf-data files, fix sigmffile name when reading archives, change error to warning when no sigmf-data files in archive --- sigmf/archivereader.py | 66 +++++++++++++++++++++++++----------------- tests/test_archive.py | 13 +++++---- 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/sigmf/archivereader.py b/sigmf/archivereader.py index e40d73b..b61d9d5 100644 --- a/sigmf/archivereader.py +++ b/sigmf/archivereader.py @@ -8,6 +8,7 @@ import os import tarfile +import warnings from .sigmffile import SigMFFile from .archive import (SIGMF_COLLECTION_EXT, @@ -17,6 +18,13 @@ from .error import SigMFFileError +class ArchiveRecording: + """Holds TarInfo objects found in archive for a recording""" + def __init__(self) -> None: + self.metadata = None + self.data = None + + class SigMFArchiveReader(): """Access data within SigMF archive `tar` in-place without extracting. This class can be used to iterate through multiple SigMFFiles in the archive. @@ -55,8 +63,9 @@ def __init__(self, data_offset_size = None sigmffile_name = None self.sigmffiles = [] - data_found = False + recordings = [] + recording = ArchiveRecording() for memb in tar_obj.getmembers(): if memb.isdir(): # memb.type == tarfile.DIRTYPE: # the directory structure will be reflected in the member @@ -65,20 +74,13 @@ def __init__(self, elif memb.isfile(): # memb.type == tarfile.REGTYPE: if memb.name.endswith(SIGMF_METADATA_EXT): - json_contents = memb.name - if data_offset_size is None: - # consider a warnings.warn() here; the datafile - # should be earlier in the archive than the - # metadata, so that updating it (like, adding an - # annotation) is fast. 
-                        pass
-                    with tar_obj.extractfile(memb) as memb_fid:
-                        json_contents = memb_fid.read()
-
-                    sigmffile_name, _ = os.path.splitext(memb.name)
+                    if recording.metadata:
+                        # save previous recording
+                        recordings.append(recording)
+                        recording = ArchiveRecording()
+                    recording.metadata = memb
                 elif memb.name.endswith(SIGMF_DATASET_EXT):
-                    data_offset_size = memb.offset_data, memb.size
-                    data_found = True
+                    recording.data = memb
                 elif memb.name.endswith(SIGMF_COLLECTION_EXT):
                     print('A SigMF Collection file ',
                           memb.name,
                           'was found but not handled.')
@@ -94,25 +96,35 @@ def __init__(self,
                           memb.name,
                           'was found but not handled, just FYI.')
 
-            if data_offset_size is not None and json_contents is not None:
+        if recording.metadata:
+            recordings.append(recording)  # save final recording
+
+        if recordings:
+            for recording in recordings:
+                metadata = recording.metadata
+                sigmffile_name, _ = os.path.splitext(metadata.name)
+                sigmffile_name = os.path.dirname(sigmffile_name)
+                with tar_obj.extractfile(metadata) as memb_fid:
+                    json_contents = memb_fid.read()
                 sigmffile = SigMFFile(sigmffile_name,
                                       metadata=json_contents)
-                sigmffile.validate()
+                sigmffile.validate()
+                data = recording.data
+                if data:
+                    data_offset_size = data.offset_data, data.size
+                    sigmffile.set_data_file(self.path,
                                         data_buffer=archive_buffer,
                                         skip_checksum=skip_checksum,
                                         offset=data_offset_size[0],
                                         size_bytes=data_offset_size[1],
                                         map_readonly=map_readonly)
                 self.sigmffiles.append(sigmffile)
 
-                data_offset_size = None
-                json_contents = None
-                sigmffile_name = None
-
-        if not data_found:
-            raise SigMFFileError('No .sigmf-data file found in archive!')
+        if not any([r.data for r in recordings]):
+            warnings.warn(f"No file with {SIGMF_DATASET_EXT} extension"
+                          " found in archive!")
         finally:
             if tar_obj:
                 tar_obj.close()
diff --git a/tests/test_archive.py b/tests/test_archive.py
index f7c097b..900abc8 100644
--- a/tests/test_archive.py
+++ b/tests/test_archive.py
@@ -205,21 +205,18 @@ def test_archive_names(test_sigmffile):
         a = SigMFArchive(sigmffiles=test_sigmffile, path=t.name)
         assert a.path == t.name
         observed_sigmffile = sigmffile.fromarchive(t.name)
-        assert os.path.dirname(observed_sigmffile.name) == test_sigmffile.name
-        assert os.path.basename(observed_sigmffile.name) == test_sigmffile.name
+        assert observed_sigmffile.name == test_sigmffile.name
 
     with tempfile.NamedTemporaryFile(suffix=".sigmf") as t:
         archive_path = test_sigmffile.archive(t.name)
         assert archive_path == t.name
         observed_sigmffile = sigmffile.fromarchive(t.name)
-        assert os.path.dirname(observed_sigmffile.name) == test_sigmffile.name
-        assert os.path.basename(observed_sigmffile.name) == test_sigmffile.name
+        assert observed_sigmffile.name == test_sigmffile.name
 
     with tempfile.NamedTemporaryFile(suffix=".sigmf") as t:
         test_sigmffile.tofile(t.name, toarchive=True)
         observed_sigmffile = sigmffile.fromarchive(t.name)
-        assert os.path.dirname(observed_sigmffile.name) == test_sigmffile.name
-        assert os.path.basename(observed_sigmffile.name) == test_sigmffile.name
+        assert observed_sigmffile.name == test_sigmffile.name
 
 
 def test_archive_no_path_or_fileobj(test_sigmffile):
@@ -275,3 +272,7 @@ def test_create_archive_from_archive_reader(test_sigmffile,
     # SigMFFile.__eq__() method will check metadata
     # which includes datafile hash
     assert original_sigmffiles == read_archive_from_reader.sigmffiles
+    observed_sigmffile0 = read_archive_from_reader.sigmffiles[0]
+ observed_sigmffile1 = read_archive_from_reader.sigmffiles[1] + assert test_sigmffile.name == observed_sigmffile0.name + assert test_alternate_sigmffile.name == observed_sigmffile1.name From 6462dc2e1a8cb8fca5e9794d26998b9e0fd888b9 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Thu, 13 Jul 2023 14:48:39 -0600 Subject: [PATCH 13/14] remove `dir` parameter in fromarchive method --- sigmf/sigmffile.py | 5 +---- tests/test_sigmffile.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 5a40317..357064f 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -944,12 +944,9 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None): return None -def fromarchive(archive_path, dir=None): +def fromarchive(archive_path): """Extract an archive and return containing SigMFFiles. - The `dir` parameter is no longer used as this function has been changed to - access SigMF archives without extracting them. - If the archive contains a single recording, a single SigMFFile object will be returned. If the archive contains multiple recordings a list of SigMFFile objects will be returned. diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py index 1121ca3..f20ef5c 100644 --- a/tests/test_sigmffile.py +++ b/tests/test_sigmffile.py @@ -100,12 +100,10 @@ def test_add_annotation_with_duplicate_key(): def test_fromarchive(test_sigmffile): print("test_sigmffile is:\n", test_sigmffile) tf = tempfile.mkstemp()[1] - td = tempfile.mkdtemp() archive_path = test_sigmffile.archive(file_path=tf) - result = sigmffile.fromarchive(archive_path=archive_path, dir=td) + result = sigmffile.fromarchive(archive_path=archive_path) assert result == test_sigmffile os.remove(tf) - shutil.rmtree(td) def test_fromarchive_multi_recording(test_sigmffile, From 14fbe8089c1e12dc4b0ae5ecad9366af3988bc36 Mon Sep 17 00:00:00 2001 From: Justin Haze Date: Fri, 14 Jul 2023 11:04:21 -0600 Subject: [PATCH 14/14] fix README examples --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 596a363..5ec1725 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ data.tofile('example_cf32.sigmf-data') # create the metadata meta = SigMFFile( + name='example_cf32', data_file='example_cf32.sigmf-data', # extension is optional global_info = { SigMFFile.DATATYPE_KEY: get_data_type_str(data), # in this case, 'cf32_le' @@ -176,6 +177,7 @@ data_ci16.view(np.float32).astype(np.int16).tofile('example_ci16.sigmf-data') # create the metadata for the second file meta_ci16 = SigMFFile( + name='example_ci16', data_file='example_ci16.sigmf-data', # extension is optional global_info = { SigMFFile.DATATYPE_KEY: 'ci16_le', # get_data_type_str() is only valid for numpy types
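
The read-then-rearchive round trip exercised by `test_create_archive_from_archive_reader` in this series can also be sketched outside the test suite. The snippet below is a minimal illustration only, assuming the patches above are applied; `"multi_recording.sigmf"` is a placeholder path to an existing multi-recording archive and `"roundtrip.sigmf"` is an arbitrary output name, neither of which ships with the repository.

```python
from sigmf.archive import SigMFArchive
from sigmf.archivereader import SigMFArchiveReader

# "multi_recording.sigmf" is a placeholder; point it at a real
# multi-recording archive created as shown in the README examples.
reader = SigMFArchiveReader("multi_recording.sigmf")

# Each recording keeps the name (tar subdirectory) it was archived under.
for sf in reader.sigmffiles:
    print(sf.name)

# Because the reader records each dataset's offset and size within the tar,
# the recordings can be re-archived without extracting them first.
SigMFArchive(sigmffiles=reader.sigmffiles, path="roundtrip.sigmf")
```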