Skip to content

Commit

Permalink
fix: entity splitting bug (#63)
Browse files Browse the repository at this point in the history
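Splitting entities was implemented under the assumption that entity IDs are monotonically increasing, i.e. that the entities appear in the data in ascending ID order. This assumption holds for DS01 but not for most other sub-datasets. The old implementation derived the split points from the cumulative counts of the sorted unique IDs, which only matches the data layout when that ordering holds; the fix instead uses the index of each ID's first occurrence.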
  • Loading branch information
tilman151 authored May 23, 2024
1 parent a9bf0d3 commit 2ba0afb
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
5 changes: 3 additions & 2 deletions rul_datasets/reader/ncmapss.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,9 @@ def _window_by_cycle(

@staticmethod
def _get_end_idx(identifiers):
_, split_idx = np.unique(identifiers, return_counts=True)
split_idx = np.cumsum(split_idx)
_, split_idx = np.unique(identifiers, return_index=True)
split_idx = np.sort(split_idx)
split_idx = np.concatenate([split_idx[1:], [len(identifiers)]])

return split_idx

Expand Down
19 changes: 17 additions & 2 deletions tests/reader/test_ncmapss.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ def test_max_rul(max_rul, prepared_ncmapss):


@pytest.mark.needs_data
def test__split_by_unit(prepared_ncmapss):
reader = NCmapssReader(1)
@pytest.mark.parametrize("fd", range(1, 8))
def test__split_by_unit(fd, prepared_ncmapss):
reader = NCmapssReader(fd)
features, targets, auxiliary = reader._load_raw_data()
features, targets, auxiliary = reader._split_by_unit(features, targets, auxiliary)

Expand All @@ -113,6 +114,20 @@ def test__split_by_unit(prepared_ncmapss):
assert np.unique(auxiliary[i][:, 0]).size == 1 # only one unit id present


@pytest.mark.needs_data
@pytest.mark.parametrize("fd", range(1, 8))
def test__get_end_idx_for_cycles(fd, prepared_ncmapss):
reader = NCmapssReader(fd)
features, targets, auxiliary = reader._load_raw_data()
features, targets, auxiliary = reader._split_by_unit(features, targets, auxiliary)

for aux in auxiliary:
cycle_end_idx = reader._get_end_idx(aux[:, 1])
split_aux = np.split(aux, cycle_end_idx[:-1])
for cycle in split_aux:
assert np.unique(cycle[:, 1]).size == 1 # only one cycle id present


@pytest.mark.needs_data
@pytest.mark.parametrize("window_size", [10, 100])
def test_padding_and_window_size(window_size, prepared_ncmapss):
Expand Down

0 comments on commit 2ba0afb

Please sign in to comment.