Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build-baseline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ jobs:
- name: Install node dependencies
run: npm ci
- name: Sync Python dependencies
if: runner.os != 'macOS' || runner.arch != 'X64' # PyTorch lacks Python 3.12 wheel for macOS x86_64
run: uv sync --project services/analysis-engine --group dev --frozen
Comment thread
coderabbitai[bot] marked this conversation as resolved.
- name: Build frontend
run: npm run build --workspace @bandscope/desktop
Expand Down
1 change: 1 addition & 0 deletions services/analysis-engine/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ version = "0.1.0"
description = "BandScope local-first analysis engine"
requires-python = ">=3.12"
dependencies = [
"demucs>=4.0.1",
"librosa>=0.11.0",
"numba<0.63.0",
"soundfile>=0.13.1",
Expand Down
14 changes: 14 additions & 0 deletions services/analysis-engine/src/bandscope_analysis/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,20 @@ def main() -> int:
except Exception as e:
logging.warning(f"Temporal analysis failed, continuing with mock: {e}")

logging.info(f"Performing stem separation on {audio_path}...")
try:
import librosa

from bandscope_analysis.separation.audio_separator import AudioStemSeparator

# Load only the first 10 seconds for the CLI proof to prevent hanging
y, sr = librosa.load(audio_path, sr=44100, mono=False, duration=10.0)
separator = AudioStemSeparator()
stems = separator.separate_audio(y, sample_rate=int(sr), segment_seconds=2.0)
logging.info(f"Successfully extracted {len(stems)} stems: {list(stems.keys())}")
except Exception as e:
logging.warning(f"Stem separation failed, continuing with mock: {e}")
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

requested_at = datetime.now(UTC).isoformat().replace("+00:00", "Z")
response = run_analysis_job(job_id, request, requested_at)
json.dump(response, sys.stdout)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Audio source separation using Demucs."""

from __future__ import annotations

import logging
from typing import Any

import numpy as np

try:
from torch import Tensor
except ImportError: # pragma: no cover
Tensor = Any # type: ignore

logger = logging.getLogger(__name__)


class AudioStemSeparator:
    """Isolates standard stems from an audio mix using Demucs.

    The Demucs model is loaded lazily on the first call to ``separate_audio``
    and cached on the instance, so constructing a separator is cheap.

    Security Notes:
        - Trust boundary: Audio input is passed as raw numpy arrays from a prior
          decoding step (e.g. librosa), reducing the risk of codec-based
          exploitation within Demucs itself.
        - Limits: Employs chunked inference (split=True) to bound peak memory
          (OOM avoidance).
        - Network: Downloads model weights to local cache on first run. Future
          executions should ideally be offline.
    """

    def __init__(self, model_name: str = "htdemucs") -> None:
        """Initialize the audio stem separator.

        Args:
            model_name: The name of the pretrained Demucs model to use.
        """
        self.model_name = model_name
        # Filled in by _load_model() on first use; None means "not loaded yet".
        self._model: Any | None = None

    def _load_model(self) -> Any:
        """Return the cached Demucs model, loading it on first use.

        Returns:
            The object returned by ``demucs.pretrained.get_model`` for
            ``self.model_name``, switched to eval mode.
        """
        if self._model is None:
            # Imported lazily so that importing this module does not require demucs.
            from demucs.pretrained import get_model

            logger.info("Loading demucs model '%s'...", self.model_name)
            self._model = get_model(self.model_name)
            if self._model is not None:
                self._model.eval()
        return self._model

    def separate_audio(
        self,
        audio_data: np.ndarray,
        sample_rate: int,
        segment_seconds: float = 10.0,
    ) -> dict[str, np.ndarray]:
        """Perform source separation on the given audio array.

        Args:
            audio_data: The input audio waveform, shape (channels, samples).
                A 1-D mono array (samples,) is accepted and treated as a
                single channel; the channel count is then adapted to what the
                model expects.
            sample_rate: The sample rate of the input audio in Hz.
            segment_seconds: The length of each chunk for OOM-safe processing.
                If the loaded model advertises a smaller maximum segment, the
                value is clamped to that maximum.

        Returns:
            A dictionary mapping each name in ``model.sources`` (for the default
            model: 'vocals', 'bass', 'drums', 'other') to its separated waveform
            of shape (channels, samples). The waveforms are at the model's own
            sample rate and channel count, because the mix is converted to
            those before inference.

        Raises:
            ValueError: If ``audio_data`` is not 1-D or 2-D, contains no samples,
                or if ``sample_rate`` / ``segment_seconds`` is not positive.
        """
        # Validate before the heavy torch/demucs imports and the model load so
        # that bad input fails fast with a clear message.
        samples = np.asarray(audio_data)
        if samples.ndim == 1:
            samples = samples[np.newaxis, :]
        if samples.ndim != 2:
            raise ValueError(
                "audio_data must have shape (samples,) or (channels, samples), "
                f"got {samples.ndim} dimensions"
            )
        if samples.shape[-1] == 0:
            raise ValueError("audio_data contains no samples")
        if sample_rate <= 0:
            raise ValueError(f"sample_rate must be positive, got {sample_rate}")
        if segment_seconds <= 0:
            raise ValueError(f"segment_seconds must be positive, got {segment_seconds}")

        import torch
        from demucs.apply import apply_model
        from demucs.audio import convert_audio

        model = self._load_model()

        # torch.from_numpy rejects negatively-strided views and some dtypes, so
        # hand it a contiguous float32 copy instead of the caller's array.
        mix = torch.from_numpy(np.ascontiguousarray(samples, dtype=np.float32))

        # Resample / remix to the sample rate and channel count the model expects.
        mix = convert_audio(  # type: ignore
            mix,
            sample_rate,
            model.samplerate,
            model.audio_channels,
        )

        # Add batch dimension: (1, channels, samples)
        mix = mix.unsqueeze(0)

        # Prefer CUDA, then Apple MPS, then CPU. Older torch builds may not ship
        # the mps backend module at all, hence the getattr guard.
        mps_backend = getattr(torch.backends, "mps", None)
        if torch.cuda.is_available():
            device = "cuda"
        elif mps_backend is not None and mps_backend.is_available():
            device = "mps"
        else:
            device = "cpu"

        model.to(device)
        mix = mix.to(device)

        # NOTE(review): Demucs model bags appear to expose ``max_allowed_segment``
        # (finite for transformer models such as htdemucs) and the Demucs CLI
        # refuses longer segments - confirm against the installed Demucs version.
        # The isinstance guard keeps this a no-op when the attribute is absent
        # or not numeric.
        segment = segment_seconds
        max_segment = getattr(model, "max_allowed_segment", None)
        if isinstance(max_segment, (int, float)) and segment > max_segment:
            logger.warning(
                "Requested segment of %ss exceeds model maximum of %ss; clamping.",
                segment,
                max_segment,
            )
            segment = float(max_segment)

        logger.info("Applying model to mix using device %s...", device)
        # Apply model with chunking
        with torch.no_grad():
            stems = apply_model(
                model,
                mix,
                shifts=1,
                split=True,
                overlap=0.25,
                segment=segment,
                progress=False,
            )

        # stems shape: [batch, sources, channels, samples]; drop the batch dim.
        stems_np: np.ndarray = stems[0].cpu().numpy()

        return {source_name: stems_np[idx] for idx, source_name in enumerate(model.sources)}
124 changes: 124 additions & 0 deletions services/analysis-engine/tests/test_audio_separator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Tests for audio stem separation."""

from unittest import mock

import numpy as np
import pytest

from bandscope_analysis.separation.audio_separator import AudioStemSeparator


@pytest.fixture
def mock_demucs_model():
    """Build a stand-in Demucs model exposing the attributes the separator reads."""
    # MagicMock keyword arguments are set as attributes on the created mock.
    return mock.MagicMock(
        sources=["drums", "bass", "other", "vocals"],
        samplerate=44100,
        audio_channels=2,
    )


@mock.patch("bandscope_analysis.separation.audio_separator.logger")
@mock.patch("demucs.audio.convert_audio")
@mock.patch("demucs.apply.apply_model")
@mock.patch("demucs.pretrained.get_model")
def test_audio_stem_separator(
    mock_get_model, mock_apply_model, mock_convert_audio, mock_logger, mock_demucs_model
):
    """Check that AudioStemSeparator wires the mocked Demucs pieces together."""
    import torch

    expected_stems = ("drums", "bass", "other", "vocals")
    call_kwargs = {"sample_rate": 22050, "segment_seconds": 2.0}

    mock_get_model.return_value = mock_demucs_model
    # convert_audio hands back a (channels, samples) tensor directly.
    mock_convert_audio.side_effect = lambda wav, from_sr, to_sr, channels: torch.zeros((2, 100))
    # apply_model output layout: (batch, sources, channels, samples).
    mock_apply_model.return_value = torch.ones((1, 4, 2, 100))

    separator = AudioStemSeparator(model_name="fake_model")
    mono_audio = np.zeros((100,))

    stems = separator.separate_audio(mono_audio, **call_kwargs)

    mock_get_model.assert_called_once_with("fake_model")
    mock_apply_model.assert_called_once()

    # One entry per model source, each carrying the mocked all-ones output.
    assert set(stems.keys()) == set(expected_stems)
    for stem_name in expected_stems:
        waveform = stems[stem_name]
        assert waveform.shape == (2, 100)
        assert np.all(waveform == 1.0)

    # A second call must reuse the cached model instead of loading it again.
    separator.separate_audio(mono_audio, **call_kwargs)
    assert mock_get_model.call_count == 1
    assert mock_apply_model.call_count == 2
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated


@mock.patch("bandscope_analysis.separation.audio_separator.logger")
@mock.patch("demucs.audio.convert_audio")
@mock.patch("demucs.apply.apply_model")
@mock.patch("demucs.pretrained.get_model")
@mock.patch("torch.from_numpy")
@mock.patch("torch.cuda.is_available")
@mock.patch("torch.backends.mps.is_available")
def test_audio_stem_separator_device(
    mock_mps,
    mock_cuda,
    mock_from_numpy,
    mock_get_model,
    mock_apply_model,
    mock_convert_audio,
    mock_logger,
    mock_demucs_model,
):
    """Test that device selection (cuda, mps, cpu) falls back correctly.

    torch.from_numpy and convert_audio are mocked so no real tensors exist;
    this avoids actual ``.to("cuda")`` calls that would fail on machines
    whose PyTorch build has no CUDA support.
    """
    mock_get_model.return_value = mock_demucs_model

    # A single mock tensor that survives every tensor operation the separator
    # performs, so the final ``.to(device)`` call can be inspected.
    mock_tensor = mock.MagicMock()
    mock_from_numpy.return_value.float.return_value = mock_tensor
    mock_convert_audio.return_value = mock_tensor
    mock_tensor.unsqueeze.return_value = mock_tensor
    mock_tensor.to.return_value = mock_tensor

    # Make ``stems[0].cpu().numpy()`` yield a (sources, channels, samples) array.
    mock_stems_item = mock.MagicMock()
    mock_stems_item.cpu.return_value.numpy.return_value = np.zeros((4, 2, 100))
    mock_stems = mock.MagicMock()
    mock_stems.__getitem__.return_value = mock_stems_item
    mock_apply_model.return_value = mock_stems

    separator = AudioStemSeparator(model_name="fake_model")
    audio_data = np.zeros((2, 100))  # Test stereo

    # (cuda available, mps available, expected device). The second row pins
    # the priority order: CUDA wins when both accelerators are reported.
    scenarios = [
        (True, False, "cuda"),
        (True, True, "cuda"),
        (False, True, "mps"),
        (False, False, "cpu"),
    ]
    for cuda_available, mps_available, expected_device in scenarios:
        mock_cuda.return_value = cuda_available
        mock_mps.return_value = mps_available

        result = separator.separate_audio(audio_data, sample_rate=22050, segment_seconds=2.0)

        assert set(result.keys()) == {"drums", "bass", "other", "vocals"}
        # Both the input mix and the model must land on the selected device.
        mock_tensor.to.assert_called_with(expected_device)
        mock_demucs_model.to.assert_called_with(expected_device)
69 changes: 69 additions & 0 deletions services/analysis-engine/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,3 +372,72 @@ def analyze(self, path):
assert cli.main() == 0
res = json.loads(stdout.getvalue())
assert res["jobId"] == "job-audio-success"


def test_cli_main_temporal_analyzer_and_separator_mock_success(monkeypatch) -> None:
    """Run ``cli.main`` with the analyzer, librosa loader and stem separator faked."""
    import io
    import json

    import librosa
    import numpy as np

    from bandscope_analysis import cli
    from bandscope_analysis.separation import audio_separator as separator_module

    job_payload = {
        "jobId": "job-audio-success-sep",
        "request": {
            "sourceKind": "local_audio",
            "projectId": "p1",
            "sourceLabel": "test.wav",
            "roleFocus": [],
            "localSource": {
                "sourcePath": "/valid/path.wav",
                "fileName": "test.wav",
                "extension": "wav",
                "fileSizeBytes": 100,
            },
        },
    }
    fake_stdin = io.StringIO(json.dumps(job_payload))
    fake_stdout = io.StringIO()

    def silent_stereo():
        # Fresh (channels, samples) buffer of zeros for every caller.
        return np.zeros((2, 100), dtype=np.float32)

    class FakeAnalyzerSuccess:
        def analyze(self, path):
            return {"bpm": 120.0, "beats": []}

    class FakeAudioStemSeparator:
        def separate_audio(self, audio, sample_rate, segment_seconds=2.0):
            return {name: silent_stereo() for name in ("vocals", "drums", "bass", "other")}

    def fake_librosa_load(path, sr, mono, duration):
        return silent_stereo(), sr

    # Swap every heavy dependency for its fake before invoking the CLI.
    monkeypatch.setattr(librosa, "load", fake_librosa_load)
    monkeypatch.setattr(separator_module, "AudioStemSeparator", FakeAudioStemSeparator)
    monkeypatch.setattr(cli, "TemporalAnalyzer", FakeAnalyzerSuccess)
    monkeypatch.setattr(cli.sys, "stdin", fake_stdin)
    monkeypatch.setattr(cli.sys, "stdout", fake_stdout)
    monkeypatch.setattr(cli.sys, "argv", ["cli.py"])

    exit_code = cli.main()

    assert exit_code == 0
    response = json.loads(fake_stdout.getvalue())
    assert response["jobId"] == "job-audio-success-sep"
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Loading
Loading