seonghobae · seonghobae · Apr 25, 2026 · Apr 25, 2026
@@ -0,0 +1,121 @@
+import type { TranscriptionNote } from "@bandscope/shared-types";
+
+/** Props accepted by the GrooveMap component. */
+interface GrooveMapProps {
+  /**
+   * Transcribed notes to draw. When omitted or empty, the component renders its
+   * empty-state placeholder. Declared readonly because the component only reads the list.
+   */
+  notes?: readonly TranscriptionNote[];
+  /** When true, the loading placeholder is rendered instead of the notes. */
+  isLoading?: boolean;
+}
+
+/** Height, in pixels, of a single pitch lane. */
+const LANE_HEIGHT_PX = 40;
+
+/** Lower bound applied to the timeline span (same unit as note onsets/offsets). */
+const MIN_TIMELINE_SPAN = 10;
+
+/** Semitone offset of each natural note name within an octave. */
+const NATURAL_SEMITONES: Record<string, number> = { C: 0, D: 2, E: 4, F: 5, G: 7, A: 9, B: 11 };
+
+/**
+ * Converts a scientific-pitch label such as "E1", "F#2" or "Bb0" to a MIDI note number.
+ *
+ * @param pitch - Pitch label to parse.
+ * @returns The MIDI note number, or `null` when the label is not in that form.
+ */
+function pitchToMidi(pitch: string): number | null {
+  const match = /^([A-Ga-g])([#b♯♭]?)(-?\d+)$/.exec(pitch.trim());
+  if (!match) {
+    return null;
+  }
+  const base = NATURAL_SEMITONES[(match[1] ?? "").toUpperCase()];
+  if (base === undefined) {
+    return null;
+  }
+  const accidental = match[2] ?? "";
+  const shift = accidental === "" ? 0 : accidental === "#" || accidental === "♯" ? 1 : -1;
+  return (Number(match[3]) + 1) * 12 + base + shift;
+}
+
+/**
+ * Orders lane labels from highest pitch to lowest so the top lane is the highest note.
+ * Labels that cannot be parsed as pitches sort after parseable ones, alphabetically;
+ * enharmonic equivalents are tie-broken alphabetically to keep the order deterministic.
+ */
+function comparePitchLanes(a: string, b: string): number {
+  const midiA = pitchToMidi(a);
+  const midiB = pitchToMidi(b);
+  if (midiA !== null && midiB !== null && midiA !== midiB) {
+    return midiB - midiA;
+  }
+  if (midiA !== null && midiB === null) {
+    return -1;
+  }
+  if (midiA === null && midiB !== null) {
+    return 1;
+  }
+  return a < b ? -1 : a > b ? 1 : 0;
+}
+
+/**
+ * Renders a transcription as a collapsed timeline: one horizontal lane per pitch that
+ * actually occurs, with each note drawn as a block positioned by its onset and offset.
+ *
+ * Three states are rendered:
+ * - `isLoading` truthy: a polite live region with a progress message and a Cancel button;
+ * - `notes` missing or empty: an empty-state hint;
+ * - otherwise: the lanes and note blocks, plus an off-screen textual summary.
+ *
+ * @param props - See GrooveMapProps.
+ * @returns The element for the current state.
+ */
+export function GrooveMap({ notes, isLoading }: GrooveMapProps) {
+  if (isLoading) {
+    return (
+      <div
+        aria-live="polite"
+        style={{
+          marginTop: "16px",
+          padding: "24px",
+          backgroundColor: "#fff",
+          borderRadius: "8px",
+          border: "1px dashed #d9d9d9",
+          display: "flex",
+          justifyContent: "space-between",
+          alignItems: "center"
+        }}
+      >
+        {/* NOTE(review): the percentage is a hard-coded placeholder and Cancel has no handler yet. */}
+        <span style={{ color: "#1890ff" }}>Analyzing pitch... 45%</span>
+        {/* Explicit type so the button never submits an enclosing form. */}
+        <button type="button" style={{ padding: "4px 8px", cursor: "pointer" }}>Cancel</button>
+      </div>
+    );
+  }
+
+  if (!notes || notes.length === 0) {
+    return (
+      <div
+        style={{
+          marginTop: "16px",
+          padding: "24px",
+          backgroundColor: "#fafafa",
+          borderRadius: "8px",
+          border: "1px dashed #d9d9d9",
+          textAlign: "center",
+          color: "#999",
+          fontStyle: "italic"
+        }}
+      >
+        No transcription yet. Click to analyze bass line.
+      </div>
+    );
+  }
+
+  // Timeline span = latest offset, floored at MIN_TIMELINE_SPAN. A fold is used instead of
+  // spreading into Math.max because spreading a very long list can exceed the engine's
+  // argument limit; non-finite offsets are skipped so one bad note cannot turn every
+  // percentage below into NaN.
+  const maxTime = notes.reduce(
+    (longest, note) => (Number.isFinite(note.offset) ? Math.max(longest, note.offset) : longest),
+    MIN_TIMELINE_SPAN
+  );
+  // Only pitches that occur get a lane (no full 88-key roll), ordered by actual pitch
+  // rather than by the default string sort.
+  const uniquePitches = Array.from(new Set(notes.map(note => note.pitch))).sort(comparePitchLanes);
+  // Pitch -> lane index lookup, so placing each note does not rescan the lane list.
+  const laneByPitch = new Map(uniquePitches.map((pitch, lane) => [pitch, lane] as const));
+
+  return (
+    <div
+      style={{
+        marginTop: "16px",
+        padding: "16px",
+        backgroundColor: "#2c2c2c",
+        borderRadius: "8px",
+        overflowX: "auto",
+        position: "relative"
+      }}
+      role="region"
+      aria-label="Groove Map Transcription"
+    >
+      {/* Textual summary kept off-screen for assistive technology. */}
+      <div className="sr-only" style={{ position: "absolute", left: "-9999px" }}>
+        Transcription complete. {notes.length} notes analyzed.
+      </div>
+
+      <div
+        style={{
+          position: "relative",
+          minWidth: "100%",
+          height: `${uniquePitches.length * LANE_HEIGHT_PX}px`
+        }}
+      >
+        {/* Render horizontal lanes for unique pitches */}
+        {uniquePitches.map((pitch, lane) => (
+          <div
+            key={pitch}
+            style={{
+              position: "absolute",
+              top: `${lane * LANE_HEIGHT_PX}px`,
+              left: 0,
+              right: 0,
+              height: `${LANE_HEIGHT_PX}px`,
+              borderBottom: "1px solid #444",
+              display: "flex",
+              alignItems: "center",
+              color: "#aaa",
+              fontSize: "12px",
+              paddingLeft: "8px"
+            }}
+          >
+            {pitch}
+          </div>
+        ))}
+
+        {/* Render note blocks */}
+        {notes.map((note, index) => {
+          const lane = laneByPitch.get(note.pitch) ?? 0;
+          const leftPercent = (note.onset / maxTime) * 100;
+          // Clamp so a note whose offset precedes its onset cannot yield a negative CSS width.
+          const widthPercent = Math.max(0, ((note.offset - note.onset) / maxTime) * 100);
+
+          return (
+            <div
+              key={index}
+              style={{
+                position: "absolute",
+                top: `${lane * LANE_HEIGHT_PX + 8}px`,
+                left: `${leftPercent}%`,
+                width: `${widthPercent}%`,
+                height: "24px",
+                backgroundColor: "#52c41a",
+                borderRadius: "4px",
+                boxShadow: "0 1px 3px rgba(0,0,0,0.5)"
+              }}
+              title={`${note.pitch} (${note.onset.toFixed(2)}s - ${note.offset.toFixed(2)}s)`}
+            />
+          );
+        })}
+      </div>
+    </div>
+  );
+}
@@ -2,6 +2,7 @@ import { useState, useMemo } from "react";
 import type { RehearsalSong } from "@bandscope/shared-types";
 import { RoleSwitcher } from "./RoleSwitcher";
 import { SectionRoadmap } from "./SectionRoadmap";
+import { GrooveMap } from "./GrooveMap";
 import { generateCueSheetCsv, generateChartSummaryJson, sanitizeFilename } from "../../lib/export";
 
 interface WorkspaceProps {
@@ -87,11 +88,34 @@ export function Workspace({ song, onSongUpdate }: WorkspaceProps) {
 
 
       {activeRole && (
-        <div style={{ marginTop: "16px", padding: "16px", backgroundColor: "#f0f2f5", borderRadius: "8px", display: "flex", gap: "16px", alignItems: "center" }}>
-          <strong>Stem Player: {activeRole}</strong>
-          <button aria-label="Play stem" title="Coming soon" disabled={true} style={{ padding: "8px 16px", borderRadius: "4px", backgroundColor: "#1890ff", color: "#fff", border: "none", cursor: "not-allowed", minWidth: "44px", minHeight: "44px" }}>▶ Play</button>
-          <button aria-label="Loop section" title="Coming soon" disabled={true} style={{ padding: "8px 16px", borderRadius: "4px", border: "1px solid #d9d9d9", backgroundColor: "#f5f5f5", cursor: "not-allowed", minWidth: "44px", minHeight: "44px" }}>🔁 Loop Section</button>
-          <button aria-label="Solo/mute others" title="Coming soon" disabled={true} style={{ padding: "8px 16px", borderRadius: "4px", border: "1px solid #d9d9d9", backgroundColor: "#f5f5f5", cursor: "not-allowed", minWidth: "44px", minHeight: "44px" }}>🔇 Mute Others (Solo)</button>
+        <div style={{ marginTop: "16px", padding: "16px", backgroundColor: "#f0f2f5", borderRadius: "8px", display: "flex", flexDirection: "column", gap: "16px" }}>
+          <div style={{ display: "flex", gap: "16px", alignItems: "center" }}>
+            <strong>Stem Player: {activeRole}</strong>
+            <button aria-label="Play stem" title="Coming soon" disabled={true} style={{ padding: "8px 16px", borderRadius: "4px", backgroundColor: "#1890ff", color: "#fff", border: "none", cursor: "not-allowed", minWidth: "44px", minHeight: "44px" }}>▶ Play</button>
+            <button aria-label="Loop section" title="Coming soon" disabled={true} style={{ padding: "8px 16px", borderRadius: "4px", border: "1px solid #d9d9d9", backgroundColor: "#f5f5f5", cursor: "not-allowed", minWidth: "44px", minHeight: "44px" }}>🔁 Loop Section</button>
+            <button aria-label="Solo/mute others" title="Coming soon" disabled={true} style={{ padding: "8px 16px", borderRadius: "4px", border: "1px solid #d9d9d9", backgroundColor: "#f5f5f5", cursor: "not-allowed", minWidth: "44px", minHeight: "44px" }}>🔇 Mute Others (Solo)</button>
+            <button 
+              aria-label="Transcribe Bass"
+              title={activeRole.toLowerCase().includes("bass") ? "Transcribe part" : "Transcription is currently optimized for Bass. More instruments coming soon."}
+              disabled={!activeRole.toLowerCase().includes("bass")}
+              style={{
+                padding: "8px 16px",
+                borderRadius: "4px",
+                border: "1px solid #d9d9d9",
+                backgroundColor: activeRole.toLowerCase().includes("bass") ? "#52c41a" : "#f5f5f5",
+                color: activeRole.toLowerCase().includes("bass") ? "#fff" : "rgba(0, 0, 0, 0.25)",
+                cursor: activeRole.toLowerCase().includes("bass") ? "pointer" : "not-allowed",
+                minWidth: "44px",
+                minHeight: "44px"
+              }}
+            >
+              Transcribe Bass
+            </button>
+          </div>
+          {(() => {
+            const role = song.sections.flatMap(s => s.roles).find(r => r.id === activeRole);
+            return <GrooveMap notes={role?.transcription} isLoading={false} />;
+          })()}
         </div>
       )}
 

@@ -0,0 +1,113 @@
+<!-- /autoplan restore point: /Users/seonghobae/.gstack/projects//feature-issue-151-transcription-autoplan-restore-20260425-223305.md -->
+# Plan: V2 Transcription and Notation from Part STEMs
+
+## Problem Statement
+BandScope V1 provided rehearsal certainty by breaking songs into section roadmaps and allowing users to isolate their part stems (e.g., Vocals, Keys, Bass). However, learning a part strictly by ear from a stem can still be time-consuming for complex arrangements.
+The next step is to introduce Transcription and Notation generation (Issue #151), enabling users to automatically convert isolated stems into playable sheet music, tabs, or MIDI representations.
+
+## Scope
+- Implement audio-to-MIDI transcription for separated stems (Keys, Bass, Vocals, Guitar).
+- Integrate an ML model (like Basic Pitch, CREPE, or a transformer-based AMT model) to extract note events (pitch, onset, offset, velocity) from single-instrument audio stems.
+- Add a "Transcribe Part" button in the Role Switcher UI.
+- Render the transcribed notes as a basic piano roll or notation view alongside the stem player.
+- Allow users to export the transcription as a `.mid` file.
+
+## Out of Scope
+- Multi-instrument transcription from raw audio (we rely on V1 STEMs for single-instrument inputs).
+- Real-time sheet music scrolling playback (keep it static or simple for V2.0).
+- Replacing the human ear (transcriptions should be marked with confidence levels).
+
+
+## CEO Review Completion Summary
+- Mode: SELECTIVE EXPANSION -> REFRAMING
+- Scope Decisions:
+  - Approved: Narrow transcription scope exclusively to **Bass (monophonic)** for V2.0 to avoid polyphonic/tab generation complexity.
+  - Approved: Shift output expectation from "readable sheet music" to "Simplification & Groove Map" (rhythmic hits and root notes) to avoid the "Readable Notation" delusion of messy raw AMT data.
+  - Approved: Make Temporal Grid (tempo/beat map) a hard prerequisite before pitch transcription to ensure quantized, snap-to-grid MIDI exports.
+  - Approved: Perform a technical spike on ONNX/TFLite footprint before shipping, setting a strict "Readability Acceptance Criteria" (abort feature if >10% manual correction required).
+- Dual Voices: `[single-model]` (Codex unavailable, Claude subagent provided 5 critical/high findings).
+
+
+## Design UI/UX Specifications
+
+### Information Architecture
+- The "Transcribe" trigger is an attribute of the stem track, NOT a global setting. Move it from the Role Switcher to the Stem Player track header.
+- The Groove Map renders directly below the waveform, sharing the exact same time/X-axis.
+
+### Specific UI Mechanisms
+- **Ban the 88-key piano roll.** The Groove Map is a constrained, collapsed horizontal timeline showing *only* active pitches as labeled blocks (e.g., "E1", "A1") snapped to the beat grid.
+- **Non-Bass Roles:** Do not hide the button for Vocals, Guitar, or Keys. Show it disabled with a tooltip: `Transcription is currently optimized for Bass. More instruments coming soon.`
+
+### Interaction States
+- **Empty:** A dedicated lane showing "No transcription yet. Click to analyze bass line."
+- **Prerequisite missing:** If the Temporal Grid is missing, clicking Transcribe auto-sequences the tasks: `[1] Generating Beat Grid...` seamlessly followed by `[2] Extracting Bass Notes...`.
+- **Loading:** Inline progress bar/spinner on the track with text (e.g., `Analyzing pitch... 45%`) and a `[x] Cancel` button.
+- **Error:** "Stem too complex for accurate transcription."
+- **Partial/Rejected:** "Transcription requires >10% manual correction (Confidence low). [Keep Anyway] [Discard]"
+- **Success:** The Groove Map populates, and a `[Download .mid]` export button appears next to the track header.
+
+### Accessibility
+- Processing states must announce to screen readers via `aria-live="polite"`.
+- Disabled tooltips must be accessible via keyboard focus.
+- The Groove Map needs a textual summary equivalent for screen readers (e.g., "Transcription complete. 45 bars analyzed. High confidence.").
+
+## Design Review Completion Summary
+- Initial Score: 3/10
+- Final Score: 10/10
+- Decisions Made: 5 structural issues fixed via Claude Subagent.
+- Dual Voices: `[single-model]` (Codex unavailable).
+
+
+## Engineering Review Completion Summary
+- Initial Assessment: Architectural ambiguities, missing edge case limits, and highly complex unstated quantization logic.
+- Final State: Security boundaries, ML test suites, and measurable fallbacks explicitly added.
+- Dual Voices: `[single-model]` (Codex unavailable, Claude subagent provided 5 critical/high findings).
+
+### Architecture & Security (ASCII Diagram)
+```text
+[Desktop UI (React)] --(IPC)--> [Tauri Orchestrator]
+                                       |
+                                       v
+                                [Python Subprocess (Sandboxed)]
+                                 ├── 1. Audio Resampling (16kHz mono)
+                                 ├── 2. Temporal Grid Generation
+                                 └── 3. Local ONNX Inference (Bass AMT)
+```
+- **Model Security:** If models are downloaded at runtime, they MUST use HTTPS and verify hardcoded SHA-256 checksums before loading to prevent supply chain poisoning.
+- **Sandboxing:** The Python subprocess must run with dropped privileges to limit the impact of remote-code-execution (RCE) exploits triggered by decoding malicious audio.
+
+### Complexity Reduction & Edge Cases
+- **Unbounded Input:** Enforce a hard 5-minute duration limit or implement chunking for inference to prevent OOM crashes on older laptops.
+- **Cancellation Leaks:** Aggressive cleanup of partial `.mid` artifacts and `temp` audio chunks if the user hits `[x] Cancel`.
+- **Quantization:** Snapping absolute time (seconds) to a fluctuating beat grid is incredibly difficult. V2.0 will spike a dynamic programming approach (e.g., Hidden Markov Model) for alignment, rather than naive mathematical rounding.
+- **Metric Reframing:** The "10% manual correction" metric is subjective. Replace with a technical gate: "Abort and show error if the average confidence score of extracted notes is < 0.80 or if onset density exceeds 15 notes/second (indicating noise)."
+
+### Test Plan Diagram & Gaps
+```text
+CODE PATHS                                            USER FLOWS
+[+] services/analysis-engine/src/bandscope_analysis/transcription/
+  ├── run_inference()                                   ├── [GAP] [→E2E] Large audio file > 5 mins (Chunking/OOM check)
+  │   ├── [GAP] [→EVAL] Golden Dataset (F1 > 95%)       ├── [GAP] [→E2E] Cancellation mid-inference (Temp cleanup)
+  │   └── [GAP] Resampling fallback (48kHz -> 16kHz)    └── [GAP]        Low confidence reject (Density > 15 n/s)
+[+] apps/desktop/src/features/transcription/          [+] UI States
+  ├── renderGrooveMap()                                 ├── [GAP]        Missing Temporal Grid auto-sequence
+  │   └── [GAP] Snapping logic edge cases               └── [GAP]        Disabled non-bass roles tooltip
+```
+- **Action:** Introduce a "Golden Dataset" CI step for the ML engine. Run inference on 5 known bass stems and assert onset/pitch F1 scores > 95% against baseline before allowing merges.
+
+
+## Security Notes
+
+### Attack Surface
+The raw audio stems derived from imported files or separation are considered untrusted.
+### Trust Boundary
+The transcription ONNX models execute within the Python subprocess sandbox, explicitly isolated from the React frontend UI and the main Rust process.
+### Mitigations
+If an untrusted model-weights payload (ONNX/TFLite) fails the SHA-256 verification step at startup or on download, the transcription process is aborted safely and the user is alerted.
+### Realistic Threats
+An attempt to load a malicious ONNX model, leading to a supply-chain attack or local arbitrary code execution.
+### Remaining Risk
+No extracted MIDI or user stem data leaves the local machine. Transcription runs fully offline.
+### Test Points
+- Attempt to load a malformed ONNX model.
+- Corrupt audio buffer payload injection to transcription engine.
@@ -55,6 +55,14 @@ export type RangeSummary = {
   highestNote: string;
 };
 
+/**
+ * A single note event in a role's transcription.
+ *
+ * - `pitch`: pitch label as a string (the plan's examples are "E1" and "A1").
+ * - `onset`: time at which the note starts; presumably seconds, since the groove map
+ *   tooltip formats it with an "s" suffix (TODO confirm against the analysis engine).
+ * - `offset`: time at which the note ends, in the same unit as `onset`.
+ * - `velocity`: note velocity as a number; the range is not established here (TODO confirm).
+ */
+export type TranscriptionNote = {
+  pitch: string;
+  onset: number;
+  offset: number;
+  velocity: number;
+};
+
 /** Documented. */
 export type RehearsalHarmony = {
   chord: string;
@@ -84,6 +92,7 @@ export type RehearsalRole = {
   setupNote: string;
   manualOverrides: ManualOverride[];
   overlapWarnings: string[];
+  transcription?: TranscriptionNote[];
 };
 
 /** Documented. */
@@ -800,6 +809,30 @@ function validateManualOverride(value: unknown, path: string): string | null {
   return null;
 }
 
+/**
+ * Validates that `value` is a well-formed transcription note.
+ *
+ * A valid note is a record with exactly the keys `pitch` (string) and `onset`, `offset`,
+ * `velocity` (finite numbers). NaN and ±Infinity are rejected because they pass a bare
+ * `typeof === "number"` check but break the timeline arithmetic that consumes these values.
+ *
+ * @param value - Candidate note, typically parsed from untrusted input.
+ * @param path - Path of `value` within the enclosing document, used in the error message.
+ * @returns An error string for the first problem found, or `null` when the note is valid.
+ */
+function validateTranscriptionNote(value: unknown, path: string): string | null {
+  if (!isRecord(value)) {
+    return invalidField(path);
+  }
+  const extraKey = unexpectedKey(value, ["pitch", "onset", "offset", "velocity"], path);
+  if (extraKey) {
+    return extraKey;
+  }
+  if (typeof value.pitch !== "string") {
+    return invalidField(`${path}.pitch`);
+  }
+  // Checked in declaration order so the first reported field stays onset, offset, velocity.
+  for (const field of ["onset", "offset", "velocity"] as const) {
+    const candidate = value[field];
+    if (typeof candidate !== "number" || !Number.isFinite(candidate)) {
+      return invalidField(`${path}.${field}`);
+    }
+  }
+  return null;
+}
+
 /** Documented. */
 function validateRehearsalRole(value: unknown, path: string): string | null {
   if (!isRecord(value)) {
@@ -819,7 +852,8 @@ function validateRehearsalRole(value: unknown, path: string): string | null {
       "simplification",
       "setupNote",
       "manualOverrides",
-      "overlapWarnings"
+      "overlapWarnings",
+      "transcription"
     ],
     path
   );
@@ -883,6 +917,18 @@ function validateRehearsalRole(value: unknown, path: string): string | null {
     }
   }
 
+  if (value.transcription !== undefined) {
+    if (!isDenseArray(value.transcription)) {
+      return invalidField(`${path}.transcription`);
+    }
+    for (const [index, note] of value.transcription.entries()) {
+      const noteError = validateTranscriptionNote(note, `${path}.transcription[${index}]`);
+      if (noteError) {
+        return noteError;
+      }
+    }
+  }
+
   return null;
 }
 

@@ -28,6 +28,9 @@ packages = ["src/bandscope_analysis"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 pythonpath = ["src"]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+]
 
 [tool.coverage.run]
 source = ["src/bandscope_analysis"]

@@ -38,8 +38,12 @@ def analyze(self, audio_path: str | Path) -> TemporalFeatures:
         logger.info(f"Loading and decoding audio: {path_str}")
 
         try:
-            # Load audio, converting to mono and standardizing sample rate
-            y, sr = librosa.load(path_str, sr=TARGET_SR, mono=True)
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", DeprecationWarning)
+                # Load audio, converting to mono and standardizing sample rate
+                y, sr = librosa.load(path_str, sr=TARGET_SR, mono=True)
 
             # Ensure it's a 1D float array for librosa
             if not isinstance(y, np.ndarray):