seonghobae · seonghobae · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
@@ -1,4 +1,18 @@
 export const SUPPORTED_AUDIO_FORMATS = ["wav", "mp3", "flac", "m4a"] as const;
+/**
+ * Canonical form labels a rehearsal section may carry.
+ *
+ * Declared `as const` so the element type is the union of these literals
+ * rather than `string`; the same array backs the runtime `isOneOf` check
+ * applied to a section's `label` during validation.
+ */
+export const SECTION_FORM_LABELS = [
+  "intro",
+  "verse",
+  "pre-chorus",
+  "chorus",
+  "bridge",
+  "outro",
+  "tag",
+  "pickup",
+  "stop",
+  "handoff"
+] as const;
+
+/** Union of the string literals listed in `SECTION_FORM_LABELS`. */
+export type SectionFormLabel = (typeof SECTION_FORM_LABELS)[number];
 
 export type ProjectSummary = {
   id: string;
@@ -58,7 +72,7 @@ export type RehearsalRole = {
 
 export type RehearsalSection = {
   id: string;
-  label: string;
+  label: SectionFormLabel;
   groove: string;
   confidence: ConfidenceMarker;
   roles: RehearsalRole[];
@@ -177,7 +191,7 @@ const demoRehearsalSongSeed: RehearsalSong = {
   sections: [
     {
       id: "verse-1",
-      label: "Verse 1",
+      label: "verse",
       groove: "Straight eighths with a late snare feel",
       confidence: {
         level: "medium",
@@ -281,8 +295,8 @@ const demoRehearsalSongSeed: RehearsalSong = {
   ],
   exportSummary: {
     format: "cue-sheet",
-    headline: "Start with Verse 1 entrances before the chorus lift.",
-    focusSections: ["Verse 1"]
+    headline: "Start with verse entrances before the chorus lift.",
+    focusSections: ["verse"]
   }
 };
 
@@ -761,7 +775,7 @@ function validateRehearsalSection(value: unknown, path: string): string | null {
   if (typeof value.id !== "string") {
     return invalidField(`${path}.id`);
   }
-  if (typeof value.label !== "string") {
+  if (!isOneOf(SECTION_FORM_LABELS, value.label)) {
     return invalidField(`${path}.label`);
   }
   if (typeof value.groove !== "string") {

@@ -353,7 +353,7 @@ describe("shared type helpers", () => {
       sections: [
         {
           id: "verse-1",
-          label: "Verse 1",
+          label: "verse",
           confidence: {
             level: "medium",
             source: "model"
@@ -496,7 +496,7 @@ describe("shared type helpers", () => {
     roleSparse.sections[0]!.roles = new Array(1);
     badOverride.sections[0]!.roles[2]!.manualOverrides[0]!.value.source = "model";
     badHeadline.exportSummary.headline = 99;
-    badFocusSection.exportSummary.focusSections = ["Verse 1", 7];
+    badFocusSection.exportSummary.focusSections = ["verse", 7];
     badExportSummary.exportSummary = [];
     sparseSections.sections = new Array(1) as RehearsalSong["sections"];
 

@@ -5,6 +5,7 @@
 from typing import Literal, NotRequired, TypedDict
 
 from bandscope_analysis.health import HealthReport, build_health_report
+from bandscope_analysis.sections import extract_sections
 
 
 class AnalysisJobRequest(TypedDict):
@@ -205,14 +206,20 @@ def validate_analysis_job_request(payload: object) -> AnalysisJobRequest:
 
 def build_demo_rehearsal_song() -> RehearsalSong:
     """Return the bootstrap rehearsal song payload for orchestration tests."""
+
+    # Extract sections using the new pipeline
+    arrangement = [{"label": "verse", "groove": "Straight eighths with a late snare feel"}]
+    extraction_result = extract_sections(arrangement)
+    verse_section = extraction_result["sections"][0]
+
     return {
         "id": "demo-song",
         "title": "Late Night Set",
         "sections": [
             {
-                "id": "verse-1",
-                "label": "Verse 1",
-                "groove": "Straight eighths with a late snare feel",
+                "id": verse_section["id"],
+                "label": verse_section["form_label"],
+                "groove": verse_section["groove"],
                 "confidence": {
                     "level": "medium",
                     "source": "model",
@@ -307,8 +314,8 @@ def build_demo_rehearsal_song() -> RehearsalSong:
         ],
         "exportSummary": {
             "format": "cue-sheet",
-            "headline": "Start with Verse 1 entrances before the chorus lift.",
-            "focusSections": ["Verse 1"],
+            "headline": "Start with verse entrances before the chorus lift.",
+            "focusSections": ["verse"],
         },
     }
 

@@ -0,0 +1,28 @@
+"""Section extraction components and models.
+
+This package exposes the core models and logic for extracting sections
+from arrangement representations.
+"""
+
+from .anchors import count_based_anchor, lyric_phrase_anchor
+from .extractor import extract_sections
+from .model import (
+    ALL_SECTION_LABELS,
+    CueAnchor,
+    CueAnchorStrategy,
+    SectionCandidate,
+    SectionExtractionResult,
+    SectionLabel,
+)
+
+# Names re-exported as the public surface of this package; every entry is
+# imported from the anchors, extractor, or model submodule above.
+__all__ = [
+    "CueAnchor",
+    "CueAnchorStrategy",
+    "SectionCandidate",
+    "SectionExtractionResult",
+    "SectionLabel",
+    "ALL_SECTION_LABELS",
+    "count_based_anchor",
+    "lyric_phrase_anchor",
+    "extract_sections",
+]
@@ -0,0 +1,16 @@
+"""Helper functions for creating cue anchors."""
+
+from .model import CueAnchor, CueAnchorStrategy
+
+
+def count_based_anchor(beat: int = 1, bar: int = 1) -> CueAnchor:
+    """Build a cue anchor that names the beat and bar to enter on.
+
+    Intended for sections that have no lyric phrase to cue from.
+
+    Args:
+        beat: Beat number interpolated into the cue text.
+        bar: Bar number interpolated into the cue text.
+
+    Returns:
+        A ``CueAnchor`` carrying the count strategy value and the sentence
+        ``"Enter on beat <beat> of bar <bar>"``.
+    """
+    cue_text = f"Enter on beat {beat} of bar {bar}"
+    strategy_name = CueAnchorStrategy.COUNT.value
+    return {"strategy": strategy_name, "value": cue_text}
+
+
+def lyric_phrase_anchor(phrase: str) -> CueAnchor:
+    """Build a cue anchor whose cue text is the given lyric phrase.
+
+    Args:
+        phrase: Lyric text stored unchanged as the anchor value.
+
+    Returns:
+        A ``CueAnchor`` carrying the lyric strategy value and ``phrase``.
+    """
+    strategy_name = CueAnchorStrategy.LYRIC.value
+    anchor: CueAnchor = {"strategy": strategy_name, "value": phrase}
+    return anchor
@@ -0,0 +1,89 @@
+"""Pipeline logic for extracting section candidates from song arrangements."""
+
+from typing import Any, Dict, List, Literal
+
+from .anchors import count_based_anchor, lyric_phrase_anchor
+from .model import (
+    ALL_SECTION_LABELS,
+    SectionCandidate,
+    SectionExtractionResult,
+)
+
+
+def _normalize_label(raw_label: str) -> str:
+    """Map a raw arrangement label onto a canonical section label when possible.
+
+    The input is lower-cased and stripped, then matched by prefix against
+    ``ALL_SECTION_LABELS`` so numbered variants such as ``"Verse 1"`` collapse
+    to ``"verse"``. Spaces, underscores and hyphens are treated as the same
+    separator while matching, so ``"Pre Chorus"`` and ``"pre_chorus 2"`` both
+    resolve to ``"pre-chorus"``.
+
+    Args:
+        raw_label: Label text from the arrangement. Values arrive from untyped
+            dicts, so ``None`` is tolerated and reported as ``"unknown"``
+            instead of the literal string ``"none"``.
+
+    Returns:
+        The matching canonical label, or the lower-cased, stripped input when
+        no canonical label matches.
+    """
+    if raw_label is None:
+        return "unknown"
+    normalized = str(raw_label).lower().strip()
+    # Canonicalize separators for matching only; an unrecognized label is still
+    # returned in its lower-cased, stripped form.
+    match_key = "-".join(normalized.replace("_", " ").replace("-", " ").split())
+    # Try the longest label first so the result stays correct even if one
+    # canonical label ever becomes a prefix of another.
+    for label in sorted(ALL_SECTION_LABELS, key=len, reverse=True):
+        if match_key.startswith(label):
+            return label
+    return normalized
+
+
+def extract_sections(arrangement: List[Dict[str, Any]]) -> SectionExtractionResult:
+    """Extract structured section candidates from raw arrangement data.
+
+    Each arrangement item is a dict that may carry:
+
+    - ``label``: raw section name; a missing, ``None`` or blank label is
+      reported as ``"unknown"``.
+    - ``groove``: optional groove description; a missing, ``None`` or empty
+      value falls back to ``"standard"``.
+    - ``lyric_cue``: optional lyric phrase used as the entry cue.
+
+    Returns:
+        A ``SectionExtractionResult`` with one candidate per arrangement item,
+        in input order. Candidate ids have the form
+        ``"<form_label>-<sequence_index>"``, where ``sequence_index`` is the
+        1-based occurrence count of that form label. ``strategy_used`` is
+        ``"lyric"`` when any item has a truthy ``lyric_cue`` and ``"count"``
+        otherwise.
+    """
+    # The lyric strategy dominates as soon as any item carries a lyric cue.
+    has_lyrics = any(item.get("lyric_cue") for item in arrangement)
+    dominant_strategy = "lyric" if has_lyrics else "count"
+
+    sections: List[SectionCandidate] = []
+    label_counts: Dict[str, int] = {}
+
+    for item in arrangement:
+        raw_label = item.get("label")
+        form_label = "" if raw_label is None else _normalize_label(raw_label)
+        if not form_label:
+            # Treat an explicit None or blank label like a missing one, so the
+            # id never degrades to "none-1" or "-1".
+            form_label = "unknown"
+
+        # 1-based occurrence counter per form label (verse-1, verse-2, ...).
+        sequence_index = label_counts.get(form_label, 0) + 1
+        label_counts[form_label] = sequence_index
+
+        # Only canonical labels earn high confidence; the source is always the model.
+        confidence_level: Literal["low", "medium", "high"]
+        if form_label in ALL_SECTION_LABELS:
+            confidence_level = "high"
+            confidence_notes = "Recognized standard section label"
+        else:
+            confidence_level = "low"
+            confidence_notes = "Unrecognized section label"
+
+        # Prefer the item's own lyric cue; otherwise fall back to a count-in.
+        lyric_cue = item.get("lyric_cue")
+        if lyric_cue:
+            anchor = lyric_phrase_anchor(lyric_cue)
+        else:
+            anchor = count_based_anchor(beat=1, bar=1)
+
+        candidate: SectionCandidate = {
+            "id": f"{form_label}-{sequence_index}",
+            "form_label": form_label,
+            "sequence_index": sequence_index,
+            # `or` also covers an explicit None/empty groove, keeping the field a str.
+            "groove": item.get("groove") or "standard",
+            "confidence_level": confidence_level,
+            "confidence_source": "model",
+            "confidence_notes": confidence_notes,
+            "cue_anchor": anchor,
+        }
+        sections.append(candidate)
+
+    return {
+        "sections": sections,
+        "strategy_used": dominant_strategy,
+        "extraction_notes": f"Extracted {len(sections)} sections using {dominant_strategy}.",
+    }
@@ -0,0 +1,80 @@
+"""Domain model for section/form/cue anchor extraction."""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Literal, TypedDict
+
+
+class SectionLabel(str, Enum):
+    """Canonical form labels for song sections.
+
+    These labels cover the rehearsal-relevant structural vocabulary for
+    contemporary popular, jazz, gospel, and R&B arrangements.
+
+    Because the class also derives from ``str``, each member compares equal to
+    its plain string value.
+    """
+
+    # Definition order matters: ALL_SECTION_LABELS is built by iterating this enum.
+    INTRO = "intro"
+    VERSE = "verse"
+    PRE_CHORUS = "pre-chorus"
+    CHORUS = "chorus"
+    BRIDGE = "bridge"
+    OUTRO = "outro"
+    TAG = "tag"
+    PICKUP = "pickup"
+    STOP = "stop"
+    HANDOFF = "handoff"
+
+
+# Plain-string view of every SectionLabel value, in enum definition order,
+# used for iteration and membership checks.
+ALL_SECTION_LABELS: tuple[str, ...] = tuple(
+    member.value for member in SectionLabel
+)
+
+
+class CueAnchorStrategy(str, Enum):
+    """Strategy used to anchor a section cue.
+
+    The ``.value`` of a member is what gets stored in ``CueAnchor["strategy"]``.
+    """
+
+    # Cue is a lyric phrase (emitted by lyric_phrase_anchor).
+    LYRIC = "lyric"
+    # Cue is a beat/bar count-in (emitted by count_based_anchor).
+    COUNT = "count"
+    # NOTE(review): no anchor helper visible in this change emits this value.
+    TRANSITION = "transition"
+
+
+class CueAnchor(TypedDict):
+    """A rehearsal cue anchor tied to a specific entry strategy."""
+
+    # Stored as the plain string value of a CueAnchorStrategy member.
+    strategy: str  # CueAnchorStrategy value
+    # Human-readable cue text: a lyric phrase, or a sentence naming beat and bar.
+    value: str
+
+
+class SectionCandidate(TypedDict):
+    """A single candidate section produced during extraction.
+
+    ``form_label`` is a ``SectionLabel`` value string when the raw label is
+    recognized; otherwise it is the non-canonical label text the extractor
+    produced for that item.
+    ``sequence_index`` is the 1-based occurrence count of this ``form_label``
+    within the arrangement (first verse is 1, second verse is 2).
+    ``confidence_level`` is one of ``"low" | "medium" | "high"``.
+    ``confidence_source`` is always ``"model"`` for extracted sections.
+    ``confidence_notes`` is a human-readable explanation.
+    ``groove`` is a brief textual groove descriptor for rehearsal reference.
+    ``cue_anchor`` is the primary entry cue for this section.
+    ``id`` is ``"<form_label>-<sequence_index>"``, e.g. ``"verse-1"``.
+    """
+
+    id: str
+    form_label: str  # SectionLabel value, or pass-through text when unrecognized
+    sequence_index: int  # 1-based, counted per form_label
+    groove: str
+    confidence_level: Literal["low", "medium", "high"]
+    confidence_source: Literal["model", "user"]
+    confidence_notes: str
+    cue_anchor: CueAnchor
+
+
+class SectionExtractionResult(TypedDict):
+    """Result returned by the section extraction pipeline.
+
+    ``sections`` is an ordered list of ``SectionCandidate`` objects, one per
+    arrangement item, in input order.
+    ``strategy_used`` records which anchor strategy dominated; the extractor
+    sets it to ``"lyric"`` when any item has a lyric cue, else ``"count"``.
+    ``extraction_notes`` provides overall pipeline commentary, including the
+    number of sections extracted and the strategy used.
+    """
+
+    sections: list[SectionCandidate]
+    strategy_used: str  # CueAnchorStrategy value
+    extraction_notes: str