-
Notifications
You must be signed in to change notification settings - Fork 963
docs(cli): add VHS demo walkthrough for the Superset CLI #4461
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b6cdfd8
be88b42
455d31e
855e14d
a829a81
425b31c
e0a2ac1
31e36f4
8df310e
3e84bed
917dc45
19ae365
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| __pycache__/ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
#!/usr/bin/env bash
# Adds a soundtrack to the VHS-rendered demo: a lo-fi music bed (demo/music.mp3)
# plus mechanical-keyboard clicks timed off the .tape script.
#
# Input:  demo/superset-cli.mp4   (produced by `vhs demo/superset-cli.tape`)
#         demo/music.mp3          (the music bed; override with the first argument)
#         demo/keyboard.mp3       (a continuous mechanical-keyboard recording)
# Output: demo/superset-cli-sound.mp4
#
# Credits: music — "Lofi Production" by Pulsebox (Pixabay, royalty-free);
#          keyboard — "Mechanical Keyboard Typing HD" by VirtualZero (Pixabay).
# Individual keystrokes are sliced out of keyboard.mp3 and dropped onto the
# .tape timeline (one random sample per key, with slight pitch/level jitter).
# If keyboard.mp3 is missing, the clicks fall back to a numpy synth.
set -euo pipefail
cd "$(dirname "$0")/.."   # -> packages/cli

SRC=demo/superset-cli.mp4
TAPE=demo/superset-cli.tape
MUSIC=${1:-demo/music.mp3}
KB=demo/keyboard.mp3
OUT=demo/superset-cli-sound.mp4

# An explicit XXXXXX template works with both GNU and BSD/macOS mktemp;
# `mktemp -d -t demo-sound` is rejected by GNU coreutils ("too few X's in template").
TMP=$(mktemp -d "${TMPDIR:-/tmp}/demo-sound.XXXXXX")
# Remove the scratch directory on every exit path, including failures under `set -e`.
trap 'rm -rf "$TMP"' EXIT
CLICKS="$TMP/clicks.wav"

[ -f "$SRC" ]   || { echo "missing $SRC — run: vhs demo/superset-cli.tape" >&2; exit 1; }
[ -f "$MUSIC" ] || { echo "missing music bed: $MUSIC" >&2; exit 1; }
DUR=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$SRC")
# The 3 s fade-out starts 3 s before the end; clamp to 0 for clips shorter than that.
FADE_AT=$(awk -v d="$DUR" 'BEGIN { t = d - 3; print (t > 0 ? t : 0) }')

# Empty KEYS_ARG makes gen_audio.py fall back to synthesized clicks.
KEYS_ARG=""
if [ -f "$KB" ]; then
  echo "slicing keystroke samples from $KB ..."
  ffmpeg -y -loglevel error -i "$KB" -ac 1 -ar 44100 "$TMP/kb.wav"
  python3 demo/extract_keys.py "$TMP/kb.wav" "$TMP/keys"
  KEYS_ARG="$TMP/keys"
fi

echo "placing clicks on the timeline..."
python3 demo/gen_audio.py "$TAPE" "$CLICKS" "$DUR" "$KEYS_ARG"

# [music]  -> trim to video length, fade in/out, light low-pass, drop the level
# [clicks] -> as-is (already left headroom); mix, keep under the ceiling
ffmpeg -y -i "$SRC" -i "$MUSIC" -i "$CLICKS" \
  -filter_complex "\
[1:a]atrim=0:${DUR},asetpts=PTS-STARTPTS,lowpass=f=12000,volume=0.5,afade=t=in:st=0:d=2,afade=t=out:st=${FADE_AT}:d=3[mus];\
[2:a]volume=0.4[clk];\
[mus][clk]amix=inputs=2:normalize=0,alimiter=limit=0.95:level=disabled,aresample=44100[a]" \
  -map 0:v -map "[a]" -c:v copy -c:a aac -b:a 192k -shortest "$OUT"
echo "wrote $OUT"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| #!/usr/bin/env python3 | ||
| """Slice individual keystroke samples out of a continuous typing recording. | ||
|
|
||
| Detects transients in <src.wav> and writes one short WAV per keystroke into | ||
| <out_dir>/keyNN.wav (1 ms fade-in, ~20 ms fade-out so the edges don't click). | ||
|
|
||
| Usage: python3 extract_keys.py <src.wav> <out_dir> | ||
| """ | ||
| import os | ||
| import sys | ||
| import wave | ||
|
|
||
| import numpy as np | ||
|
|
||
| SR = 44100 | ||
|
|
||
|
|
||
def load_mono(path):
    """Load a PCM WAV file as a mono float array resampled to ``SR`` Hz.

    Args:
        path: Path to an uncompressed PCM WAV file.

    Returns:
        A 1-D NumPy float array scaled to roughly [-1.0, 1.0]. Multi-channel
        input is averaged down to mono; an empty file yields an empty array.

    Raises:
        ValueError: If the sample width is not 8, 16 or 32 bits (e.g. 24-bit PCM).
    """
    with wave.open(path, "rb") as w:
        n, sw, ch, fr = w.getnframes(), w.getsampwidth(), w.getnchannels(), w.getframerate()
        raw = w.readframes(n)

    # WAV stores 8-bit PCM as *unsigned* bytes centred on 128; wider samples
    # are signed little-endian integers.
    dtypes = {1: np.dtype("u1"), 2: np.dtype("<i2"), 4: np.dtype("<i4")}
    if sw not in dtypes:
        raise ValueError(
            f"{path}: unsupported sample width {sw * 8}-bit (expected 8, 16 or 32)"
        )
    dt = dtypes[sw]
    a = np.frombuffer(raw, dtype=dt).astype(np.float32)
    if sw == 1:
        a = (a - 128) / 128.0
    else:
        a = a / float(np.iinfo(dt).max)

    if ch > 1:
        a = a.reshape(-1, ch).mean(axis=1)
    # Cheap linear resample; skipped for empty input, which np.interp rejects.
    if fr != SR and len(a):
        idx = np.linspace(0, len(a) - 1, int(len(a) * SR / fr))
        a = np.interp(idx, np.arange(len(a)), a)
    return a
|
|
||
|
|
||
def detect_onsets(a, min_gap=0.06, thr_frac=0.16):
    """Find keystroke onsets (sample indices) in a mono recording.

    The rectified signal is smoothed with a 3 ms moving average; an onset is
    every upward crossing of ``thr_frac`` times the smoothed peak, provided it
    lies more than ``min_gap`` seconds after the previously accepted onset.

    Args:
        a: 1-D array of samples at ``SR`` Hz.
        min_gap: Minimum spacing between accepted onsets, in seconds.
        thr_frac: Threshold as a fraction of the smoothed envelope's peak.

    Returns:
        A list of integer sample indices in ascending order. Empty or
        all-silent input yields an empty list.
    """
    # np.convolve rejects empty input, and silence has no peak to threshold on.
    if len(a) == 0:
        return []
    win = int(SR * 0.003)
    sm = np.convolve(np.abs(a), np.ones(win) / win, "same")
    peak = sm.max()
    if peak <= 0:
        return []

    above = sm > thr_frac * peak
    rising = np.flatnonzero(above[1:] & ~above[:-1])

    gap = int(SR * min_gap)
    out, last = [], None
    for i in rising:
        # The first crossing is always accepted, whatever min_gap is.
        if last is None or i - last > gap:
            out.append(int(i))
            last = i
    return out
|
|
||
|
|
||
def main():
    """Slice keystroke samples out of ``<src.wav>`` into ``<out_dir>/keyNN.wav``.

    Each detected onset becomes a 140 ms mono 16-bit clip at ``SR`` Hz that
    starts 4 ms before the onset, is softened by a one-pole low-pass, given a
    1 ms fade-in and a 45 ms fade-out, and normalized to 0.95 of full scale.
    Clips that are truncated by the end of the recording or that are much
    quieter than the recording's peak are skipped.

    Exits with status 2 and a usage message when arguments are missing.
    """
    if len(sys.argv) < 3:
        print("Usage: python3 extract_keys.py <src.wav> <out_dir>", file=sys.stderr)
        sys.exit(2)
    src, out_dir = sys.argv[1], sys.argv[2]
    os.makedirs(out_dir, exist_ok=True)
    a = load_mono(src)
    onsets = detect_onsets(a)

    pre, length = int(SR * 0.004), int(SR * 0.14)
    # 1 ms fade-in; a softer/longer 45 ms fade-out so the tail doesn't click.
    fi, fo = int(SR * 0.001), int(SR * 0.045)
    # One-pole low-pass coefficient (~3.8 kHz corner); the same for every clip.
    a_lp = np.exp(-2 * np.pi * 3800 / SR)
    # Guard the reduction: an empty recording has no peak.
    peak_global = (np.max(np.abs(a)) if len(a) else 0.0) or 1.0

    kept = 0
    for on in onsets:
        s = max(0, on - pre)
        seg = a[s:s + length].copy()
        if len(seg) < length // 2:
            continue
        if np.max(np.abs(seg)) < 0.06 * peak_global:  # too quiet — probably a tail, skip
            continue
        # Mellow it a touch: gentle low-pass, applied in place sample by sample.
        prev = 0.0
        for j, x in enumerate(seg):
            prev = (1 - a_lp) * x + a_lp * prev
            seg[j] = prev
        if len(seg) >= fi + fo:
            seg[:fi] *= np.linspace(0, 1, fi)
            seg[-fo:] *= np.linspace(1, 0, fo) ** 1.5
        seg = seg / (np.max(np.abs(seg)) or 1.0) * 0.95
        kept += 1
        with wave.open(os.path.join(out_dir, f"key{kept:02d}.wav"), "wb") as w:
            w.setnchannels(1)
            w.setsampwidth(2)
            w.setframerate(SR)
            w.writeframes((seg * 32767).astype(np.int16).tobytes())
    print(f"  extracted {kept} keystroke samples from {os.path.basename(src)} -> {out_dir}/")
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,204 @@ | ||||||
| #!/usr/bin/env python3 | ||||||
| """Generate a keyboard-click track for the VHS demo, timed off the .tape script. | ||||||
|
|
||||||
| Clicks are placed by replaying the .tape timeline: every `Type` character costs | ||||||
| `TypingSpeed`, every `Enter` is a keystroke, every `Sleep` advances the clock, | ||||||
| and the `Hide`..`Show` block is skipped (VHS doesn't render it). | ||||||
|
|
||||||
| The click sound is synthesized with numpy by default. Pass a WAV (a single | ||||||
| keystroke) or a directory of WAVs (a pool of keystrokes — picked at random per | ||||||
| key) to use real recordings instead; each hit gets slight pitch/level jitter. | ||||||
|
|
||||||
| Usage: python3 gen_audio.py <tape> <out.wav> <duration_seconds> [keys.wav|keys_dir] [keyreturn.wav] | ||||||
| """ | ||||||
| import os | ||||||
| import re | ||||||
| import sys | ||||||
| import wave | ||||||
|
|
||||||
| import numpy as np | ||||||
|
|
||||||
| SR = 44100 | ||||||
| _rng = np.random.default_rng(7) | ||||||
|
|
||||||
|
|
||||||
| # ---------------------------------------------------------------- tape timeline | ||||||
def parse_events(tape_path):
    """Replay a VHS ``.tape`` script and return keystroke times on the video clock.

    The clock only runs while the tape is visible: commands inside a
    ``Hide`` .. ``Show`` block are not rendered by VHS, so they neither emit
    events nor advance time. This keeps events after a mid-tape hidden block
    aligned with the video instead of restarting from zero.

    Args:
        tape_path: Path to the ``.tape`` file (read as UTF-8).

    Returns:
        ``(keys, returns)`` — two lists of timestamps in seconds: one entry per
        typed character or special key, and one entry per ``Enter``.
    """
    typing_speed = 0.05  # VHS default; the tape overrides it via `Set TypingSpeed`
    t = 0.0
    in_hidden = False
    keys, returns = [], []

    special_key = re.compile(
        r"(Ctrl\+|Alt\+|Shift\+|Backspace|Tab|Space|Escape|Up|Down|Left|Right|PageUp|PageDown)"
    )

    with open(tape_path, encoding="utf-8") as tape:
        for raw in tape:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            head = line.split(None, 1)[0]

            if head == "Hide":
                in_hidden = True
                continue
            if head == "Show":
                in_hidden = False
                continue

            # Settings apply wherever they appear, hidden or not.
            m = re.match(r"Set\s+TypingSpeed\s+([\d.]+)(ms|s)?", line)
            if m:
                typing_speed = float(m.group(1)) / (1000 if m.group(2) == "ms" else 1)
                continue
            if head in ("Set", "Output", "Require", "Env"):
                continue

            # Nothing else in a hidden block reaches the rendered video.
            if in_hidden:
                continue

            m = re.match(r"Sleep\s+([\d.]+)(ms|s)?", line)
            if m:
                t += float(m.group(1)) / (1000 if (m.group(2) or "s") == "ms" else 1)
                continue

            if head == "Type":
                body = line[len("Type"):].strip()
                # Strip one pair of matching quotes around the typed text.
                if len(body) >= 2 and body[0] in "\"'`" and body[-1] == body[0]:
                    body = body[1:-1]
                for _ch in body:
                    keys.append(t)
                    t += typing_speed
                continue

            if head == "Enter":
                returns.append(t)
                t += typing_speed
                continue

            if special_key.match(head):
                keys.append(t)
                t += typing_speed
                continue

    return keys, returns
|
|
||||||
|
|
||||||
| # --------------------------------------------------------------------- samples | ||||||
def _load_wav_mono(path):
    """Load a keystroke WAV as mono float samples at ``SR`` Hz, silence-trimmed.

    Args:
        path: Path to an uncompressed PCM WAV file.

    Returns:
        A 1-D NumPy float array scaled to roughly [-1.0, 1.0], starting at the
        first sample whose magnitude exceeds 2 % of the file's peak.

    Raises:
        ValueError: If the sample width is not 8, 16 or 32 bits, or the file
            contains no audio frames.
    """
    with wave.open(path, "rb") as w:
        n, sw, ch, fr = w.getnframes(), w.getsampwidth(), w.getnchannels(), w.getframerate()
        raw = w.readframes(n)

    # WAV stores 8-bit PCM as *unsigned* bytes centred on 128; wider samples
    # are signed little-endian integers.
    dtypes = {1: np.dtype("u1"), 2: np.dtype("<i2"), 4: np.dtype("<i4")}
    if sw not in dtypes:
        raise ValueError(
            f"{path}: unsupported sample width {sw * 8}-bit (expected 8, 16 or 32)"
        )
    dt = dtypes[sw]
    a = np.frombuffer(raw, dtype=dt).astype(np.float32)
    if len(a) == 0:
        raise ValueError(f"{path}: contains no audio frames")
    if sw == 1:
        a = (a - 128) / 128.0
    else:
        a /= float(np.iinfo(dt).max)

    if ch > 1:
        a = a.reshape(-1, ch).mean(axis=1)
    if fr != SR:  # cheap linear resample
        idx = np.linspace(0, len(a) - 1, int(len(a) * SR / fr))
        a = np.interp(idx, np.arange(len(a)), a)
    # Trim leading silence so the transient lands on the timestamp.
    thr = 0.02 * (np.max(np.abs(a)) or 1.0)
    nz = np.argmax(np.abs(a) > thr)
    return a[nz:]
|
|
||||||
|
|
||||||
def _jitter(sample, semitones=1.5, gain_db=2.5):
    """Return a copy of *sample* with a random pitch shift and level change.

    The pitch moves by up to ``±semitones`` (done by re-reading the sample at
    a different speed, so its length changes too) and the level by up to
    ``±gain_db`` decibels. Both draws come from the module-level ``_rng``.
    """
    pitch_shift = _rng.uniform(-semitones, semitones)
    speed = 2 ** (pitch_shift / 12)

    count = len(sample)
    read_positions = np.arange(0, count, speed)
    resampled = np.interp(read_positions, np.arange(count), sample)

    level_db = _rng.uniform(-gain_db, gain_db)
    return resampled * (10 ** (level_db / 20))
|
|
||||||
|
|
||||||
| # ---------------------------------------------------------- synthesized clicks | ||||||
def _synth_click(kind="key"):
    """Synthesize one key click as a float32 array at ``SR`` Hz.

    The click is the sum of a 6 ms decaying noise burst, a fast-decaying
    high-frequency tick and a low sine "body". ``kind="return"`` gives a
    longer, lower and louder click than any other value. Random parameters
    are drawn from the module-level ``_rng``.
    """
    if kind == "return":
        body_hz = _rng.uniform(95, 120)
        length_s = 0.075
        level = 0.95
        tick_level, noise_level = 0.5, 0.35
    else:
        body_hz = _rng.uniform(150, 235)
        length_s = 0.045
        level = _rng.uniform(0.6, 0.85)
        tick_level, noise_level = 0.45, 0.30

    count = int(SR * length_s)
    time_axis = np.arange(count) / SR

    # Short noise burst at the very start of the click.
    burst_len = int(SR * 0.006)
    burst_decay = np.exp(-np.arange(burst_len) / (burst_len * 0.4))
    burst = np.zeros(count)
    burst[:burst_len] = _rng.standard_normal(burst_len) * burst_decay
    burst *= noise_level

    tick_hz = _rng.uniform(2600, 3400)
    tick = np.sin(2 * np.pi * tick_hz * time_axis) * np.exp(-time_axis / 0.004) * tick_level
    thump = np.sin(2 * np.pi * body_hz * time_axis) * np.exp(-time_axis / (length_s * 0.5))

    mixed = (burst + tick + thump) * level
    # Sub-millisecond ramp so the first sample starts from zero.
    ramp = int(SR * 0.0008)
    mixed[:ramp] *= np.linspace(0, 1, ramp)
    return mixed.astype(np.float32)
|
|
||||||
|
|
||||||
def _load_pool(path):
    """Load keystroke samples from a WAV file or a directory of WAV files.

    Returns a list of sample arrays: one entry for a single file, or one per
    ``*.wav`` file (case-insensitive, in sorted filename order) for a
    directory. A falsy or non-existent *path* gives an empty list.
    """
    if not (path and os.path.exists(path)):
        return []
    if not os.path.isdir(path):
        return [_load_wav_mono(path)]

    wav_names = [name for name in os.listdir(path) if name.lower().endswith(".wav")]
    wav_names.sort()
    return [_load_wav_mono(os.path.join(path, name)) for name in wav_names]
|
|
||||||
|
|
||||||
| # Don't fire a key click within this gap of the previous one — keeps fast | ||||||
| # on-screen typing from sounding like a machine gun (the audio "types" calmer). | ||||||
| CLICK_MIN_GAP = 0.09 | ||||||
|
|
||||||
|
|
||||||
def _thin(times, min_gap):
    """Drop timestamps that follow the previously kept one by less than *min_gap*.

    *times* is walked in the order given; the first entry is always kept.
    Returns a new list.
    """
    kept = []
    previous = -1e9
    for stamp in times:
        if not (stamp - previous >= min_gap):
            continue
        kept.append(stamp)
        previous = stamp
    return kept
|
|
||||||
|
|
||||||
def build_track(keys, returns, total_len, key_pool, ret_pool):
    """Mix key and return clicks onto a silent mono track.

    Args:
        keys: Timestamps (seconds) of ordinary keystrokes; thinned with
            ``CLICK_MIN_GAP`` before placement.
        returns: Timestamps (seconds) of Enter presses.
        total_len: Length of the finished track in seconds.
        key_pool: List of recorded keystroke samples; empty means synthesize.
        ret_pool: List of recorded return samples; when empty, ``key_pool`` is
            reused slightly louder, or the return click is synthesized.

    Returns:
        A float32 array of ``int(SR * total_len)`` samples. Clicks that start
        outside the track are dropped and clicks that overhang the end are
        truncated, so an over-long tape timeline cannot crash the mix.
    """
    # One spare second of padding lets late clicks ring out before the final trim.
    buf = np.zeros(int(SR * total_len) + SR, dtype=np.float32)

    def place(times, pool, kind, gain=1.0):
        for t in times:
            i = int(t * SR)
            if i < 0 or i >= len(buf):
                continue  # event lies outside the rendered video
            if pool:
                c = _jitter(pool[_rng.integers(len(pool))]) * gain
            else:
                c = _synth_click(kind)
            end = min(i + len(c), len(buf))
            buf[i:end] += c[:end - i]

    place(_thin(keys, CLICK_MIN_GAP), key_pool, "key")
    # Enter: prefer a dedicated return sample; else reuse the keypress pool a touch louder
    place(returns, ret_pool or key_pool, "return", gain=1.15 if not ret_pool else 1.0)
    return buf[:int(SR * total_len)]
|
|
||||||
|
|
||||||
def main():
    """Command-line entry point: render the click track for a tape to a WAV file.

    Reads ``<tape> <out.wav> <duration_seconds> [keys.wav|keys_dir] [keyreturn.wav]``
    from ``sys.argv``, builds the click track, normalizes it to 0.9 of full
    scale and writes it as mono 16-bit PCM at ``SR`` Hz.

    Exits with status 2 and a usage message when required arguments are
    missing, and with an error message when the duration is not a number.
    """
    if len(sys.argv) < 4:
        print(
            "Usage: python3 gen_audio.py <tape> <out.wav> <duration_seconds> "
            "[keys.wav|keys_dir] [keyreturn.wav]",
            file=sys.stderr,
        )
        sys.exit(2)
    tape, out_wav = sys.argv[1], sys.argv[2]
    try:
        dur = float(sys.argv[3])
    except ValueError:
        sys.exit(f"gen_audio.py: duration must be a number of seconds, got {sys.argv[3]!r}")
    key_path = sys.argv[4] if len(sys.argv) > 4 else None
    ret_path = sys.argv[5] if len(sys.argv) > 5 else None

    key_pool = _load_pool(key_path)
    ret_pool = _load_pool(ret_path)
    src = f"sample pool x{len(key_pool)}" if key_pool else "synth"

    keys, returns = parse_events(tape)
    print(f"  {len(keys)} keystrokes + {len(returns)} returns over {dur:.1f}s ({src} clicks)")

    track = build_track(keys, returns, dur, key_pool, ret_pool)
    # A zero-length track has no peak; a silent one must not be divided by zero.
    peak = (float(np.max(np.abs(track))) if track.size else 0.0) or 1.0
    track = (track / peak) * 0.9  # leave headroom for the music mix downstream
    pcm = (track * 32767).astype(np.int16)

    with wave.open(out_wav, "wb") as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(SR)
        w.writeframes(pcm.tobytes())
    print(f"  wrote {out_wav}")
|
|
||||||
|
|
||||||
| if __name__ == "__main__": | ||||||
| main() | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P2:
`mktemp -d -t demo-sound` is not portable and fails on GNU/Linux, causing the script to exit immediately.
Prompt for AI agents
Tip: Review your code locally with the cubic CLI to iterate faster.