superset-sh · saddlepaddle · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
diff --git a/.github/workflows/build-desktop.yml b/.github/workflows/build-desktop.yml
@@ -81,6 +81,11 @@ jobs:
         working-directory: apps/desktop
         run: bun run clean:dev
 
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
       - name: Compile app with electron-vite
         working-directory: apps/desktop
         env:

diff --git a/apps/api/package.json b/apps/api/package.json
@@ -38,6 +38,7 @@
 		"lodash.chunk": "^4.2.0",
 		"mcp-handler": "^1.0.7",
 		"next": "^16.0.10",
+		"openai": "^6.17.0",
 		"react": "19.1.0",
 		"react-dom": "19.1.0",
 		"require-in-the-middle": "8.0.1",

diff --git a/apps/api/src/app/api/voice/route.ts b/apps/api/src/app/api/voice/route.ts
@@ -0,0 +1,94 @@
+import { auth } from "@superset/auth/server";
+import { runVoicePipeline } from "./voice-service";
+
+/**
+ * POST handler for the voice route: accepts multipart form data with an
+ * `audio` file and streams the voice pipeline's progress back as
+ * Server-Sent Events.
+ *
+ * Responds 401 without a signed-in user, 400 without an active organization
+ * or a usable `audio` part, and 413 when the upload is larger than 5 MB.
+ */
+export async function POST(request: Request) {
+	const session = await auth.api.getSession({ headers: request.headers });
+	if (!session?.user) {
+		return Response.json({ error: "Unauthorized" }, { status: 401 });
+	}
+
+	const organizationId = session.session.activeOrganizationId;
+	if (!organizationId) {
+		return Response.json({ error: "No active organization" }, { status: 400 });
+	}
+
+	// Parse the multipart body once; any parsing failure is logged and answered with 400.
+	let formData: FormData;
+	try {
+		formData = await request.formData();
+	} catch (error) {
+		console.error("[voice/route] Form data parsing failed:", error);
+		return Response.json(
+			{ error: "Expected multipart form data with audio file" },
+			{ status: 400 },
+		);
+	}
+
+	const audioFile = formData.get("audio");
+	if (!audioFile || !(audioFile instanceof File)) {
+		return Response.json(
+			{ error: "Missing 'audio' file in form data" },
+			{ status: 400 },
+		);
+	}
+
+	const MAX_AUDIO_SIZE = 5 * 1024 * 1024; // 5 MB
+	if (audioFile.size > MAX_AUDIO_SIZE) {
+		return Response.json(
+			{ error: "Audio file too large (max 5 MB)" },
+			{ status: 413 },
+		);
+	}
+
+	const audioBuffer = new Uint8Array(await audioFile.arrayBuffer());
+
+	const encoder = new TextEncoder();
+
+	const stream = new ReadableStream({
+		async start(controller) {
+			// Frames one SSE message: an `event:` line, a JSON `data:` line, a blank line.
+			const sse = {
+				write(event: string, data: unknown) {
+					const payload = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
+					controller.enqueue(encoder.encode(payload));
+				},
+			};
+
+			try {
+				await runVoicePipeline({
+					audioBuffer,
+					ctx: { userId: session.user.id, organizationId },
+					sse,
+					signal: request.signal,
+				});
+			} catch (error) {
+				// Pipeline errors are logged and sent as an `error` event only when the request was not aborted.
+				if (!request.signal.aborted) {
+					console.error("[voice/route] Pipeline error:", error);
+					sse.write("error", {
+						message:
+							error instanceof Error ? error.message : "Voice pipeline failed",
+					});
+				}
+			} finally {
+				controller.close();
+			}
+		},
+	});
+
+	return new Response(stream, {
+		headers: {
+			"Content-Type": "text/event-stream",
+			"Cache-Control": "no-cache",
+			Connection: "keep-alive",
+		},
+	});
+}
diff --git a/apps/api/src/app/api/voice/voice-service.ts b/apps/api/src/app/api/voice/voice-service.ts
@@ -0,0 +1,227 @@
+import Anthropic from "@anthropic-ai/sdk";
+import type { McpContext } from "@superset/mcp/auth";
+import { createInMemoryMcpClient } from "@superset/mcp/in-memory";
+import { OpenAI } from "openai";
+import { env } from "@/env";
+
+const SYSTEM_PROMPT = `You are a helpful voice assistant for Superset, a project management tool. You have access to tools for creating and managing tasks, workspaces, and other organizational resources. Keep responses concise and conversational — the user is speaking to you, so respond in 1-3 sentences unless the question requires more detail. When you use tools, briefly confirm what you did.`;
+
+// Desktop-only tools that don't make sense in voice context
+const DENIED_TOOLS = new Set([
+	"navigate_to_workspace",
+	"switch_workspace",
+	"get_app_context",
+]);
+
+// Upper bound on model → tool → model round trips for a single request.
+const MAX_TOOL_ROUNDS = 5;
+
+// Leading "hey jarvis" wake word, tolerating punctuation or whitespace between
+// and after the two words (e.g. "Hey, Jarvis. ").
+const WAKE_WORD_PREFIX = /^hey[,.\s!?]*jarvis[,.\s!?]*/i;
+
+/** Sink for the named events the pipeline emits while it runs. */
+interface SSEWriter {
+	write(event: string, data: unknown): void;
+}
+
+/**
+ * Transcribes the audio with OpenAI's `whisper-1` model and strips a leading
+ * "hey jarvis" wake word from the result.
+ *
+ * @param audioBuffer - Audio bytes; sent to the API as a file named `audio.wav`.
+ * @param signal - Optional abort signal forwarded to the OpenAI request.
+ * @returns The trimmed transcription, which may be an empty string.
+ */
+async function transcribeAudio({
+	audioBuffer,
+	signal,
+}: {
+	audioBuffer: Uint8Array;
+	signal?: AbortSignal;
+}): Promise<string> {
+	const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY });
+
+	const blob = new Blob([audioBuffer as BlobPart], { type: "audio/wav" });
+	const file = new File([blob], "audio.wav", { type: "audio/wav" });
+
+	const result = await openai.audio.transcriptions.create(
+		{
+			model: "whisper-1",
+			file,
+		},
+		{ signal },
+	);
+
+	// Strip wake word from transcription
+	return result.text.trim().replace(WAKE_WORD_PREFIX, "").trim();
+}
+
+/**
+ * Runs the full voice pipeline: transcription → Claude with MCP tools → streaming SSE.
+ *
+ * Events written to `sse`:
+ * - `transcription` `{ text }`: once, right after speech-to-text.
+ * - `text_delta` `{ delta }`: for each streamed chunk of assistant text.
+ * - `tool_use` `{ toolName, toolInput }` then `tool_result` `{ toolName, result }`:
+ *   for each tool call the model requests.
+ * - `done` `{ fullResponse }`: at the end of a run that was not aborted (with an
+ *   empty `fullResponse` when the transcription is empty).
+ *
+ * Errors from transcription, MCP setup, or (when not aborted) the model request
+ * propagate to the caller. Once the model loop is running, an aborted `signal`
+ * makes the function return without writing `done`. A failing tool call does not
+ * end the run: it is returned to the model as an `is_error` tool result.
+ */
+export async function runVoicePipeline({
+	audioBuffer,
+	ctx,
+	sse,
+	signal,
+}: {
+	audioBuffer: Uint8Array;
+	ctx: McpContext;
+	sse: SSEWriter;
+	signal?: AbortSignal;
+}): Promise<void> {
+	// 1. Transcribe
+	const transcription = await transcribeAudio({ audioBuffer, signal });
+	sse.write("transcription", { text: transcription });
+
+	if (!transcription) {
+		sse.write("done", { fullResponse: "" });
+		return;
+	}
+
+	// 2. Create in-memory MCP client for tool access
+	const { client: mcpClient, cleanup } = await createInMemoryMcpClient({
+		userId: ctx.userId,
+		organizationId: ctx.organizationId,
+	});
+
+	try {
+		const { tools: mcpTools } = await mcpClient.listTools();
+
+		const anthropicTools: Anthropic.Tool[] = mcpTools
+			.filter((t) => !DENIED_TOOLS.has(t.name))
+			.map((t) => ({
+				name: t.name,
+				description: t.description ?? "",
+				input_schema: t.inputSchema as Anthropic.Tool.InputSchema,
+			}));
+
+		// 3. Stream Claude response with tool use loop
+		const anthropic = new Anthropic({ apiKey: env.ANTHROPIC_API_KEY });
+
+		const messages: Anthropic.MessageParam[] = [
+			{ role: "user", content: transcription },
+		];
+
+		let fullResponse = "";
+
+		try {
+			for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
+				if (signal?.aborted) return;
+
+				const stream = anthropic.messages.stream(
+					{
+						model: "claude-sonnet-4-20250514",
+						max_tokens: 1024,
+						system: SYSTEM_PROMPT,
+						messages,
+						tools: anthropicTools.length > 0 ? anthropicTools : undefined,
+					},
+					{ signal },
+				);
+
+				for await (const event of stream) {
+					if (event.type === "content_block_delta") {
+						if (event.delta.type === "text_delta") {
+							fullResponse += event.delta.text;
+							sse.write("text_delta", { delta: event.delta.text });
+						}
+					}
+				}
+
+				const finalMessage = await stream.finalMessage();
+				const contentBlocks = finalMessage.content;
+
+				const toolUseBlocks = contentBlocks.filter(
+					(block): block is Anthropic.ToolUseBlock => block.type === "tool_use",
+				);
+
+				if (toolUseBlocks.length === 0) {
+					break;
+				}
+
+				const toolResults: Anthropic.ToolResultBlockParam[] = [];
+
+				for (const toolBlock of toolUseBlocks) {
+					if (signal?.aborted) return;
+
+					sse.write("tool_use", {
+						toolName: toolBlock.name,
+						toolInput: toolBlock.input,
+					});
+
+					try {
+						const result = await mcpClient.callTool({
+							name: toolBlock.name,
+							arguments: toolBlock.input as Record<string, unknown>,
+						});
+
+						const resultText = JSON.stringify(result.content);
+
+						sse.write("tool_result", {
+							toolName: toolBlock.name,
+							result: resultText,
+						});
+
+						toolResults.push({
+							type: "tool_result",
+							tool_use_id: toolBlock.id,
+							content: resultText,
+						});
+					} catch (error) {
+						if (signal?.aborted) return;
+						console.error(
+							`[voice/tool] Error executing ${toolBlock.name}:`,
+							error,
+						);
+						const errorText = JSON.stringify({
+							error:
+								error instanceof Error
+									? error.message
+									: "Tool execution failed",
+						});
+
+						sse.write("tool_result", {
+							toolName: toolBlock.name,
+							result: errorText,
+						});
+
+						toolResults.push({
+							type: "tool_result",
+							tool_use_id: toolBlock.id,
+							content: errorText,
+							is_error: true,
+						});
+					}
+				}
+
+				// Feed the assistant turn and its tool results back for the next round.
+				messages.push({ role: "assistant", content: contentBlocks });
+				messages.push({ role: "user", content: toolResults });
+			}
+		} catch (error) {
+			if (signal?.aborted) return;
+			throw error;
+		}
+
+		sse.write("done", { fullResponse });
+	} finally {
+		// Best-effort teardown: a cleanup failure is logged rather than thrown.
+		await cleanup().catch((error: unknown) => {
+			console.error("[voice/service] MCP client cleanup failed:", error);
+		});
+	}
+}
diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts
@@ -40,6 +40,7 @@ export const env = createEnv({
 		STRIPE_PRO_MONTHLY_PRICE_ID: z.string(),
 		STRIPE_PRO_YEARLY_PRICE_ID: z.string(),
 		SENTRY_AUTH_TOKEN: z.string().optional(),
+		OPENAI_API_KEY: z.string().min(1),
 	},
 	client: {
 		NEXT_PUBLIC_API_URL: z.string().url(),

diff --git a/apps/desktop/electron-builder.canary.ts b/apps/desktop/electron-builder.canary.ts
@@ -31,6 +31,7 @@ const config: Configuration = {
 		icon: join(pkg.resources, "build/icons/icon-canary.icns"),
 		artifactName: `Superset-Canary-\${version}-\${arch}.\${ext}`,
 		extendInfo: {
+			...baseConfig.mac?.extendInfo,
 			CFBundleName: productName,
 			CFBundleDisplayName: productName,
 		},

diff --git a/apps/desktop/electron-builder.ts b/apps/desktop/electron-builder.ts
@@ -56,6 +56,12 @@ const config: Configuration = {
 			to: "resources/migrations",
 			filter: ["**/*"],
 		},
+		// Voice sidecar binary (built by PyInstaller via scripts/build-voice-sidecar.sh)
+		{
+			from: "dist/voice-sidecar/voice-sidecar",
+			to: "voice-sidecar",
+			filter: ["**/*"],
+		},
 	],
 
 	files: [
@@ -117,6 +123,8 @@ const config: Configuration = {
 		hardenedRuntime: true,
 		gatekeeperAssess: false,
 		notarize: true,
+		entitlements: join(pkg.resources, "build/entitlements.mac.plist"),
+		entitlementsInherit: join(pkg.resources, "build/entitlements.mac.plist"),
 		extendInfo: {
 			CFBundleName: productName,
 			CFBundleDisplayName: productName,
@@ -125,6 +133,9 @@ const config: Configuration = {
 				"Superset needs access to your local network to discover and connect to development servers running on your network.",
 			// Bonjour service types to browse for (triggers the permission prompt)
 			NSBonjourServices: ["_http._tcp", "_https._tcp"],
+			// Required for microphone access (voice commands)
+			NSMicrophoneUsageDescription:
+				"Superset uses the microphone for voice commands to interact with your development environment.",
 		},
 	},
 

diff --git a/apps/desktop/package.json b/apps/desktop/package.json
@@ -22,7 +22,7 @@
 		"copy:native-modules": "bun run scripts/copy-native-modules.ts",
 		"prebuild": "bun run clean:dev && bun run compile:app && bun run copy:native-modules",
 		"build": "cross-env CSC_IDENTITY_AUTO_DISCOVERY=false electron-builder --publish never",
-		"prepackage": "bun run copy:native-modules",
+		"prepackage": "bun run copy:native-modules && bash scripts/build-voice-sidecar.sh",
 		"package": "electron-builder --config electron-builder.ts",
 		"install:deps": "electron-builder install-app-deps",
 		"release": "electron-builder --publish always",