Skip to content

Commit

Permalink
feat: voice print
Browse files Browse the repository at this point in the history
  • Loading branch information
Dogtiti committed Nov 8, 2024
1 parent a4c9eaf commit 8b4ca13
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 22 deletions.
6 changes: 0 additions & 6 deletions app/components/realtime-chat/realtime-chat.module.scss
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,6 @@
box-sizing: border-box;
padding: 0 20px;
}
.icon-center {
display: flex;
justify-content: center;
align-items: center;
gap: 4px;
}

.icon-left,
.icon-right {
Expand Down
45 changes: 29 additions & 16 deletions app/components/realtime-chat/realtime-chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
} from "rt-client";
import { AudioHandler } from "@/app/lib/audio";
import { uploadImage } from "@/app/utils/chat";
import { VoicePrint } from "@/app/components/voice-print";

interface RealtimeChatProps {
onClose?: () => void;
Expand All @@ -41,6 +42,7 @@ export function RealtimeChat({
const [isConnecting, setIsConnecting] = useState(false);
const [modality, setModality] = useState("audio");
const [useVAD, setUseVAD] = useState(true);
const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();

const clientRef = useRef<RTClient | null>(null);
const audioHandlerRef = useRef<AudioHandler | null>(null);
Expand Down Expand Up @@ -272,29 +274,39 @@ export function RealtimeChat({
console.error(error);
});

// TODO demo to get frequency. will pass audioHandlerRef.current to child component draw.
// TODO try using requestAnimationFrame
const interval = setInterval(() => {
if (audioHandlerRef.current) {
const data = audioHandlerRef.current.getByteFrequencyData();
console.log("getByteFrequencyData", data);
}
}, 1000);

return () => {
if (isRecording) {
toggleRecording();
}
audioHandlerRef.current
?.close()
.catch(console.error)
.finally(() => {
clearInterval(interval);
});
audioHandlerRef.current?.close().catch(console.error);
disconnect();
};
}, []);

// Feed the voice-print visualisation: while the client is connected and
// recording, sample the audio handler's byte frequency data once per
// animation frame and publish it through the `frequencies` state.
useEffect(() => {
// Handle of the most recently scheduled frame; stays unassigned when the
// loop is never started (not connected or not recording).
let animationFrameId: number;

if (isConnected && isRecording) {
const animationFrame = () => {
if (audioHandlerRef.current) {
// NOTE(review): if getByteFrequencyData() hands back the same buffer
// instance on every call, React will treat this state update as a no-op
// and skip re-rendering — confirm against AudioHandler.
const freqData = audioHandlerRef.current.getByteFrequencyData();
setFrequencies(freqData);
}
// Re-arm the loop for the next frame.
animationFrameId = requestAnimationFrame(animationFrame);
};

animationFrameId = requestAnimationFrame(animationFrame);
} else {
// Not capturing audio: clear the data handed to the visualisation.
setFrequencies(undefined);
}

// Cleanup (on dependency change or unmount): cancel the pending frame, if any.
return () => {
if (animationFrameId) {
cancelAnimationFrame(animationFrameId);
}
};
}, [isConnected, isRecording]);

// update session params
useEffect(() => {
clientRef.current?.configure({ voice });
Expand All @@ -318,8 +330,9 @@ export function RealtimeChat({
[styles["pulse"]]: isRecording,
})}
>
<div className={styles["icon-center"]}></div>
<VoicePrint frequencies={frequencies} isActive={isRecording} />
</div>

<div className={styles["bottom-icons"]}>
<div>
<IconButton
Expand Down
1 change: 1 addition & 0 deletions app/components/voice-print/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./voice-print";
11 changes: 11 additions & 0 deletions app/components/voice-print/voice-print.module.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Container for the voice-print canvas: full width, fixed 60px height.
.voice-print {
width: 100%;
height: 60px;
margin: 20px 0;

// The canvas stretches to fill the container.
canvas {
width: 100%;
height: 100%;
filter: brightness(1.2); // increase overall brightness
}
}
125 changes: 125 additions & 0 deletions app/components/voice-print/voice-print.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { useEffect, useRef, useState } from "react";
import styles from "./voice-print.module.scss";

/** Props accepted by the `VoicePrint` canvas visualisation. */
interface VoicePrintProps {
/** Latest byte frequency samples to draw; when omitted, nothing is drawn. */
frequencies?: Uint8Array;
/** Whether the waveform should be drawn; when falsy the canvas is cleared. */
isActive?: boolean;
}

/**
 * Draws a mirrored, smoothed waveform ("voice print") of audio frequency data
 * onto a canvas.
 *
 * Each bin is averaged with the same bin from up to ten previously drawn
 * frames to smooth the shape, then modulated by a time-based sine so the
 * outline keeps moving.
 *
 * @param frequencies - Byte frequency samples (0–255 per bin). When absent the
 *   canvas is cleared and the smoothing history is reset.
 * @param isActive - When falsy the canvas is cleared and nothing is drawn.
 * @returns A `div` wrapping the canvas element.
 */
export function VoicePrint({ frequencies, isActive }: VoicePrintProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  // Smoothing history is kept in a ref, not in state: it is written on every
  // drawn frame, and storing it in state (and in the effect's dependency list)
  // made the effect re-trigger itself endlessly.
  const historyRef = useRef<number[][]>([]);
  const historyLengthRef = useRef(10); // keep 10 frames of history

  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return undefined;

    const ctx = canvas.getContext("2d");
    if (!ctx) return undefined;

    // Size the backing store in device pixels, then scale the context so that
    // all drawing below can use CSS-pixel coordinates. Assigning width/height
    // resets the context state, so the scale does not accumulate across runs.
    const dpr = window.devicePixelRatio || 1;
    const width = canvas.offsetWidth;
    const height = canvas.offsetHeight;
    canvas.width = width * dpr;
    canvas.height = height * dpr;
    ctx.scale(dpr, dpr);

    if (!frequencies || !isActive) {
      ctx.clearRect(0, 0, width, height);
      historyRef.current = []; // reset the smoothing history
      return undefined;
    }

    // Keep a narrowed reference for use inside the frame callback.
    const samples = frequencies;
    const centerY = height / 2;
    // A single bin has no horizontal extent; avoid dividing by zero.
    const sliceWidth = samples.length > 1 ? width / (samples.length - 1) : 0;
    // Reflect a y coordinate across the horizontal centre line.
    const mirror = (y: number) => centerY - (y - centerY);

    // Horizontal gradient used to fill the waveform.
    const gradient = ctx.createLinearGradient(0, 0, width, 0);
    gradient.addColorStop(0, "rgba(100, 180, 255, 0.95)");
    gradient.addColorStop(0.5, "rgba(140, 200, 255, 0.9)");
    gradient.addColorStop(1, "rgba(180, 220, 255, 0.95)");

    let animationFrameId = 0;

    // The component drives its own animation loop instead of relying on a
    // re-render per frame: the sine phase below changes with time, and the
    // parent may hand over the same buffer updated in place, in which case
    // React would not re-render this component at all.
    const draw = () => {
      ctx.clearRect(0, 0, width, height);

      const previousFrames = historyRef.current;
      const phase = Date.now() * 0.002;
      const points: [number, number][] = [];

      ctx.beginPath();

      // Upper outline: average each bin with its history for smoothing.
      for (let i = 0; i < samples.length; i++) {
        let sum = samples[i] ?? 0;
        for (const frame of previousFrames) {
          sum += frame[i] || 0;
        }
        const avgFrequency = sum / (previousFrames.length + 1);

        // Modulate with a sine so the waveform looks more natural.
        const amplitude = (avgFrequency / 255.0) * centerY;
        const x = i * sliceWidth;
        const y = centerY + amplitude * Math.sin(i * 0.2 + phase);

        const prev = points[i - 1];
        if (!prev) {
          ctx.moveTo(x, y);
        } else {
          // Quadratic curve through the midpoint keeps the outline smooth.
          ctx.quadraticCurveTo(
            prev[0],
            prev[1],
            (prev[0] + x) / 2,
            (prev[1] + y) / 2,
          );
        }
        points.push([x, y]);
      }

      // Lower outline: the same points mirrored, walked right to left.
      for (let i = points.length - 1; i >= 0; i--) {
        const point = points[i];
        if (!point) continue;
        const [x, y] = point;
        const next = points[i + 1];
        if (!next) {
          ctx.lineTo(x, mirror(y));
        } else {
          ctx.quadraticCurveTo(
            next[0],
            mirror(next[1]),
            (next[0] + x) / 2,
            mirror((next[1] + y) / 2),
          );
        }
      }

      ctx.closePath();
      ctx.fillStyle = gradient;
      ctx.fill();

      // Record this frame, keeping only the most recent N frames.
      historyRef.current = [...previousFrames, Array.from(samples)].slice(
        -historyLengthRef.current,
      );

      animationFrameId = requestAnimationFrame(draw);
    };

    draw();

    // Stop the loop when the inputs change or the component unmounts.
    return () => {
      cancelAnimationFrame(animationFrameId);
    };
  }, [frequencies, isActive]);

  return (
    <div className={styles["voice-print"]}>
      <canvas ref={canvasRef} />
    </div>
  );
}

0 comments on commit 8b4ca13

Please sign in to comment.