Skip to content

Commit 482e054

Browse files
authored
Support Silero V5 (#154)
1 parent 80e7b34 commit 482e054

33 files changed

+389
-659
lines changed

docs/developer-guide/hacking.md

+24
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,27 @@ The automated tests are useful, but manual testing is even more important. There
1515
## Project Management
1616

1717
I set up a [GitHub project for VAD](https://github.com/users/ricky0123/projects/1) to track work related to the project.
18+
19+
## Playing with VAD model in browser console
20+
21+
Go to [test.vad.ricky0123.com](https://test.vad.ricky0123.com) and open the browser console. Then run the following line by line:
22+
23+
```js linenums="1"
24+
script = this.document.createElement("script")
25+
script.src = "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"
26+
document.body.appendChild(script)
27+
// wait a few seconds for the onnxruntime-web script to load, so that `ort` is defined
28+
modelarraybuffer = await fetch(`${location}silero_vad_v5.onnx`).then((model) => model.arrayBuffer())
29+
session = await ort.InferenceSession.create(modelarraybuffer)
30+
state_zeroes = Array(2 * 128).fill(0)
31+
state = new this.ort.Tensor("float32", state_zeroes, [2, 1, 128]) // https://github.com/snakers4/silero-vad/blob/fdbb0a3a81e0f9d95561d6b388d67dce5d9e3f1b/utils_vad.py#L58
32+
audio_zeros = Array(512).fill(0)
33+
audio = new this.ort.Tensor("float32", audio_zeros, [1, audio_zeros.length])
34+
sr = new this.ort.Tensor("int64", [16000n])
35+
inputs = {
36+
sr,
37+
state,
38+
input: audio
39+
}
40+
out = await session.run(inputs)
41+
```

package-lock.json

+4-76
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/react/package.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@
1313
"react"
1414
],
1515
"homepage": "https://github.com/ricky0123/vad",
16-
"version": "0.0.25",
16+
"version": "0.0.26",
1717
"license": "ISC",
1818
"main": "dist/index.js",
1919
"devDependencies": {
2020
"@types/react": "^18.0.28"
2121
},
2222
"dependencies": {
2323
"onnxruntime-web": "^1.14.0",
24-
"@ricky0123/vad-web": "^0.0.19"
24+
"@ricky0123/vad-web": "^0.0.20"
2525
},
2626
"peerDependencies": {
2727
"react": "^18",

packages/react/src/index.ts

+18-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import type { RealTimeVADOptions } from "@ricky0123/vad-web"
2-
import { MicVAD, defaultRealTimeVADOptions } from "@ricky0123/vad-web"
2+
import {
3+
DEFAULT_MODEL,
4+
MicVAD,
5+
getDefaultRealTimeVADOptions,
6+
} from "@ricky0123/vad-web"
37
import React, { useEffect, useReducer, useState } from "react"
48

59
export { utils } from "@ricky0123/vad-web"
@@ -16,13 +20,17 @@ const defaultReactOptions: ReactOptions = {
1620
userSpeakingThreshold: 0.6,
1721
}
1822

19-
export const defaultReactRealTimeVADOptions = {
20-
...defaultRealTimeVADOptions,
21-
...defaultReactOptions,
23+
export const getDefaultReactRealTimeVADOptions = (
24+
model: "legacy" | "v5"
25+
): ReactRealTimeVADOptions => {
26+
return {
27+
...getDefaultRealTimeVADOptions(model),
28+
...defaultReactOptions,
29+
}
2230
}
2331

2432
const reactOptionKeys = Object.keys(defaultReactOptions)
25-
const vadOptionKeys = Object.keys(defaultRealTimeVADOptions)
33+
const vadOptionKeys = Object.keys(getDefaultRealTimeVADOptions("v5"))
2634

2735
const _filter = (keys: string[], obj: any) => {
2836
return keys.reduce((acc, key) => {
@@ -34,7 +42,8 @@ const _filter = (keys: string[], obj: any) => {
3442
function useOptions(
3543
options: Partial<ReactRealTimeVADOptions>
3644
): [ReactOptions, RealTimeVADOptions] {
37-
options = { ...defaultReactRealTimeVADOptions, ...options }
45+
const model = options.model ?? DEFAULT_MODEL
46+
options = { ...getDefaultReactRealTimeVADOptions(model), ...options }
3847
const reactOptions = _filter(reactOptionKeys, options) as ReactOptions
3948
const vadOptions = _filter(vadOptionKeys, options) as RealTimeVADOptions
4049
return [reactOptions, vadOptions]
@@ -62,7 +71,7 @@ export function useMicVAD(options: Partial<ReactRealTimeVADOptions>) {
6271
false
6372
)
6473
const [loading, setLoading] = useState(true)
65-
const [errored, setErrored] = useState<false | { message: string }>(false)
74+
const [errored, setErrored] = useState<false | string>(false)
6675
const [listening, setListening] = useState(false)
6776
const [vad, setVAD] = useState<MicVAD | null>(null)
6877

@@ -92,10 +101,9 @@ export function useMicVAD(options: Partial<ReactRealTimeVADOptions>) {
92101
} catch (e) {
93102
setLoading(false)
94103
if (e instanceof Error) {
95-
setErrored({ message: e.message })
104+
setErrored(e.message)
96105
} else {
97-
// @ts-ignore
98-
setErrored({ message: e })
106+
setErrored(String(e))
99107
}
100108
return
101109
}

packages/web/package.json

+1-4
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@
1212
"offline-speech-recognition"
1313
],
1414
"homepage": "https://github.com/ricky0123/vad",
15-
"version": "0.0.19",
15+
"version": "0.0.20",
1616
"license": "ISC",
1717
"main": "dist/index.js",
1818
"unpkg": "dist/bundle.min.js",
1919
"jsdelivr": "dist/bundle.min.js",
2020
"devDependencies": {
21-
"@playwright/test": "^1.48.1",
2221
"@types/audioworklet": "^0.0.36",
2322
"@types/express": "^4.17.17",
2423
"express": "^4.18.2",
@@ -30,8 +29,6 @@
3029
},
3130
"scripts": {
3231
"build": "./scripts/build.sh",
33-
"test-server": "tsc -p ./scripts/tsconfig.json && node ./scripts/test-server.js",
34-
"test": "playwright test",
3532
"clean": "rm -rf dist",
3633
"publish": "npm publish --access public"
3734
}

packages/web/playwright.config.ts

-90
This file was deleted.

packages/web/scripts/build.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
rm -rf dist
44
mkdir dist
55
npx tsc
6-
cp ../../silero_vad.onnx dist
6+
cp \
7+
../../silero_vad_legacy.onnx \
8+
../../silero_vad_v5.onnx \
9+
dist
710
npx webpack -c webpack.config.worklet.js
811
npx webpack -c webpack.config.index.js

0 commit comments

Comments
 (0)