-
Notifications
You must be signed in to change notification settings - Fork 1
/
webaligner.js
115 lines (105 loc) · 3.2 KB
/
webaligner.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
var webAligner = (function()
{
var exports = {};
// estimates the duration of a word, in seconds
function wordDuration(word) {
return 0.08475 + (0.05379 * word.length);
}
// estimates the step position of word boundaries
// optionally weighted by word length
function getStepPositions(wordArray, steps, weight)
{
var wordStepPos = [];
if (wordArray.length<2) {
console.log("Need at least two words!");
return null;
}
if (weight) {
var position = 0;
wordDur = [];
totalDur = 0;
for (i=0; i<wordArray.length; i++) {
var duration = wordDuration(wordArray[i]);
wordDur.push(duration);
totalDur += duration;
}
for (i=0; i<wordArray.length-1; i++) {
position += wordDur[i];
wordStepPos.push(Math.floor(position/totalDur*steps));
}
} else {
for (i=1; i<wordArray.length; i++) {
wordStepPos.push(Math.floor(i/wordArray.length*steps));
}
}
return wordStepPos;
}
// perform gradient descent on an array, starting at a given position
function descend(array, startPos)
{
var pos = startPos;
var min = array[pos];
while (true) {
var bottom = true;
var newPos = pos;
if (pos>0) {
if (array[pos-1] < min) {
min=array[pos-1];
newPos--;
bottom = false;
}
}
if (pos<array.length-1) {
if (array[pos+1] < min) {
min = array[pos+1];
newPos++;
bottom = false;
}
}
pos = newPos;
if (bottom) break;
}
return pos;
}
exports.align = function(audioCtx, audioData, startTs, endTs, words, weight, cb)
{
var blockLen = 0.025; // 25ms block size
var stepLen = 0.01; // 10ms step size
var duration = endTs-startTs;
var startStep = Math.floor((startTs-(blockLen/2))/stepLen);
var endStep = Math.floor((endTs-(blockLen/2))/stepLen);
var steps = endStep-startStep;
var blockSize = Math.floor(blockLen*audioCtx.sampleRate);
var stepSize = Math.floor(stepLen*audioCtx.sampleRate);
var wordArray = words.split(" ");
// estimate word positions
var wordStepPos = getStepPositions(wordArray, steps, weight);
// decode audio
audioCtx.decodeAudioData(audioData, function(decodedData)
{
var audio = decodedData.getChannelData(0);
var totalDuration = decodedData.duration;
if (endTs>totalDuration) return console.log('End time after end of file');
// calculate RMS energy curve of audio segment
var rms = [];
for (stepNum = startStep; stepNum < endStep; stepNum++) {
var blockTotal = 0.0;
for (i=0; i<blockSize; i++) {
var sample = audio[(stepSize*stepNum)+i];
blockTotal += sample*sample;
}
rms.push(Math.sqrt(blockTotal/blockSize));
}
// starting at the estimated word positions, find the timestamps of the local RMS minima
var times = [];
times.push(startTs);
for (i=0; i<wordStepPos.length; i++) {
wordStepPos[i] = descend(rms, wordStepPos[i]);
times.push(startTs + (wordStepPos[i]/steps*duration));
}
times.push(endTs);
cb(times);
});
}
return exports;
}());