Skip to content

Commit 2a9fdbc

Browse files
committed
Transient/VBR tuning, give more bits to frames where pitch changes
1 parent 971b055 commit 2a9fdbc

File tree

3 files changed

+110
-115
lines changed

3 files changed

+110
-115
lines changed

celt/celt.c

Lines changed: 108 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -293,21 +293,27 @@ static inline opus_val16 SIG2WORD16(celt_sig x)
293293
}
294294

295295
static int transient_analysis(const opus_val32 * restrict in, int len, int C,
296-
int overlap, opus_val16 *tf_estimate, int *tf_chan)
296+
int overlap, opus_val16 *tf_estimate, int *tf_chan, AnalysisInfo *analysis)
297297
{
298298
int i;
299299
VARDECL(opus_val16, tmp);
300300
opus_val32 mem0,mem1;
301301
int is_transient = 0;
302302
int block;
303303
int c, N;
304-
opus_val16 maxbin, minbin[3];
305-
opus_val32 L1, L2, tf_tmp, tf_max;
304+
opus_val16 maxbin;
305+
int tf_max;
306306
VARDECL(opus_val16, bins);
307+
opus_val16 T1, T2, T3, T4, T5;
308+
opus_val16 follower;
309+
int metric=0;
310+
int fmetric=0, bmetric=0;
311+
int count1, count2, count3, count4, count5;;
312+
307313
SAVE_STACK;
308314
ALLOC(tmp, len, opus_val16);
309315

310-
block = overlap/8;
316+
block = overlap/4;
311317
N=len/block-1;
312318
ALLOC(bins, N, opus_val16);
313319

@@ -318,111 +324,97 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
318324
mem0=0;
319325
mem1=0;
320326
for (i=0;i<len;i++)
321-
tmp[i] = SHR32(in[i*C+c],SIG_SHIFT);
327+
tmp[i] = SHR32(in[i+c*len],SIG_SHIFT);
322328

323-
/* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
324-
for (i=0;i<len;i++)
325-
{
326-
opus_val32 x,y;
327-
x = tmp[i];
328-
y = ADD32(mem0, x);
329+
/* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
330+
for (i=0;i<len;i++)
331+
{
332+
opus_val32 x,y;
333+
x = tmp[i];
334+
y = ADD32(mem0, x);
329335
#ifdef FIXED_POINT
330-
mem0 = mem1 + y - SHL32(x,1);
331-
mem1 = x - SHR32(y,1);
336+
mem0 = mem1 + y - SHL32(x,1);
337+
mem1 = x - SHR32(y,1);
332338
#else
333-
mem0 = mem1 + y - 2*x;
334-
mem1 = x - .5f*y;
339+
mem0 = mem1 + y - 2*x;
340+
mem1 = x - .5f*y;
335341
#endif
336-
tmp[i] = EXTRACT16(SHR32(y,2));
337-
}
338-
/* First few samples are bad because we don't propagate the memory */
339-
for (i=0;i<12;i++)
340-
tmp[i] = 0;
341-
342-
maxbin=0;
343-
minbin[0] = minbin[1] = minbin[2] = 32768;
344-
for (i=0;i<N;i++)
345-
{
346-
int j;
347-
opus_val16 max_abs=0;
348-
for (j=0;j<2*block;j++)
349-
max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
350-
bins[i] = max_abs;
351-
maxbin = MAX16(maxbin, bins[i]);
352-
if (bins[i] < minbin[2])
353-
{
354-
if (bins[i] < minbin[1])
355-
{
356-
if (bins[i] < minbin[0])
357-
{
358-
minbin[2] = minbin[1];
359-
minbin[1] = minbin[0];
360-
minbin[0] = bins[i];
361-
} else {
362-
minbin[2] = minbin[1];
363-
minbin[1] = bins[i];
364-
}
365-
} else {
366-
minbin[2] = bins[i];
367-
}
368-
}
369-
}
370-
//printf("%f ", maxbin/minbin[2]);
371-
if (maxbin > 15*minbin[2])
372-
is_transient = 1;
373-
L1=0;
374-
L2=0;
375-
for (i=0;i<N;i++)
376-
{
377-
int j;
378-
int conseq=0;
379-
opus_val16 t1, t2, t3;
380-
opus_val16 tmp_bin;
381-
382-
tmp_bin = bins[i]+MULT16_16_Q15(QCONST16(.05f,15),maxbin);
383-
L1 += EXTEND32(tmp_bin);
384-
L2 += SHR32(MULT16_16(tmp_bin, tmp_bin), 4);
385-
t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
386-
t2 = MULT16_16_Q15(QCONST16(.3f, 15), bins[i]);
387-
t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
388-
for (j=0;j<i;j++)
389-
{
390-
if (bins[j] < t1)
391-
conseq++;
392-
if (bins[j] < t2)
393-
conseq++;
394-
else
395-
conseq = 0;
342+
tmp[i] = EXTRACT16(SHR32(y,2));
396343
}
397-
if (conseq>=12)
344+
/* First few samples are bad because we don't propagate the memory */
345+
for (i=0;i<12;i++)
346+
tmp[i] = 0;
347+
348+
maxbin=0;
349+
for (i=0;i<N;i++)
350+
{
351+
int j;
352+
opus_val16 max_abs=0;
353+
for (j=0;j<2*block;j++)
354+
max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
355+
//printf("%f ", max_abs);
356+
bins[i] = max_abs;
357+
maxbin = MAX16(maxbin, bins[i]);
358+
}
359+
360+
T1 = QCONST16(.09f, 15);
361+
T2 = QCONST16(.12f, 15);
362+
T3 = QCONST16(.18f, 15);
363+
T4 = QCONST16(.28f, 15);
364+
T5 = QCONST16(.4f, 15);
365+
366+
follower = 0;
367+
count1=count2=count3=count4=count5=0;
368+
for (i=0;i<N;i++)
369+
{
370+
follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
371+
if (bins[i] < MULT16_16_Q15(T1, follower))
372+
count1++;
373+
if (bins[i] < MULT16_16_Q15(T2, follower))
374+
count2++;
375+
if (bins[i] < MULT16_16_Q15(T3, follower))
376+
count3++;
377+
if (bins[i] < MULT16_16_Q15(T4, follower))
378+
count4++;
379+
if (bins[i] < MULT16_16_Q15(T5, follower))
380+
count5++;
381+
}
382+
fmetric = (5*count1 + 4*count2 + 3*count3 + 2*count4 + count5)/2;
383+
follower=0;
384+
count1=count2=count3=count4=count5=0;
385+
for (i=N-1;i>=0;i--)
386+
{
387+
follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
388+
if (bins[i] < MULT16_16_Q15(T1, follower))
389+
count1++;
390+
if (bins[i] < MULT16_16_Q15(T2, follower))
391+
count2++;
392+
if (bins[i] < MULT16_16_Q15(T3, follower))
393+
count3++;
394+
if (bins[i] < MULT16_16_Q15(T4, follower))
395+
count4++;
396+
if (bins[i] < MULT16_16_Q15(T5, follower))
397+
count5++;
398+
}
399+
bmetric = 5*count1 + 4*count2 + 3*count3 + 2*count4 + count5;
400+
metric = fmetric+bmetric;
401+
402+
//if (metric>40)
403+
if (metric>20+50*MAX16(analysis->tonality, analysis->noisiness))
398404
is_transient=1;
399-
conseq = 0;
400-
for (j=i+1;j<N;j++)
405+
406+
if (metric>tf_max)
401407
{
402-
if (bins[j] < t3)
403-
conseq++;
404-
else
405-
conseq = 0;
408+
*tf_chan = c;
409+
tf_max = metric;
406410
}
407-
if (conseq>=28)
408-
is_transient=1;
409411
}
410-
/* sqrt(L2*N)/L1 */
411-
tf_tmp = SHL32(DIV32( SHL32(EXTEND32(celt_sqrt(SHR16(L2,4) * N)), 14), ADD32(EPSILON, L1)), 4);
412-
tf_tmp = 1+MIN16(1,MAX16(0, 1-10*minbin[2]/(1+maxbin)));
413-
if (tf_tmp>tf_max)
414-
{
415-
*tf_chan = c;
416-
tf_max = tf_tmp;
417-
}
418-
*tf_estimate = MAX16(*tf_estimate, EXTRACT16(MIN32(QCONST32(1.99, 14), tf_tmp)));
419-
}
420-
*tf_estimate = MAX16(QCONST16(1.f, 14), *tf_estimate);
412+
*tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20);
421413
RESTORE_STACK;
422414
#ifdef FUZZING
423415
is_transient = rand()&0x1;
424416
#endif
425-
//printf("%d %f\n", is_transient, *tf_estimate);
417+
//printf("%d %f %f %f %f\n", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);
426418
return is_transient;
427419
}
428420

@@ -827,7 +819,7 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C)
827819

828820
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
829821
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
830-
AnalysisInfo *analysis, opus_val16 *stereo_saving)
822+
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate)
831823
{
832824
int i;
833825
opus_val32 diff=0;
@@ -884,7 +876,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
884876
trim_index++;
885877
if (diff < -QCONST16(10.f, DB_SHIFT))
886878
trim_index++;
887-
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (diff+QCONST16(1.f, DB_SHIFT))/16 ));
879+
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), (diff+QCONST16(1.f, DB_SHIFT))/6 ));
880+
trim -= 2*(tf_estimate-1);
888881
#ifndef FIXED_POINT
889882
if (analysis->valid)
890883
{
@@ -899,8 +892,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
899892
trim_index++;*/
900893
}
901894
#endif
902-
/*printf("%d %f\n", trim_index, trim);*/
903-
/*trim_index = floor(.5+trim);*/
895+
/*printf("%d %f ", trim_index, trim);*/
896+
trim_index = floor(.5+trim);
904897
if (trim_index<0)
905898
trim_index = 0;
906899
if (trim_index>10)
@@ -995,6 +988,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
995988
int tf_chan = 0;
996989
opus_val16 tf_estimate=0;
997990
opus_val16 stereo_saving = 0;
991+
int pitch_change=0;
998992
ALLOC_STACK;
999993

1000994
if (nbCompressedBytes<2 || pcm==NULL)
@@ -1195,6 +1189,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
11951189
if (pitch_index > COMBFILTER_MAXPERIOD-2)
11961190
pitch_index = COMBFILTER_MAXPERIOD-2;
11971191
gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
1192+
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
1193+
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
1194+
pitch_change = 1;
1195+
//printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);
11981196
if (st->loss_rate>2)
11991197
gain1 = HALF32(gain1);
12001198
if (st->loss_rate>4)
@@ -1293,7 +1291,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
12931291
if (st->complexity > 1)
12941292
{
12951293
isTransient = transient_analysis(in, N+st->overlap, CC,
1296-
st->overlap, &tf_estimate, &tf_chan);
1294+
st->overlap, &tf_estimate, &tf_chan, &st->analysis);
12971295
if (isTransient)
12981296
shortBlocks = M;
12991297
}
@@ -1465,7 +1463,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
14651463
if (tell+(6<<BITRES) <= total_bits - total_boost)
14661464
{
14671465
alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
1468-
st->end, LM, C, N, &st->analysis, &stereo_saving);
1466+
st->end, LM, C, N, &st->analysis, &stereo_saving, tf_estimate);
14691467
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
14701468
tell = ec_tell_frac(enc);
14711469
}
@@ -1530,29 +1528,24 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
15301528
if (C==2)
15311529
target -= MIN32(target/3, stereo_saving*(st->mode->eBands[intensity]<<LM<<BITRES));
15321530
#endif
1531+
target += (coded_bins<<BITRES)*.05;
1532+
target -= (coded_bins<<BITRES)*.13;
1533+
target *= .96;
15331534

15341535
#ifdef FIXED_POINT
1535-
new_target = SHL32(MULT16_32_Q15(target, SUB16(tf_estimate, QCONST16(0.05, 14))),1);
1536+
new_target = SHL32(MULT16_32_Q15(target, tf_estimate),1);
15361537
#else
1537-
{
1538-
//float tf_factor = 1+MIN16(1,2*MAX16(0,sqrt(tf_estimate-1)-.2));
1539-
float tf_factor = tf_estimate;
1540-
if (isTransient)
1541-
tf_factor = MAX16(1.2f, tf_factor);
1542-
//new_target = target*(tf_estimate-.05);
1543-
new_target = target*(tf_factor-.15);
1544-
//new_target = target*MIN32(2.f,MAX16(.85f,tf_sum/21.));
1545-
//printf("%f %f %f %f ", tf_factor, tf_sum/21., target*(tf_estimate-1.05), target*MIN32(2.f,MAX16(.85f,tf_sum/21.))-target);
1546-
}
1538+
new_target = target*tf_estimate;
15471539
#endif
15481540

15491541
#ifndef FIXED_POINT
15501542
if (st->analysis.valid) {
15511543
int tonal_target;
15521544
float tonal;
1553-
tonal = st->analysis.tonality;
1554-
tonal -= .15;
1545+
tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
15551546
tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
1547+
if (pitch_change)
1548+
tonal_target += (coded_bins<<BITRES)*.8;
15561549
/*printf("%f %d\n", tonal, tonal_target);*/
15571550
new_target = IMAX(tonal_target,new_target);
15581551
//printf("%f %f ", tonal, (coded_bins<<BITRES)*1.6f*tonal);

celt/celt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ typedef struct {
5454
int valid;
5555
opus_val16 tonality;
5656
opus_val16 tonality_slope;
57+
opus_val16 noisiness;
5758
opus_val16 activity;
5859
int boost_band[2];
5960
opus_val16 boost_amount[2];

src/analysis.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,5 +420,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
420420
else if (bandwidth<=15 || (bandwidth==16 && close_enough))
421421
tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
422422
}
423+
info->noisiness = frame_noisiness;
423424
info->valid = 1;
424425
}

0 commit comments

Comments
 (0)