Skip to content
Merged
180 changes: 148 additions & 32 deletions src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,37 @@

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
/// <summary>
/// Quantization methods.
/// </summary>
internal static class QuantEnc
internal static unsafe class QuantEnc
{
private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };

private static readonly ushort[] WeightY = { 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2 };

private const int MaxLevel = 2047;

#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<short> MaxCoeff2047 = Vector128.Create((short)MaxLevel);

private static readonly Vector128<byte> CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13);

private static readonly Vector128<byte> Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255);

private static readonly Vector128<byte> CstHi = Vector128.Create(2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15);

private static readonly Vector128<byte> Cst8 = Vector128.Create(254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255);
#endif

// Diffusion weights. We under-correct a bit (15/16th of the error is actually
// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0.
private const int C1 = 7; // fraction of error sent to the 4x4 block below
Expand Down Expand Up @@ -510,51 +527,150 @@ public static void RefineUsingDistortion(Vp8EncIterator it, Vp8SegmentInfo[] seg
[MethodImpl(InliningOptions.ShortMethod)]
public static int Quantize2Blocks(Span<short> input, Span<short> output, Vp8Matrix mtx)
Copy link
Member

@antonfirsov antonfirsov Nov 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to pass Vp8Matrix by reference now (it is a struct) to avoid a full stack copy on every call.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah yes, good spot

{
int nz = QuantizeBlock(input, output, mtx) << 0;
nz |= QuantizeBlock(input.Slice(1 * 16), output.Slice(1 * 16), mtx) << 1;
int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0;
nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1;
return nz;
}

public static int QuantizeBlock(Span<short> input, Span<short> output, Vp8Matrix mtx)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here too: ref Vp8Matrix mtx

{
int last = -1;
int n;
for (n = 0; n < 16; ++n)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
int j = Zigzag[n];
bool sign = input[j] < 0;
uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]);
if (coeff > mtx.ZThresh[j])
#pragma warning disable SA1503 // Braces should not be omitted
// Vectorized path: quantizes all 16 coefficients of the block as two 8-lane halves.
// On exit 'input' holds the dequantized coefficients (level * Q, natural order) and
// 'output' holds the quantized levels in zigzag order.

// Load all inputs.
Vector128<short> input0 = Unsafe.As<short, Vector128<short>>(ref MemoryMarshal.GetReference(input));
Vector128<short> input8 = Unsafe.As<short, Vector128<short>>(ref MemoryMarshal.GetReference(input.Slice(8, 8)));
Vector128<ushort> iq0 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.IQ[0]);
Vector128<ushort> iq8 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.IQ[8]);
Vector128<ushort> q0 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.Q[0]);
Vector128<ushort> q8 = Unsafe.As<ushort, Vector128<ushort>>(ref mtx.Q[8]);

// coeff = abs(in)
Vector128<ushort> coeff0 = Ssse3.Abs(input0);
Vector128<ushort> coeff8 = Ssse3.Abs(input8);

// coeff = abs(in) + sharpen
// The sum must be assigned back: Sse2.Add returns a new vector and does not modify its operands.
// Without the assignment the sharpen term is silently dropped and this path disagrees with the
// scalar fallback, which computes abs(in) + mtx.Sharpen[j].
Vector128<short> sharpen0 = Unsafe.As<short, Vector128<short>>(ref mtx.Sharpen[0]);
Vector128<short> sharpen8 = Unsafe.As<short, Vector128<short>>(ref mtx.Sharpen[8]);
coeff0 = Sse2.Add(coeff0.AsInt16(), sharpen0).AsUInt16();
coeff8 = Sse2.Add(coeff8.AsInt16(), sharpen8).AsUInt16();

// out = (coeff * iQ + B) >> QFIX
// doing calculations with 32b precision (QFIX=17)
// out = (coeff * iQ)
// The 16x16 -> 32 bit products are built by interleaving the low and high 16 bit halves.
Vector128<ushort> coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0);
Vector128<ushort> coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0);
Vector128<ushort> coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8);
Vector128<ushort> coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8);
Vector128<ushort> out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H);
Vector128<ushort> out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H);
Vector128<ushort> out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H);
Vector128<ushort> out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H);

// out = (coeff * iQ + B)
Vector128<uint> bias00 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[0]);
Vector128<uint> bias04 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[4]);
Vector128<uint> bias08 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[8]);
Vector128<uint> bias12 = Unsafe.As<uint, Vector128<uint>>(ref mtx.Bias[12]);
out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16();
out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16();
out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16();
out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16();

// out = QUANTDIV(coeff, iQ, B, QFIX)
out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16();
out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16();
out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16();
out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16();

// pack result as 16b
Vector128<short> out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32());
Vector128<short> out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32());

// if (coeff > 2047) coeff = 2047
out0 = Sse2.Min(out0, MaxCoeff2047);
out8 = Sse2.Min(out8, MaxCoeff2047);

// put sign back
out0 = Ssse3.Sign(out0, input0);
out8 = Ssse3.Sign(out8, input8);

// in = out * Q
input0 = Sse2.MultiplyLow(out0, q0.AsInt16());
input8 = Sse2.MultiplyLow(out8, q8.AsInt16());

// Store the dequantized coefficients back into the input buffer.
ref short inputRef = ref MemoryMarshal.GetReference(input);
Unsafe.As<short, Vector128<short>>(ref inputRef) = input0;
Unsafe.As<short, Vector128<short>>(ref Unsafe.Add(ref inputRef, 8)) = input8;

// zigzag the output before storing it. The re-ordering is:
// 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15
// -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15
// There's only two misplaced entries ([8] and [7]) that are crossing the
// reg's boundaries.
// We use pshufb instead of pshuflo/pshufhi.
Vector128<byte> tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo);
Vector128<byte> tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7
Vector128<byte> tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi);
Vector128<byte> tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8
Vector128<byte> outZ0 = Sse2.Or(tmpLo, tmp8);
Vector128<byte> outZ8 = Sse2.Or(tmpHi, tmp7);

ref short outputRef = ref MemoryMarshal.GetReference(output);
Unsafe.As<short, Vector128<short>>(ref outputRef) = outZ0.AsInt16();
Unsafe.As<short, Vector128<short>>(ref Unsafe.Add(ref outputRef, 8)) = outZ8.AsInt16();

// Narrow the 16 levels to one byte each so a single compare/movemask covers the whole block.
// Signed saturation never turns a non-zero value into zero, so the zero test stays exact.
Vector128<sbyte> packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16());

// Detect if all 'out' values are zeroes or not: returns 1 when at least one level is non-zero.
Vector128<sbyte> cmpeq = Sse2.CompareEqual(packedOutput, Vector128<sbyte>.Zero);
return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0;
#pragma warning restore SA1503 // Braces should not be omitted
}
else
#endif
{
int last = -1;
int n;
for (n = 0; n < 16; ++n)
{
uint q = mtx.Q[j];
uint iQ = mtx.IQ[j];
uint b = mtx.Bias[j];
int level = QuantDiv(coeff, iQ, b);
if (level > MaxLevel)
int j = Zigzag[n];
bool sign = input[j] < 0;
uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]);
if (coeff > mtx.ZThresh[j])
{
level = MaxLevel;
}
uint q = mtx.Q[j];
uint iQ = mtx.IQ[j];
uint b = mtx.Bias[j];
int level = QuantDiv(coeff, iQ, b);
if (level > MaxLevel)
{
level = MaxLevel;
}

if (sign)
{
level = -level;
}
if (sign)
{
level = -level;
}

input[j] = (short)(level * (int)q);
output[n] = (short)level;
if (level != 0)
input[j] = (short)(level * (int)q);
output[n] = (short)level;
if (level != 0)
{
last = n;
}
}
else
{
last = n;
output[n] = 0;
input[j] = 0;
}
}
else
{
output[n] = 0;
input[j] = 0;
}
}

return last >= 0 ? 1 : 0;
return last >= 0 ? 1 : 0;
}
}

// Quantize as usual, but also compute and return the quantization error.
Expand Down
8 changes: 2 additions & 6 deletions src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ private void SetLoopParams(float q)
this.ResetStats();
}

private void AdjustFilterStrength()
private unsafe void AdjustFilterStrength()
{
if (this.filterStrength > 0)
{
Expand Down Expand Up @@ -806,18 +806,14 @@ private void ResetStats()
proba.NbSkip = 0;
}

private void SetupMatrices(Vp8SegmentInfo[] dqm)
private unsafe void SetupMatrices(Vp8SegmentInfo[] dqm)
{
int tlambdaScale = this.method >= WebpEncodingMethod.Default ? this.spatialNoiseShaping : 0;
for (int i = 0; i < dqm.Length; i++)
{
Vp8SegmentInfo m = dqm[i];
int q = m.Quant;

m.Y1 = new Vp8Matrix();
m.Y2 = new Vp8Matrix();
m.Uv = new Vp8Matrix();

m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)];
m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)];

Expand Down
38 changes: 13 additions & 25 deletions src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal class Vp8Matrix
internal unsafe struct Vp8Matrix
{
private static readonly int[][] BiasMatrices =
{
Expand All @@ -23,41 +23,29 @@ internal class Vp8Matrix
private const int SharpenBits = 11;

/// <summary>
/// Initializes a new instance of the <see cref="Vp8Matrix"/> class.
/// The quantizer steps.
/// </summary>
public Vp8Matrix()
{
this.Q = new ushort[16];
this.IQ = new ushort[16];
this.Bias = new uint[16];
this.ZThresh = new uint[16];
this.Sharpen = new short[16];
}

/// <summary>
/// Gets the quantizer steps.
/// </summary>
public ushort[] Q { get; }
public fixed ushort Q[16];

/// <summary>
/// Gets the reciprocals, fixed point.
/// The reciprocals, fixed point.
/// </summary>
public ushort[] IQ { get; }
public fixed ushort IQ[16];

/// <summary>
/// Gets the rounding bias.
/// The rounding bias.
/// </summary>
public uint[] Bias { get; }
public fixed uint Bias[16];

/// <summary>
/// Gets the value below which a coefficient is zeroed.
/// The value below which a coefficient is zeroed.
/// </summary>
public uint[] ZThresh { get; }
public fixed uint ZThresh[16];

/// <summary>
/// Gets the frequency boosters for slight sharpening.
/// The frequency boosters for slight sharpening.
/// </summary>
public short[] Sharpen { get; }
public fixed short Sharpen[16];

/// <summary>
/// Returns the average quantizer.
Expand All @@ -72,7 +60,7 @@ public int Expand(int type)
int isAcCoeff = i > 0 ? 1 : 0;
int bias = BiasMatrices[type][isAcCoeff];
this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]);
this.Bias[i] = (uint)this.BIAS(bias);
this.Bias[i] = (uint)BIAS(bias);

// zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is:
// * zero if coeff <= zthresh
Expand Down Expand Up @@ -106,6 +94,6 @@ public int Expand(int type)
return (sum + 8) >> 4;
}

private int BIAS(int b) => b << (WebpConstants.QFix - 8);
// Scales the bias value b by 2^(QFix - 8) using a left shift.
private static int BIAS(int b) => b << (WebpConstants.QFix - 8);
}
}
12 changes: 6 additions & 6 deletions src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
internal class Vp8SegmentInfo
{
/// <summary>
/// Gets or sets the quantization matrix y1.
/// The quantization matrix y1.
/// </summary>
public Vp8Matrix Y1 { get; set; }
public Vp8Matrix Y1;

/// <summary>
/// Gets or sets the quantization matrix y2.
/// The quantization matrix y2.
/// </summary>
public Vp8Matrix Y2 { get; set; }
public Vp8Matrix Y2;

/// <summary>
/// Gets or sets the quantization matrix uv.
/// The quantization matrix uv.
/// </summary>
public Vp8Matrix Uv { get; set; }
public Vp8Matrix Uv;

/// <summary>
/// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness.
Expand Down
Loading