Skip to content

Commit

Permalink
Fix: clamping ConvertBits 32->8 for extreme out-of-range pixel values
Browse files Browse the repository at this point in the history
  • Loading branch information
pinterf authored and qyot27 committed Mar 3, 2020
1 parent 499614b commit e17f4f8
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 23 deletions.
11 changes: 6 additions & 5 deletions avs_core/convert/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1592,7 +1592,6 @@ void convert_32_to_uintN_sse41(const BYTE *srcp8, BYTE *dstp8, int src_rowsize,
int src_width = src_rowsize / sizeof(float);

constexpr int max_pixel_value = (1 << targetbits) - 1;
const __m128i max_pixel_value_128 = _mm_set1_epi16(max_pixel_value);

constexpr int limit_lo_d = (fulld ? 0 : 16) << (targetbits - 8);
constexpr int limit_hi_d = fulld ? ((1 << targetbits) - 1) : ((chroma ? 240 : 235) << (targetbits - 8));
Expand All @@ -1616,6 +1615,8 @@ void convert_32_to_uintN_sse41(const BYTE *srcp8, BYTE *dstp8, int src_rowsize,
const __m128 halfint_plus_rounder_ps = _mm_set1_ps(half_i + 0.5f);
const __m128 limit_lo_s_ps = _mm_set1_ps(limit_lo_s / 255.0f);
const __m128 limit_lo_plus_rounder_ps = _mm_set1_ps(limit_lo_d + 0.5f);
const __m128 max_dst_pixelvalue = _mm_set1_ps((float)max_pixel_value); // 255, 1023, 4095, 16383, 65535.0
const __m128 zero = _mm_setzero_ps();

__m128 factor_ps = _mm_set1_ps(factor); // 0-1.0 -> 0..max_pixel_value

Expand Down Expand Up @@ -1648,17 +1649,17 @@ void convert_32_to_uintN_sse41(const BYTE *srcp8, BYTE *dstp8, int src_rowsize,
src_1 = _mm_add_ps(_mm_mul_ps(src_1, factor_ps), limit_lo_plus_rounder_ps);
//pixel = (srcp0[x] - limit_lo_s_ps) * factor + half + limit_lo + 0.5f;
}

src_0 = _mm_max_ps(_mm_min_ps(src_0, max_dst_pixelvalue), zero);
src_1 = _mm_max_ps(_mm_min_ps(src_1, max_dst_pixelvalue), zero);
result_0 = _mm_cvttps_epi32(src_0); // truncate
result_1 = _mm_cvttps_epi32(src_1);
if constexpr(sizeof(pixel_t) == 2) {
result = _mm_packus_epi32(result_0, result_1); // sse41
if constexpr(targetbits > 8 && targetbits < 16) {
result = _mm_min_epu16(result, max_pixel_value_128); // sse41, extra clamp for 10, 12, 14 bits
}
_mm_store_si128(reinterpret_cast<__m128i *>(dstp + x), result);
}
else {
result = _mm_packus_epi32(result_0, result_1);
result = _mm_packs_epi32(result_0, result_1);
result = _mm_packus_epi16(result, result); // lo 8 byte
_mm_storel_epi64(reinterpret_cast<__m128i *>(dstp + x), result);
}
Expand Down
13 changes: 7 additions & 6 deletions avs_core/convert/convert_avx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ void convert_32_to_uintN_avx2(const BYTE *srcp8, BYTE *dstp8, int src_rowsize, i
int src_width = src_rowsize / sizeof(float);

const int max_pixel_value = (1 << targetbits) - 1;
const __m256i max_pixel_value_256 = _mm256_set1_epi16(max_pixel_value);

const int limit_lo_d = (fulld ? 0 : 16) << (targetbits - 8);
const int limit_hi_d = fulld ? ((1 << targetbits) - 1) : ((chroma ? 240 : 235) << (targetbits - 8));
Expand All @@ -94,6 +93,8 @@ void convert_32_to_uintN_avx2(const BYTE *srcp8, BYTE *dstp8, int src_rowsize, i
const __m256 halfint_plus_rounder_ps = _mm256_set1_ps(half_i + 0.5f);
const __m256 limit_lo_s_ps = _mm256_set1_ps(limit_lo_s / 255.0f);
const __m256 limit_lo_plus_rounder_ps = _mm256_set1_ps(limit_lo_d + 0.5f);
const __m256 max_dst_pixelvalue = _mm256_set1_ps((float)max_pixel_value); // 255, 1023, 4095, 16383, 65535.0
const __m256 zero = _mm256_setzero_ps();

__m256 factor_ps = _mm256_set1_ps(factor);

Expand Down Expand Up @@ -126,18 +127,18 @@ void convert_32_to_uintN_avx2(const BYTE *srcp8, BYTE *dstp8, int src_rowsize, i
src_1 = _mm256_fmadd_ps(src_1, factor_ps, limit_lo_plus_rounder_ps);
//pixel = (srcp0[x] - limit_lo_s_ps) * factor + half + limit_lo + 0.5f;
}

src_0 = _mm256_max_ps(_mm256_min_ps(src_0, max_dst_pixelvalue), zero);
src_1 = _mm256_max_ps(_mm256_min_ps(src_1, max_dst_pixelvalue), zero);
result_0 = _mm256_cvttps_epi32(src_0); // truncate
result_1 = _mm256_cvttps_epi32(src_1);
if constexpr(sizeof(pixel_t) == 2) {
result = _mm256_packus_epi32(result_0, result_1);
result = _mm256_permute4x64_epi64(result, (0 << 0) | (2 << 2) | (1 << 4) | (3 << 6));
if (targetbits > 8 && targetbits < 16) {
result = _mm256_min_epu16(result, max_pixel_value_256); // extra clamp for 10, 12, 14 bits
}
_mm256_store_si256(reinterpret_cast<__m256i *>(dstp + x), result);
_mm256_store_si256(reinterpret_cast<__m256i*>(dstp + x), result);
}
else {
result = _mm256_packus_epi32(result_0, result_1);
result = _mm256_packs_epi32(result_0, result_1);
result = _mm256_permute4x64_epi64(result, (0 << 0) | (2 << 2) | (1 << 4) | (3 << 6));
__m128i result128_lo = _mm256_castsi256_si128(result);
__m128i result128_hi = _mm256_extractf128_si256(result, 1);
Expand Down
26 changes: 14 additions & 12 deletions distrib/Readme/readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Short info for plugin writers
20200302 3.5.0
--------------
- New: Native Linux, macOS, and BSD support.
- Fix: ConvertBits 32->8 for extremely out-of-range float pixel values.
When a pixel value in a 32-bit float video was far out of range, i.e. greater than 128 (instead of the nominal 0 to 1.0 for the Y plane), ConvertBits(8) produced artifacts.
- Fix potential crash on exit or cache shrink (linux/gcc only?)
- Layer: support RGB24 and RGB48 (internally processed as Planar RGB - lossless pre and post conversion)
- Fix: RGBP to 444 8-14bit right side artifacts at specific widths
Expand Down Expand Up @@ -942,14 +944,14 @@ Conditional runtime functions have 10-16 bit/float support for YUV, PlanarRGB an
Since RGB is also available as a planar colorspace, the plane statistics functions logically were expanded.

New functions
AverageR, AverageG, AverageB like AverageLuma
RDifference, GDifference, BDifference like LumaDifference(clip1, clip2)
RDifferenceFromPrevious, GDifferenceFromPrevious, BDifferenceFromPrevious
RDifferenceToNext, GDifferenceToNext, BDifferenceToNext
RPlaneMin, GPlaneMin, BPlaneMin like YPlaneMin(clip [, float threshold = 0, int offset = 0])
RPlaneMax, GPlaneMax, BPlaneMax like YPlaneMax(clip [, float threshold = 0, int offset = 0])
RPlaneMinMaxDifference, GPlaneMinMaxDifference, BPlaneMinMaxDifference like YPlaneMinMaxDifference(clip [, float threshold = 0, int offset = 0])
RPlaneMedian, GPlaneMedian, BPlaneMedian like YPlaneMedian(clip [, int offset = 0])
AverageR, AverageG, AverageB like AverageLuma
RDifference, GDifference, BDifference like LumaDifference(clip1, clip2)
RDifferenceFromPrevious, GDifferenceFromPrevious, BDifferenceFromPrevious
RDifferenceToNext, GDifferenceToNext, BDifferenceToNext
RPlaneMin, GPlaneMin, BPlaneMin like YPlaneMin(clip [, float threshold = 0, int offset = 0])
RPlaneMax, GPlaneMax, BPlaneMax like YPlaneMax(clip [, float threshold = 0, int offset = 0])
RPlaneMinMaxDifference, GPlaneMinMaxDifference, BPlaneMinMaxDifference like YPlaneMinMaxDifference(clip [, float threshold = 0, int offset = 0])
RPlaneMedian, GPlaneMedian, BPlaneMedian like YPlaneMedian(clip [, int offset = 0])

For float colorspaces the Min, Max, MinMaxDifference and Median functions populate pixel counts for the internal statistics at a 16 bit resolution internally.

Expand Down Expand Up @@ -1044,12 +1046,12 @@ stackvertical(clip8.ConvertToYUV444().Histogram("levels"), Clip16.ConvertBits(8)
[ColorYUV]
Now it works for 10-16 bit clips

Slightly modified "demo" mode when using ColorYUV(showyuv=true)
Slightly modified "demo" mode when using ColorYUV(showyuv=true)

#old: draws YV12 with 16-239 U/V image (448x448)
#new: draws YV12 with 16-240 U/V image (450x450)

New options for "demo" mode when using ColorYUV(showyuv=true)
New options for "demo" mode when using ColorYUV(showyuv=true)
New parameter: bool showyuv_fullrange.
Description: Draws YV12 with 0-255 U/V image (512x512)
Usage: ColorYUV(showyuv=true, showyuv_fullrange=true)
Expand All @@ -1062,7 +1064,7 @@ ColorYUV(showyuv=true, bits=10).Info()

Luma steps are 16-235-16../0-255-0.. up to 0-65535-0... when bits=16

Additional info for ColorYUV
Additional info for ColorYUV

- Fixed an uninitialized internal variable regarding pc<->tv conversion,
which sometimes caused clips to be expanded to pc range when that wasn't requested.
Expand All @@ -1079,4 +1081,4 @@ Source filters are automatically detected, specifying MT_SERIALIZED is not neces
[Known issues/things]
GRunT in MT modes (Avs+ specific)
[done: v2502] Overlay blend with fully transparent mask is incorrect, overlaying pixel=0 becomes 1, overlaying pixel=255 becomes 254.
[done: v2676-] Float-type clips: chroma should be zero based: +/-0.5 instead of 0..1
[done: v2676-] Float-type clips: chroma should be zero based: +/-0.5 instead of 0..1
2 changes: 2 additions & 0 deletions distrib/Readme/readme_history.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ For a more logical (non-historical) arrangement of changes see readme.txt
20200302 3.5.0
--------------
- New: Native Linux, macOS, and BSD support.
- Fix: ConvertBits 32->8 for extremely out-of-range float pixel values.
When a pixel value in a 32-bit float video was far out of range, i.e. greater than 128 (instead of the nominal 0 to 1.0 for the Y plane), ConvertBits(8) produced artifacts.
- Fix potential crash on exit or cache shrink (linux/gcc only?)
- Layer: support RGB24 and RGB48 (internally processed as Planar RGB - lossless pre and post conversion)
- Fix: RGBP to 444 8-14bit right side artifacts at specific widths
Expand Down

0 comments on commit e17f4f8

Please sign in to comment.