Skip to content

Commit

Permalink
Add Neon implementation of aom_highbd_comp_avg_pred
Browse files Browse the repository at this point in the history
Add Neon implementation of aom_highbd_comp_avg_pred function.

The implementation is mostly a backport of this libvpx change[1].

[1]https://chromium-review.googlesource.com/c/webm/libvpx/+/4239538

Change-Id: Ie60ea9edd7a7742b8948d59e86b17f344d7c0eb5
  • Loading branch information
gerdamore-arm authored and jzern committed Jun 22, 2023
1 parent dedc64a commit d2e0b70
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 0 deletions.
1 change: 1 addition & 0 deletions aom_dsp/aom_dsp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ if(CONFIG_AV1_ENCODER)

list(APPEND AOM_DSP_ENCODER_INTRIN_NEON
"${AOM_ROOT}/aom_dsp/arm/highbd_avg_neon.c"
"${AOM_ROOT}/aom_dsp/arm/highbd_avg_pred_neon.c"
"${AOM_ROOT}/aom_dsp/arm/highbd_hadamard_neon.c"
"${AOM_ROOT}/aom_dsp/arm/highbd_quantize_neon.c"
"${AOM_ROOT}/aom_dsp/arm/highbd_sad_neon.c"
Expand Down
1 change: 1 addition & 0 deletions aom_dsp/aom_dsp_rtcd_defs.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1725,6 +1725,7 @@ ()

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
specialize qw/aom_highbd_comp_avg_pred neon/;

add_proto qw/void aom_highbd_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";
specialize qw/aom_highbd_dist_wtd_comp_avg_pred sse2/;
Expand Down
70 changes: 70 additions & 0 deletions aom_dsp/arm/highbd_avg_pred_neon.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2023 The WebM project authors. All Rights Reserved.
* Copyright (c) 2023, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/

#include <arm_neon.h>
#include <assert.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

// Averages two high bit-depth predictions with rounding, using Neon.
//
// comp_pred8, pred8 and ref8 are turned into uint16_t sample pointers with
// CONVERT_TO_SHORTPTR. Every output sample is the rounding halving add of the
// matching pred and ref samples, i.e. (pred + ref + 1) >> 1. Consecutive rows
// of pred and comp_pred are `width` samples apart, while consecutive rows of
// ref are `ref_stride` samples apart.
//
// The row loops always run at least once, so height must be positive. Widths
// above 8 are processed in chunks of 8 samples; any width below 8 is expected
// to be 4 (enforced by assert only).
void aom_highbd_comp_avg_pred_neon(uint8_t *comp_pred8, const uint8_t *pred8,
                                   int width, int height, const uint8_t *ref8,
                                   int ref_stride) {
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(comp_pred8);

  int rows_left = height;

  if (width < 8) {
    // Narrowest block: a single 64-bit vector covers the 4-sample row.
    assert(width == 4);
    do {
      vst1_u16(dst, vrhadd_u16(vld1_u16(src0), vld1_u16(src1)));

      dst += width;
      src0 += width;
      src1 += ref_stride;
    } while (--rows_left != 0);
    return;
  }

  if (width == 8) {
    // Exactly one 128-bit vector per row, so no column loop is needed.
    do {
      vst1q_u16(dst, vrhaddq_u16(vld1q_u16(src0), vld1q_u16(src1)));

      dst += width;
      src0 += width;
      src1 += ref_stride;
    } while (--rows_left != 0);
    return;
  }

  // Wide blocks: walk each row eight samples (one 128-bit vector) at a time.
  do {
    for (int x = 0; x < width; x += 8) {
      const uint16x8_t rounded_avg =
          vrhaddq_u16(vld1q_u16(src0 + x), vld1q_u16(src1 + x));
      vst1q_u16(dst + x, rounded_avg);
    }

    dst += width;
    src0 += width;
    src1 += ref_stride;
  } while (--rows_left != 0);
}
8 changes: 8 additions & 0 deletions test/comp_mask_pred_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -828,5 +828,13 @@ TEST_P(AV1HighbdCompAvgPredTest, DISABLED_Speed) {
RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
}

// Run the AV1HighbdCompAvgPredTest suite against the Neon implementation for
// every entry of kValidBlockSize combined with each value produced by
// ::testing::Range(8, 13, 2), i.e. 8, 10 and 12.
// NOTE(review): the Range values are presumably the bit depths under test --
// confirm against the fixture's parameter tuple.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
NEON, AV1HighbdCompAvgPredTest,
::testing::Combine(::testing::Values(&aom_highbd_comp_avg_pred_neon),
::testing::ValuesIn(kValidBlockSize),
::testing::Range(8, 13, 2)));
#endif

#endif // CONFIG_AV1_HIGHBITDEPTH
} // namespace

0 comments on commit d2e0b70

Please sign in to comment.