Skip to content

Commit

Permalink
Merge pull request #1253 from rouault/floating_point_irreversible_enc…
Browse files Browse the repository at this point in the history
…oding

Single-threaded performance improvements in forward DWT for 5-3 and 9-7 (and other improvements)
  • Loading branch information
rouault authored Oct 9, 2020
2 parents 65c8f57 + 1c5627e commit 491299e
Show file tree
Hide file tree
Showing 25 changed files with 3,100 additions and 2,014 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,9 @@ if(BUILD_JPIP_SERVER)
endif()
add_subdirectory(src/lib)
option(BUILD_LUTS_GENERATOR "Build utility to generate t1_luts.h" OFF)
if(UNIX)
option(BUILD_UNIT_TESTS "Build unit tests (bench_dwt, test_sparse_array, etc..)" OFF)
endif()

#-----------------------------------------------------------------------------
# Build Applications
Expand Down
35 changes: 32 additions & 3 deletions src/bin/jp2/opj_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ static void encode_help_display(void)
fprintf(stdout, " Currently supports only RPCL order.\n");
fprintf(stdout, "-C <comment>\n");
fprintf(stdout, " Add <comment> in the comment marker segment.\n");
if (opj_has_thread_support()) {
fprintf(stdout, " -threads <num_threads|ALL_CPUS>\n"
" Number of threads to use for encoding or ALL_CPUS for all available cores.\n");
}
/* UniPG>> */
#ifdef USE_JPWL
fprintf(stdout, "-W <params>\n");
Expand Down Expand Up @@ -579,7 +583,8 @@ static int parse_cmdline_encoder(int argc, char **argv,
img_fol_t *img_fol, raw_cparameters_t *raw_cp, char *indexfilename,
size_t indexfilename_size,
int* pOutFramerate,
OPJ_BOOL* pOutPLT)
OPJ_BOOL* pOutPLT,
int* pOutNumThreads)
{
OPJ_UINT32 i, j;
int totlen, c;
Expand All @@ -596,7 +601,8 @@ static int parse_cmdline_encoder(int argc, char **argv,
{"jpip", NO_ARG, NULL, 'J'},
{"mct", REQ_ARG, NULL, 'Y'},
{"IMF", REQ_ARG, NULL, 'Z'},
{"PLT", NO_ARG, NULL, 'A'}
{"PLT", NO_ARG, NULL, 'A'},
{"threads", REQ_ARG, NULL, 'B'}
};

/* parse the command line */
Expand Down Expand Up @@ -1679,6 +1685,19 @@ static int parse_cmdline_encoder(int argc, char **argv,
}
break;

/* ----------------------------------------------------- */
case 'B': { /* Number of threads */
if (strcmp(opj_optarg, "ALL_CPUS") == 0) {
*pOutNumThreads = opj_get_num_cpus();
if (*pOutNumThreads == 1) {
*pOutNumThreads = 0;
}
} else {
sscanf(opj_optarg, "%d", pOutNumThreads);
}
}
break;

/* ------------------------------------------------------ */


Expand Down Expand Up @@ -1860,6 +1879,7 @@ int main(int argc, char **argv)
OPJ_FLOAT64 t = opj_clock();

OPJ_BOOL PLT = OPJ_FALSE;
int num_threads = 0;

/* set encoding parameters to default values */
opj_set_default_encoder_parameters(&parameters);
Expand All @@ -1880,7 +1900,7 @@ int main(int argc, char **argv)
parameters.tcp_mct = (char)
255; /* This will be set later according to the input image or the provided option */
if (parse_cmdline_encoder(argc, argv, &parameters, &img_fol, &raw_cp,
indexfilename, sizeof(indexfilename), &framerate, &PLT) == 1) {
indexfilename, sizeof(indexfilename), &framerate, &PLT, &num_threads) == 1) {
ret = 1;
goto fin;
}
Expand Down Expand Up @@ -2141,6 +2161,15 @@ int main(int argc, char **argv)
}
}

if (num_threads >= 1 &&
!opj_codec_set_threads(l_codec, num_threads)) {
fprintf(stderr, "failed to set number of threads\n");
opj_destroy_codec(l_codec);
opj_image_destroy(image);
ret = 1;
goto fin;
}

/* open a byte stream for writing and allocate memory for all tiles */
l_stream = opj_stream_create_default_file_stream(parameters.outfile, OPJ_FALSE);
if (! l_stream) {
Expand Down
4 changes: 2 additions & 2 deletions src/lib/openjp2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)

if(BUILD_UNIT_TESTS)
if(BUILD_UNIT_TESTS AND UNIX)
add_executable(bench_dwt bench_dwt.c)
if(UNIX)
target_link_libraries(bench_dwt m ${OPENJPEG_LIBRARY_NAME})
Expand All @@ -215,4 +215,4 @@ if(BUILD_UNIT_TESTS)
if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
target_link_libraries(test_sparse_array ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
endif(BUILD_UNIT_TESTS)
endif(BUILD_UNIT_TESTS AND UNIX)
140 changes: 110 additions & 30 deletions src/lib/openjp2/bench_dwt.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ void init_tilec(opj_tcd_tilecomp_t * l_tilec,
OPJ_INT32 y0,
OPJ_INT32 x1,
OPJ_INT32 y1,
OPJ_UINT32 numresolutions)
OPJ_UINT32 numresolutions,
OPJ_BOOL irreversible)
{
opj_tcd_resolution_t* l_res;
OPJ_UINT32 resno, l_level_no;
Expand All @@ -64,9 +65,16 @@ void init_tilec(opj_tcd_tilecomp_t * l_tilec,
(size_t)(l_tilec->y1 - l_tilec->y0);
l_tilec->data = (OPJ_INT32*) opj_malloc(sizeof(OPJ_INT32) * nValues);
for (i = 0; i < nValues; i++) {
l_tilec->data[i] = getValue((OPJ_UINT32)i);
OPJ_INT32 val = getValue((OPJ_UINT32)i);
if (irreversible) {
OPJ_FLOAT32 fVal = (OPJ_FLOAT32)val;
memcpy(&l_tilec->data[i], &fVal, sizeof(OPJ_FLOAT32));
} else {
l_tilec->data[i] = val;
}
}
l_tilec->numresolutions = numresolutions;
l_tilec->minimum_num_resolutions = numresolutions;
l_tilec->resolutions = (opj_tcd_resolution_t*) opj_calloc(
l_tilec->numresolutions,
sizeof(opj_tcd_resolution_t));
Expand Down Expand Up @@ -98,9 +106,9 @@ void free_tilec(opj_tcd_tilecomp_t * l_tilec)
void usage(void)
{
printf(
"bench_dwt [-size value] [-check] [-display] [-num_resolutions val]\n");
"bench_dwt [-decode|encode] [-I] [-size value] [-check] [-display]\n");
printf(
" [-offset x y] [-num_threads val]\n");
" [-num_resolutions val] [-offset x y] [-num_threads val]\n");
exit(1);
}

Expand Down Expand Up @@ -131,6 +139,17 @@ OPJ_FLOAT64 opj_clock(void)
#endif
}

static OPJ_FLOAT64 opj_wallclock(void)
{
#ifdef _WIN32
return opj_clock();
#else
struct timeval tv;
gettimeofday(&tv, NULL);
return (OPJ_FLOAT64)tv.tv_sec + 1e-6 * (OPJ_FLOAT64)tv.tv_usec;
#endif
}

int main(int argc, char** argv)
{
int num_threads = 0;
Expand All @@ -146,16 +165,24 @@ int main(int argc, char** argv)
OPJ_BOOL check = OPJ_FALSE;
OPJ_INT32 size = 16384 - 1;
OPJ_FLOAT64 start, stop;
OPJ_FLOAT64 start_wc, stop_wc;
OPJ_UINT32 offset_x = ((OPJ_UINT32)size + 1) / 2 - 1;
OPJ_UINT32 offset_y = ((OPJ_UINT32)size + 1) / 2 - 1;
OPJ_UINT32 num_resolutions = 6;
OPJ_BOOL bench_decode = OPJ_TRUE;
OPJ_BOOL irreversible = OPJ_FALSE;

for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-display") == 0) {
if (strcmp(argv[i], "-encode") == 0) {
bench_decode = OPJ_FALSE;
} else if (strcmp(argv[i], "-decode") == 0) {
bench_decode = OPJ_TRUE;
} else if (strcmp(argv[i], "-display") == 0) {
display = OPJ_TRUE;
check = OPJ_TRUE;
} else if (strcmp(argv[i], "-check") == 0) {
check = OPJ_TRUE;
} else if (strcmp(argv[i], "-I") == 0) {
irreversible = OPJ_TRUE;
} else if (strcmp(argv[i], "-size") == 0 && i + 1 < argc) {
size = atoi(argv[i + 1]);
i ++;
Expand All @@ -179,18 +206,29 @@ int main(int argc, char** argv)
}
}

if (irreversible && check) {
/* Due to irreversible inverse DWT not being symetric of forward */
/* See BUG_WEIRD_TWO_INVK in dwt.c */
printf("-I and -check aren't compatible\n");
exit(1);
}

tp = opj_thread_pool_create(num_threads);

init_tilec(&tilec, (OPJ_INT32)offset_x, (OPJ_INT32)offset_y,
(OPJ_INT32)offset_x + size, (OPJ_INT32)offset_y + size,
num_resolutions);
num_resolutions, irreversible);

if (display) {
printf("Before\n");
k = 0;
for (j = 0; j < tilec.y1 - tilec.y0; j++) {
for (i = 0; i < tilec.x1 - tilec.x0; i++) {
printf("%d ", tilec.data[k]);
if (irreversible) {
printf("%f ", ((OPJ_FLOAT32*)tilec.data)[k]);
} else {
printf("%d ", tilec.data[k]);
}
k ++;
}
printf("\n");
Expand Down Expand Up @@ -223,45 +261,87 @@ int main(int argc, char** argv)
image_comp.dy = 1;

start = opj_clock();
opj_dwt_decode(&tcd, &tilec, tilec.numresolutions);
start_wc = opj_wallclock();
if (bench_decode) {
if (irreversible) {
opj_dwt_decode_real(&tcd, &tilec, tilec.numresolutions);
} else {
opj_dwt_decode(&tcd, &tilec, tilec.numresolutions);
}
} else {
if (irreversible) {
opj_dwt_encode_real(&tcd, &tilec);
} else {
opj_dwt_encode(&tcd, &tilec);
}
}
stop = opj_clock();
printf("time for dwt_decode: %.03f s\n", stop - start);
stop_wc = opj_wallclock();
printf("time for %s: total = %.03f s, wallclock = %.03f s\n",
bench_decode ? "dwt_decode" : "dwt_encode",
stop - start,
stop_wc - start_wc);

if (display || check) {
if (display) {
if (display) {
if (bench_decode) {
printf("After IDWT\n");
k = 0;
for (j = 0; j < tilec.y1 - tilec.y0; j++) {
for (i = 0; i < tilec.x1 - tilec.x0; i++) {
} else {
printf("After FDWT\n");
}
k = 0;
for (j = 0; j < tilec.y1 - tilec.y0; j++) {
for (i = 0; i < tilec.x1 - tilec.x0; i++) {
if (irreversible) {
printf("%f ", ((OPJ_FLOAT32*)tilec.data)[k]);
} else {
printf("%d ", tilec.data[k]);
k ++;
}
printf("\n");
k ++;
}
printf("\n");
}
}

opj_dwt_encode(&tilec);
if (display) {
printf("After FDWT\n");
if ((display || check) && !irreversible) {

if (bench_decode) {
opj_dwt_encode(&tcd, &tilec);
} else {
opj_dwt_decode(&tcd, &tilec, tilec.numresolutions);
}


if (display && !irreversible) {
if (bench_decode) {
printf("After FDWT\n");
} else {
printf("After IDWT\n");
}
k = 0;
for (j = 0; j < tilec.y1 - tilec.y0; j++) {
for (i = 0; i < tilec.x1 - tilec.x0; i++) {
printf("%d ", tilec.data[k]);
if (irreversible) {
printf("%f ", ((OPJ_FLOAT32*)tilec.data)[k]);
} else {
printf("%d ", tilec.data[k]);
}
k ++;
}
printf("\n");
}
}

if (check) {
size_t idx;
size_t nValues = (size_t)(tilec.x1 - tilec.x0) *
(size_t)(tilec.y1 - tilec.y0);
for (idx = 0; idx < nValues; idx++) {
if (tilec.data[idx] != getValue((OPJ_UINT32)idx)) {
printf("Difference found at idx = %u\n", (OPJ_UINT32)idx);
exit(1);
}
}

if (check) {

size_t idx;
size_t nValues = (size_t)(tilec.x1 - tilec.x0) *
(size_t)(tilec.y1 - tilec.y0);
for (idx = 0; idx < nValues; idx++) {
if (tilec.data[idx] != getValue((OPJ_UINT32)idx)) {
printf("Difference found at idx = %u\n", (OPJ_UINT32)idx);
exit(1);
}
}
}
Expand Down
Loading

0 comments on commit 491299e

Please sign in to comment.