From b01bdd87eb96b3d8db7ef2727d5a6ab7b122b8fc Mon Sep 17 00:00:00 2001 From: Chris Liddell Date: Tue, 5 Jan 2021 11:25:03 +0000 Subject: Update openjpeg to 2.4.0 Fixes:bugs 703275, 703276, 703277, 703278, 703279, 703280, 703281. --- openjpeg/CHANGELOG.md | 87 ++ openjpeg/NEWS.md | 10 + openjpeg/appveyor.yml | 2 +- openjpeg/doc/CMakeLists.txt | 2 +- openjpeg/src/lib/openjp2/CMakeLists.txt | 4 +- openjpeg/src/lib/openjp2/bench_dwt.c | 140 ++- openjpeg/src/lib/openjp2/dwt.c | 1768 +++++++++++++++++++++++-------- openjpeg/src/lib/openjp2/dwt.h | 20 +- openjpeg/src/lib/openjp2/j2k.c | 1137 +++++++++++++++++--- openjpeg/src/lib/openjp2/j2k.h | 28 +- openjpeg/src/lib/openjp2/jp2.c | 22 +- openjpeg/src/lib/openjp2/jp2.h | 14 + openjpeg/src/lib/openjp2/mct.c | 215 +--- openjpeg/src/lib/openjp2/mct.h | 5 +- openjpeg/src/lib/openjp2/mqc.c | 176 ++- openjpeg/src/lib/openjp2/mqc.h | 9 +- openjpeg/src/lib/openjp2/mqc_inl.h | 90 +- openjpeg/src/lib/openjp2/openjpeg.c | 45 +- openjpeg/src/lib/openjp2/openjpeg.h | 86 +- openjpeg/src/lib/openjp2/opj_codec.h | 5 + openjpeg/src/lib/openjp2/opj_common.h | 6 + openjpeg/src/lib/openjp2/opj_intmath.h | 10 + openjpeg/src/lib/openjp2/pi.c | 457 ++++---- openjpeg/src/lib/openjp2/pi.h | 25 +- openjpeg/src/lib/openjp2/t1.c | 863 ++++++++------- openjpeg/src/lib/openjp2/t1.h | 5 +- openjpeg/src/lib/openjp2/t2.c | 50 +- openjpeg/src/lib/openjp2/t2.h | 2 + openjpeg/src/lib/openjp2/tcd.c | 135 ++- openjpeg/src/lib/openjp2/tcd.h | 41 +- 30 files changed, 3880 insertions(+), 1579 deletions(-) (limited to 'openjpeg') diff --git a/openjpeg/CHANGELOG.md b/openjpeg/CHANGELOG.md index e45b32465..4187b0673 100644 --- a/openjpeg/CHANGELOG.md +++ b/openjpeg/CHANGELOG.md @@ -1,5 +1,92 @@ # Changelog +## [v2.4.0](https://github.com/uclouvain/openjpeg/releases/v2.4.0) (2020-12-28) + +[Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.3.1...v2.4.0) + +**Closed issues:** + +- OPENJPEG\_INSTALL\_DOC\_DIR does not control a destination directory 
where HTML docs would be installed. [\#1309](https://github.com/uclouvain/openjpeg/issues/1309) +- Heap-buffer-overflow in lib/openjp2/pi.c:312 [\#1302](https://github.com/uclouvain/openjpeg/issues/1302) +- Heap-buffer-overflow in lib/openjp2/t2.c:973 [\#1299](https://github.com/uclouvain/openjpeg/issues/1299) +- Heap-buffer-overflow in lib/openjp2/pi.c:623 [\#1293](https://github.com/uclouvain/openjpeg/issues/1293) +- Global-buffer-overflow in lib/openjp2/dwt.c:1980 [\#1286](https://github.com/uclouvain/openjpeg/issues/1286) +- Heap-buffer-overflow in lib/openjp2/tcd.c:2417 [\#1284](https://github.com/uclouvain/openjpeg/issues/1284) +- Heap-buffer-overflow in lib/openjp2/mqc.c:499 [\#1283](https://github.com/uclouvain/openjpeg/issues/1283) +- Openjpeg could not encode 32bit RGB float image [\#1281](https://github.com/uclouvain/openjpeg/issues/1281) +- Openjpeg could not encode 32bit RGB float image [\#1280](https://github.com/uclouvain/openjpeg/issues/1280) +- ISO/IEC 15444-1:2019 \(E\) compared with 'cio.h' [\#1277](https://github.com/uclouvain/openjpeg/issues/1277) +- Test-suite failure due to hash mismatch [\#1264](https://github.com/uclouvain/openjpeg/issues/1264) +- Heap use-after-free [\#1261](https://github.com/uclouvain/openjpeg/issues/1261) +- Memory leak when failing to allocate object... [\#1259](https://github.com/uclouvain/openjpeg/issues/1259) +- Memory leak of Tier 1 handle when OpenJPEG fails to set it as TLS... [\#1257](https://github.com/uclouvain/openjpeg/issues/1257) +- Any plan to build release for CVE-2020-8112/CVE-2020-6851 [\#1247](https://github.com/uclouvain/openjpeg/issues/1247) +- failing to convert 16-bit file: opj\_t2\_encode\_packet\(\): only 5251 bytes remaining in output buffer. 5621 needed. [\#1243](https://github.com/uclouvain/openjpeg/issues/1243) +- CMake+VS2017 Compile OK, thirdparty Compile OK, but thirdparty not install [\#1239](https://github.com/uclouvain/openjpeg/issues/1239) +- New release to solve CVE-2019-6988 ? 
[\#1238](https://github.com/uclouvain/openjpeg/issues/1238) +- Many tests fail to pass after the update of libtiff to version 4.1.0 [\#1233](https://github.com/uclouvain/openjpeg/issues/1233) +- Another heap buffer overflow in libopenjp2 [\#1231](https://github.com/uclouvain/openjpeg/issues/1231) +- Heap buffer overflow in libopenjp2 [\#1228](https://github.com/uclouvain/openjpeg/issues/1228) +- Endianness of binary volume \(JP3D\) [\#1224](https://github.com/uclouvain/openjpeg/issues/1224) +- New release to resolve CVE-2019-12973 [\#1222](https://github.com/uclouvain/openjpeg/issues/1222) +- how to set the block size,like 128,256 ? [\#1216](https://github.com/uclouvain/openjpeg/issues/1216) +- compress YUV files to motion jpeg2000 standard [\#1213](https://github.com/uclouvain/openjpeg/issues/1213) +- Repair/update Java wrapper, and include in release [\#1208](https://github.com/uclouvain/openjpeg/issues/1208) +- abc [\#1206](https://github.com/uclouvain/openjpeg/issues/1206) +- Slow decoding [\#1202](https://github.com/uclouvain/openjpeg/issues/1202) +- Installation question [\#1201](https://github.com/uclouvain/openjpeg/issues/1201) +- Typo in test\_decode\_area - \*ptilew is assigned instead of \*ptileh [\#1195](https://github.com/uclouvain/openjpeg/issues/1195) +- Creating a J2K file with one POC is broken [\#1191](https://github.com/uclouvain/openjpeg/issues/1191) +- Make fails on Arch Linux [\#1174](https://github.com/uclouvain/openjpeg/issues/1174) +- Heap buffer overflow in opj\_t1\_clbl\_decode\_processor\(\) triggered with Ghostscript [\#1158](https://github.com/uclouvain/openjpeg/issues/1158) +- opj\_stream\_get\_number\_byte\_left: Assertion `p\_stream-\>m\_byte\_offset \>= 0' failed. 
[\#1151](https://github.com/uclouvain/openjpeg/issues/1151) +- The fuzzer ignores too many inputs [\#1079](https://github.com/uclouvain/openjpeg/issues/1079) +- out of bounds read [\#1068](https://github.com/uclouvain/openjpeg/issues/1068) + +**Merged pull requests:** + +- Change defined WIN32 [\#1310](https://github.com/uclouvain/openjpeg/pull/1310) ([Jamaika1](https://github.com/Jamaika1)) +- docs: fix simple typo, producted -\> produced [\#1308](https://github.com/uclouvain/openjpeg/pull/1308) ([timgates42](https://github.com/timgates42)) +- Set ${OPENJPEG\_INSTALL\_DOC\_DIR} to DESTINATION of HTMLs [\#1307](https://github.com/uclouvain/openjpeg/pull/1307) ([lemniscati](https://github.com/lemniscati)) +- Use INC\_DIR for OPENJPEG\_INCLUDE\_DIRS \(fixes uclouvain\#1174\) [\#1306](https://github.com/uclouvain/openjpeg/pull/1306) ([matthew-sharp](https://github.com/matthew-sharp)) +- pi.c: avoid out of bounds access with POC \(fixes \#1302\) [\#1304](https://github.com/uclouvain/openjpeg/pull/1304) ([rouault](https://github.com/rouault)) +- Encoder: grow again buffer size [\#1303](https://github.com/uclouvain/openjpeg/pull/1303) ([zodf0055980](https://github.com/zodf0055980)) +- opj\_j2k\_write\_sod\(\): avoid potential heap buffer overflow \(fixes \#1299\) \(probably master only\) [\#1301](https://github.com/uclouvain/openjpeg/pull/1301) ([rouault](https://github.com/rouault)) +- pi.c: avoid out of bounds access with POC \(refs https://github.com/uclouvain/openjpeg/issues/1293\#issuecomment-737122836\) [\#1300](https://github.com/uclouvain/openjpeg/pull/1300) ([rouault](https://github.com/rouault)) +- opj\_t2\_encode\_packet\(\): avoid out of bound access of \#1297, but likely not the proper fix [\#1298](https://github.com/uclouvain/openjpeg/pull/1298) ([rouault](https://github.com/rouault)) +- opj\_t2\_encode\_packet\(\): avoid out of bound access of \#1294, but likely not the proper fix [\#1296](https://github.com/uclouvain/openjpeg/pull/1296) 
([rouault](https://github.com/rouault)) +- opj\_j2k\_setup\_encoder\(\): validate POC compno0 and compno1 \(fixes \#1293\) [\#1295](https://github.com/uclouvain/openjpeg/pull/1295) ([rouault](https://github.com/rouault)) +- Encoder: avoid global buffer overflow on irreversible conversion when… [\#1292](https://github.com/uclouvain/openjpeg/pull/1292) ([rouault](https://github.com/rouault)) +- Decoding: deal with some SPOT6 images that have tiles with a single tile-part with TPsot == 0 and TNsot == 0, and with missing EOC [\#1291](https://github.com/uclouvain/openjpeg/pull/1291) ([rouault](https://github.com/rouault)) +- Free p\_tcd\_marker\_info to avoid memory leak [\#1288](https://github.com/uclouvain/openjpeg/pull/1288) ([zodf0055980](https://github.com/zodf0055980)) +- Encoder: grow again buffer size [\#1287](https://github.com/uclouvain/openjpeg/pull/1287) ([zodf0055980](https://github.com/zodf0055980)) +- Encoder: avoid uint32 overflow when allocating memory for codestream buffer \(fixes \#1243\) [\#1276](https://github.com/uclouvain/openjpeg/pull/1276) ([rouault](https://github.com/rouault)) +- Java compatibility from 1.5 to 1.6 [\#1263](https://github.com/uclouvain/openjpeg/pull/1263) ([jiapei100](https://github.com/jiapei100)) +- opj\_decompress: fix double-free on input directory with mix of valid and invalid images [\#1262](https://github.com/uclouvain/openjpeg/pull/1262) ([rouault](https://github.com/rouault)) +- openjp2: Plug image leak when failing to allocate codestream index. [\#1260](https://github.com/uclouvain/openjpeg/pull/1260) ([sebras](https://github.com/sebras)) +- openjp2: Plug memory leak when setting data as TLS fails. [\#1258](https://github.com/uclouvain/openjpeg/pull/1258) ([sebras](https://github.com/sebras)) +- openjp2: Error out if failing to create Tier 1 handle. 
[\#1256](https://github.com/uclouvain/openjpeg/pull/1256) ([sebras](https://github.com/sebras)) +- Testing for invalid values of width, height, numcomps [\#1254](https://github.com/uclouvain/openjpeg/pull/1254) ([szukw000](https://github.com/szukw000)) +- Single-threaded performance improvements in forward DWT for 5-3 and 9-7 \(and other improvements\) [\#1253](https://github.com/uclouvain/openjpeg/pull/1253) ([rouault](https://github.com/rouault)) +- Add support for multithreading in encoder [\#1248](https://github.com/uclouvain/openjpeg/pull/1248) ([rouault](https://github.com/rouault)) +- Add support for generation of PLT markers in encoder [\#1246](https://github.com/uclouvain/openjpeg/pull/1246) ([rouault](https://github.com/rouault)) +- Fix warnings about signed/unsigned casts in pi.c [\#1244](https://github.com/uclouvain/openjpeg/pull/1244) ([rouault](https://github.com/rouault)) +- opj\_decompress: add sanity checks to avoid segfault in case of decoding error [\#1240](https://github.com/uclouvain/openjpeg/pull/1240) ([rouault](https://github.com/rouault)) +- ignore wrong icc [\#1236](https://github.com/uclouvain/openjpeg/pull/1236) ([szukw000](https://github.com/szukw000)) +- Implement writing of IMF profiles [\#1235](https://github.com/uclouvain/openjpeg/pull/1235) ([rouault](https://github.com/rouault)) +- tests: add alternate checksums for libtiff 4.1 [\#1234](https://github.com/uclouvain/openjpeg/pull/1234) ([rouault](https://github.com/rouault)) +- opj\_tcd\_init\_tile\(\): avoid integer overflow [\#1232](https://github.com/uclouvain/openjpeg/pull/1232) ([rouault](https://github.com/rouault)) +- tests/fuzzers: link fuzz binaries using $LIB\_FUZZING\_ENGINE. 
[\#1230](https://github.com/uclouvain/openjpeg/pull/1230) ([Dor1s](https://github.com/Dor1s)) +- opj\_j2k\_update\_image\_dimensions\(\): reject images whose coordinates are beyond INT\_MAX \(fixes \#1228\) [\#1229](https://github.com/uclouvain/openjpeg/pull/1229) ([rouault](https://github.com/rouault)) +- Fix resource leaks [\#1226](https://github.com/uclouvain/openjpeg/pull/1226) ([dodys](https://github.com/dodys)) +- abi-check.sh: fix false positive ABI error, and display output error log [\#1218](https://github.com/uclouvain/openjpeg/pull/1218) ([rouault](https://github.com/rouault)) +- pi.c: avoid integer overflow, resulting in later invalid access to memory in opj\_t2\_decode\_packets\(\) [\#1217](https://github.com/uclouvain/openjpeg/pull/1217) ([rouault](https://github.com/rouault)) +- Add check to validate SGcod/SPcoc/SPcod parameter values. [\#1211](https://github.com/uclouvain/openjpeg/pull/1211) ([sebras](https://github.com/sebras)) +- Fix buffer overflow reading an image file less than four characters [\#1196](https://github.com/uclouvain/openjpeg/pull/1196) ([robert-ancell](https://github.com/robert-ancell)) +- compression: emit POC marker when only one single POC is requested \(f… [\#1192](https://github.com/uclouvain/openjpeg/pull/1192) ([rouault](https://github.com/rouault)) +- Fix several potential vulnerabilities [\#1185](https://github.com/uclouvain/openjpeg/pull/1185) ([Young-X](https://github.com/Young-X)) +- openjp2/j2k: Report error if all wanted components are not decoded. 
[\#1164](https://github.com/uclouvain/openjpeg/pull/1164) ([sebras](https://github.com/sebras)) + ## [v2.3.1](https://github.com/uclouvain/openjpeg/releases/v2.3.1) (2019-04-02) [Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.3.0...v2.3.1) diff --git a/openjpeg/NEWS.md b/openjpeg/NEWS.md index 794a796eb..6ac913351 100644 --- a/openjpeg/NEWS.md +++ b/openjpeg/NEWS.md @@ -2,6 +2,16 @@ More details in the [CHANGELOG](https://github.com/uclouvain/openjpeg/blob/master/CHANGELOG.md) +## OpenJPEG 2.4.0 (December 2020) + +No API/ABI break compared to v2.3.1, but additional symbols for subset of components decoding (hence the MINOR version bump). + +* Encoder: add support for multithreading [\#1248](https://github.com/uclouvain/openjpeg/pull/1248) +* Encoder: add support for generation of PLT markers [\#1246](https://github.com/uclouvain/openjpeg/pull/1246) +* Encoder: single-threaded performance improvements in forward DWT for 5-3 and 9-7 (and other improvements) [\#1253](https://github.com/uclouvain/openjpeg/pull/1253) +* Encoder: support IMF profiles [\#1235](https://github.com/uclouvain/openjpeg/pull/1235) +* Many bug fixes (including security fixes) + ## OpenJPEG 2.3.1 (April 2019) No API/ABI break compared to v2.3.0 diff --git a/openjpeg/appveyor.yml b/openjpeg/appveyor.yml index d6561f6a9..a4b5fdefb 100644 --- a/openjpeg/appveyor.yml +++ b/openjpeg/appveyor.yml @@ -1,4 +1,4 @@ -version: 2.3.1.{build} +version: 2.4.0.{build} branches: except: - coverity_scan diff --git a/openjpeg/doc/CMakeLists.txt b/openjpeg/doc/CMakeLists.txt index d4f3ddbe6..e4f9b37cc 100644 --- a/openjpeg/doc/CMakeLists.txt +++ b/openjpeg/doc/CMakeLists.txt @@ -44,7 +44,7 @@ if(DOXYGEN_FOUND) # install HTML documentation (install png files too): install(DIRECTORY ${CMAKE_BINARY_DIR}/doc/html - DESTINATION share/doc + DESTINATION ${OPENJPEG_INSTALL_DOC_DIR} PATTERN ".svn" EXCLUDE ) else() diff --git a/openjpeg/src/lib/openjp2/CMakeLists.txt 
b/openjpeg/src/lib/openjp2/CMakeLists.txt index b27148582..9f79b9c31 100644 --- a/openjpeg/src/lib/openjp2/CMakeLists.txt +++ b/openjpeg/src/lib/openjp2/CMakeLists.txt @@ -199,7 +199,7 @@ if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT}) endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) -if(BUILD_UNIT_TESTS) +if(BUILD_UNIT_TESTS AND UNIX) add_executable(bench_dwt bench_dwt.c) if(UNIX) target_link_libraries(bench_dwt m ${OPENJPEG_LIBRARY_NAME}) @@ -215,4 +215,4 @@ if(BUILD_UNIT_TESTS) if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) target_link_libraries(test_sparse_array ${CMAKE_THREAD_LIBS_INIT}) endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) -endif(BUILD_UNIT_TESTS) +endif(BUILD_UNIT_TESTS AND UNIX) diff --git a/openjpeg/src/lib/openjp2/bench_dwt.c b/openjpeg/src/lib/openjp2/bench_dwt.c index 8cb64d066..1124cd614 100644 --- a/openjpeg/src/lib/openjp2/bench_dwt.c +++ b/openjpeg/src/lib/openjp2/bench_dwt.c @@ -49,7 +49,8 @@ void init_tilec(opj_tcd_tilecomp_t * l_tilec, OPJ_INT32 y0, OPJ_INT32 x1, OPJ_INT32 y1, - OPJ_UINT32 numresolutions) + OPJ_UINT32 numresolutions, + OPJ_BOOL irreversible) { opj_tcd_resolution_t* l_res; OPJ_UINT32 resno, l_level_no; @@ -64,9 +65,16 @@ void init_tilec(opj_tcd_tilecomp_t * l_tilec, (size_t)(l_tilec->y1 - l_tilec->y0); l_tilec->data = (OPJ_INT32*) opj_malloc(sizeof(OPJ_INT32) * nValues); for (i = 0; i < nValues; i++) { - l_tilec->data[i] = getValue((OPJ_UINT32)i); + OPJ_INT32 val = getValue((OPJ_UINT32)i); + if (irreversible) { + OPJ_FLOAT32 fVal = (OPJ_FLOAT32)val; + memcpy(&l_tilec->data[i], &fVal, sizeof(OPJ_FLOAT32)); + } else { + l_tilec->data[i] = val; + } } l_tilec->numresolutions = numresolutions; + l_tilec->minimum_num_resolutions = numresolutions; l_tilec->resolutions = (opj_tcd_resolution_t*) opj_calloc( l_tilec->numresolutions, sizeof(opj_tcd_resolution_t)); @@ -98,9 +106,9 @@ 
void free_tilec(opj_tcd_tilecomp_t * l_tilec) void usage(void) { printf( - "bench_dwt [-size value] [-check] [-display] [-num_resolutions val]\n"); + "bench_dwt [-decode|encode] [-I] [-size value] [-check] [-display]\n"); printf( - " [-offset x y] [-num_threads val]\n"); + " [-num_resolutions val] [-offset x y] [-num_threads val]\n"); exit(1); } @@ -131,6 +139,17 @@ OPJ_FLOAT64 opj_clock(void) #endif } +static OPJ_FLOAT64 opj_wallclock(void) +{ +#ifdef _WIN32 + return opj_clock(); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return (OPJ_FLOAT64)tv.tv_sec + 1e-6 * (OPJ_FLOAT64)tv.tv_usec; +#endif +} + int main(int argc, char** argv) { int num_threads = 0; @@ -146,16 +165,24 @@ int main(int argc, char** argv) OPJ_BOOL check = OPJ_FALSE; OPJ_INT32 size = 16384 - 1; OPJ_FLOAT64 start, stop; + OPJ_FLOAT64 start_wc, stop_wc; OPJ_UINT32 offset_x = ((OPJ_UINT32)size + 1) / 2 - 1; OPJ_UINT32 offset_y = ((OPJ_UINT32)size + 1) / 2 - 1; OPJ_UINT32 num_resolutions = 6; + OPJ_BOOL bench_decode = OPJ_TRUE; + OPJ_BOOL irreversible = OPJ_FALSE; for (i = 1; i < argc; i++) { - if (strcmp(argv[i], "-display") == 0) { + if (strcmp(argv[i], "-encode") == 0) { + bench_decode = OPJ_FALSE; + } else if (strcmp(argv[i], "-decode") == 0) { + bench_decode = OPJ_TRUE; + } else if (strcmp(argv[i], "-display") == 0) { display = OPJ_TRUE; - check = OPJ_TRUE; } else if (strcmp(argv[i], "-check") == 0) { check = OPJ_TRUE; + } else if (strcmp(argv[i], "-I") == 0) { + irreversible = OPJ_TRUE; } else if (strcmp(argv[i], "-size") == 0 && i + 1 < argc) { size = atoi(argv[i + 1]); i ++; @@ -179,18 +206,29 @@ int main(int argc, char** argv) } } + if (irreversible && check) { + /* Due to irreversible inverse DWT not being symetric of forward */ + /* See BUG_WEIRD_TWO_INVK in dwt.c */ + printf("-I and -check aren't compatible\n"); + exit(1); + } + tp = opj_thread_pool_create(num_threads); init_tilec(&tilec, (OPJ_INT32)offset_x, (OPJ_INT32)offset_y, (OPJ_INT32)offset_x + size, (OPJ_INT32)offset_y 
+ size, - num_resolutions); + num_resolutions, irreversible); if (display) { printf("Before\n"); k = 0; for (j = 0; j < tilec.y1 - tilec.y0; j++) { for (i = 0; i < tilec.x1 - tilec.x0; i++) { - printf("%d ", tilec.data[k]); + if (irreversible) { + printf("%f ", ((OPJ_FLOAT32*)tilec.data)[k]); + } else { + printf("%d ", tilec.data[k]); + } k ++; } printf("\n"); @@ -223,45 +261,87 @@ int main(int argc, char** argv) image_comp.dy = 1; start = opj_clock(); - opj_dwt_decode(&tcd, &tilec, tilec.numresolutions); + start_wc = opj_wallclock(); + if (bench_decode) { + if (irreversible) { + opj_dwt_decode_real(&tcd, &tilec, tilec.numresolutions); + } else { + opj_dwt_decode(&tcd, &tilec, tilec.numresolutions); + } + } else { + if (irreversible) { + opj_dwt_encode_real(&tcd, &tilec); + } else { + opj_dwt_encode(&tcd, &tilec); + } + } stop = opj_clock(); - printf("time for dwt_decode: %.03f s\n", stop - start); + stop_wc = opj_wallclock(); + printf("time for %s: total = %.03f s, wallclock = %.03f s\n", + bench_decode ? 
"dwt_decode" : "dwt_encode", + stop - start, + stop_wc - start_wc); - if (display || check) { - if (display) { + if (display) { + if (bench_decode) { printf("After IDWT\n"); - k = 0; - for (j = 0; j < tilec.y1 - tilec.y0; j++) { - for (i = 0; i < tilec.x1 - tilec.x0; i++) { + } else { + printf("After FDWT\n"); + } + k = 0; + for (j = 0; j < tilec.y1 - tilec.y0; j++) { + for (i = 0; i < tilec.x1 - tilec.x0; i++) { + if (irreversible) { + printf("%f ", ((OPJ_FLOAT32*)tilec.data)[k]); + } else { printf("%d ", tilec.data[k]); - k ++; } - printf("\n"); + k ++; } + printf("\n"); } + } - opj_dwt_encode(&tilec); - if (display) { - printf("After FDWT\n"); + if ((display || check) && !irreversible) { + + if (bench_decode) { + opj_dwt_encode(&tcd, &tilec); + } else { + opj_dwt_decode(&tcd, &tilec, tilec.numresolutions); + } + + + if (display && !irreversible) { + if (bench_decode) { + printf("After FDWT\n"); + } else { + printf("After IDWT\n"); + } k = 0; for (j = 0; j < tilec.y1 - tilec.y0; j++) { for (i = 0; i < tilec.x1 - tilec.x0; i++) { - printf("%d ", tilec.data[k]); + if (irreversible) { + printf("%f ", ((OPJ_FLOAT32*)tilec.data)[k]); + } else { + printf("%d ", tilec.data[k]); + } k ++; } printf("\n"); } } - if (check) { - size_t idx; - size_t nValues = (size_t)(tilec.x1 - tilec.x0) * - (size_t)(tilec.y1 - tilec.y0); - for (idx = 0; idx < nValues; idx++) { - if (tilec.data[idx] != getValue((OPJ_UINT32)idx)) { - printf("Difference found at idx = %u\n", (OPJ_UINT32)idx); - exit(1); - } + } + + if (check) { + + size_t idx; + size_t nValues = (size_t)(tilec.x1 - tilec.x0) * + (size_t)(tilec.y1 - tilec.y0); + for (idx = 0; idx < nValues; idx++) { + if (tilec.data[idx] != getValue((OPJ_UINT32)idx)) { + printf("Difference found at idx = %u\n", (OPJ_UINT32)idx); + exit(1); } } } diff --git a/openjpeg/src/lib/openjp2/dwt.c b/openjpeg/src/lib/openjp2/dwt.c index 5930d1c71..4164ba090 100644 --- a/openjpeg/src/lib/openjp2/dwt.c +++ b/openjpeg/src/lib/openjp2/dwt.c @@ -87,12 +87,14 
@@ typedef struct dwt_local { OPJ_INT32 cas; /* 0 = start on even coord, 1 = start on odd coord */ } opj_dwt_t; +#define NB_ELTS_V8 8 + typedef union { - OPJ_FLOAT32 f[4]; -} opj_v4_t; + OPJ_FLOAT32 f[NB_ELTS_V8]; +} opj_v8_t; -typedef struct v4dwt_local { - opj_v4_t* wavelet ; +typedef struct v8dwt_local { + opj_v8_t* wavelet ; OPJ_INT32 dn ; /* number of elements in high pass band */ OPJ_INT32 sn ; /* number of elements in low pass band */ OPJ_INT32 cas ; /* 0 = start on even coord, 1 = start on odd coord */ @@ -100,45 +102,34 @@ typedef struct v4dwt_local { OPJ_UINT32 win_l_x1; /* end coord in low pass band */ OPJ_UINT32 win_h_x0; /* start coord in high pass band */ OPJ_UINT32 win_h_x1; /* end coord in high pass band */ -} opj_v4dwt_t ; +} opj_v8dwt_t ; -static const OPJ_FLOAT32 opj_dwt_alpha = 1.586134342f; /* 12994 */ -static const OPJ_FLOAT32 opj_dwt_beta = 0.052980118f; /* 434 */ -static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /* -7233 */ -static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /* -3633 */ +/* From table F.4 from the standard */ +static const OPJ_FLOAT32 opj_dwt_alpha = -1.586134342f; +static const OPJ_FLOAT32 opj_dwt_beta = -0.052980118f; +static const OPJ_FLOAT32 opj_dwt_gamma = 0.882911075f; +static const OPJ_FLOAT32 opj_dwt_delta = 0.443506852f; -static const OPJ_FLOAT32 opj_K = 1.230174105f; /* 10078 */ -static const OPJ_FLOAT32 opj_c13318 = 1.625732422f; +static const OPJ_FLOAT32 opj_K = 1.230174105f; +static const OPJ_FLOAT32 opj_invK = (OPJ_FLOAT32)(1.0 / 1.230174105); /*@}*/ -/** -Virtual function type for wavelet transform in 1-D -*/ -typedef void (*DWT1DFN)(const opj_dwt_t* v); - /** @name Local static functions */ /*@{*/ /** Forward lazy transform (horizontal) */ -static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, +static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a, + OPJ_INT32 * OPJ_RESTRICT b, + OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); -/** -Forward lazy transform 
(vertical) -*/ -static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, - OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas); -/** -Forward 5-3 wavelet transform in 1-D -*/ -static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, - OPJ_INT32 cas); + /** Forward 9-7 wavelet transform in 1-D */ -static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, +static void opj_dwt_encode_1_real(void *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); /** Explicit calculation of the Quantization Stepsizes @@ -155,8 +146,29 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres); -static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, - void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)); +/* Forward transform, for the vertical pass, processing cols columns */ +/* where cols <= NB_ELTS_V8 */ +/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ +typedef void (*opj_encode_and_deinterleave_v_fnptr_type)( + void *array, + void *tmp, + OPJ_UINT32 height, + OPJ_BOOL even, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols); + +/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ +typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)( + void *row, + void *tmp, + OPJ_UINT32 width, + OPJ_BOOL even); + +static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, + opj_tcd_tilecomp_t * tilec, + opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v, + opj_encode_and_deinterleave_h_one_row_fnptr_type + p_encode_and_deinterleave_h_one_row); static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, OPJ_UINT32 i); @@ -164,42 +176,6 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, /* */ /* Inverse 9-7 wavelet transform in 1-D. 
*/ /* */ -static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt); - -static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, - OPJ_FLOAT32* OPJ_RESTRICT a, - OPJ_UINT32 width, - OPJ_UINT32 remaining_height); - -static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, - OPJ_FLOAT32* OPJ_RESTRICT a, - OPJ_UINT32 width, - OPJ_UINT32 nb_elts_read); - -#ifdef __SSE__ -static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - const __m128 c); - -static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - OPJ_UINT32 m, __m128 c); - -#else -static void opj_v4dwt_decode_step1(opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - const OPJ_FLOAT32 c); - -static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - OPJ_UINT32 m, - OPJ_FLOAT32 c); - -#endif /*@}*/ @@ -246,12 +222,14 @@ static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { /* */ /* Forward lazy transform (horizontal). */ /* */ -static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, +static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a, + OPJ_INT32 * OPJ_RESTRICT b, + OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { OPJ_INT32 i; - OPJ_INT32 * l_dest = b; - OPJ_INT32 * l_src = a + cas; + OPJ_INT32 * OPJ_RESTRICT l_dest = b; + const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas; for (i = 0; i < sn; ++i) { *l_dest++ = *l_src; @@ -267,40 +245,13 @@ static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, } } -/* */ -/* Forward lazy transform (vertical). 
*/ -/* */ -static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, - OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) -{ - OPJ_INT32 i = sn; - OPJ_INT32 * l_dest = b; - OPJ_INT32 * l_src = a + cas; - - while (i--) { - *l_dest = *l_src; - l_dest += x; - l_src += 2; - } /* b[i*x]=a[2*i+cas]; */ - - l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x; - l_src = a + 1 - cas; - - i = dn; - while (i--) { - *l_dest = *l_src; - l_dest += x; - l_src += 2; - } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/ -} - #ifdef STANDARD_SLOW_VERSION /* */ /* Inverse lazy transform (horizontal). */ /* */ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a) { - OPJ_INT32 *ai = a; + const OPJ_INT32 *ai = a; OPJ_INT32 *bi = h->mem + h->cas; OPJ_INT32 i = h->sn; while (i--) { @@ -321,7 +272,7 @@ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a) /* */ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) { - OPJ_INT32 *ai = a; + const OPJ_INT32 *ai = a; OPJ_INT32 *bi = v->mem + v->cas; OPJ_INT32 i = v->sn; while (i--) { @@ -341,37 +292,6 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) #endif /* STANDARD_SLOW_VERSION */ -/* */ -/* Forward 5-3 wavelet transform in 1-D. */ -/* */ -static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, - OPJ_INT32 cas) -{ - OPJ_INT32 i; - - if (!cas) { - if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < dn; i++) { - OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; - } - for (i = 0; i < sn; i++) { - OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; - } - } - } else { - if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ - OPJ_S(0) *= 2; - } else { - for (i = 0; i < dn; i++) { - OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; - } - for (i = 0; i < sn; i++) { - OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; - } - } - } -} - #ifdef STANDARD_SLOW_VERSION /* */ /* Inverse 5-3 wavelet transform in 1-D. 
*/ @@ -1033,110 +953,799 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, #endif } +#if 0 +static void opj_dwt_encode_step1(OPJ_FLOAT32* fw, + OPJ_UINT32 end, + const OPJ_FLOAT32 c) +{ + OPJ_UINT32 i = 0; + for (; i < end; ++i) { + fw[0] *= c; + fw += 2; + } +} +#else +static void opj_dwt_encode_step1_combined(OPJ_FLOAT32* fw, + OPJ_UINT32 iters_c1, + OPJ_UINT32 iters_c2, + const OPJ_FLOAT32 c1, + const OPJ_FLOAT32 c2) +{ + OPJ_UINT32 i = 0; + const OPJ_UINT32 iters_common = opj_uint_min(iters_c1, iters_c2); + assert((((OPJ_SIZE_T)fw) & 0xf) == 0); + assert(opj_int_abs((OPJ_INT32)iters_c1 - (OPJ_INT32)iters_c2) <= 1); + for (; i + 3 < iters_common; i += 4) { +#ifdef __SSE__ + const __m128 vcst = _mm_set_ps(c2, c1, c2, c1); + *(__m128*)fw = _mm_mul_ps(*(__m128*)fw, vcst); + *(__m128*)(fw + 4) = _mm_mul_ps(*(__m128*)(fw + 4), vcst); +#else + fw[0] *= c1; + fw[1] *= c2; + fw[2] *= c1; + fw[3] *= c2; + fw[4] *= c1; + fw[5] *= c2; + fw[6] *= c1; + fw[7] *= c2; +#endif + fw += 8; + } + for (; i < iters_common; i++) { + fw[0] *= c1; + fw[1] *= c2; + fw += 2; + } + if (i < iters_c1) { + fw[0] *= c1; + } else if (i < iters_c2) { + fw[1] *= c2; + } +} -/* */ -/* Forward 9-7 wavelet transform in 1-D. 
*/ -/* */ -static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, +#endif + +static void opj_dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw, + OPJ_UINT32 end, + OPJ_UINT32 m, + OPJ_FLOAT32 c) +{ + OPJ_UINT32 i; + OPJ_UINT32 imax = opj_uint_min(end, m); + if (imax > 0) { + fw[-1] += (fl[0] + fw[0]) * c; + fw += 2; + i = 1; + for (; i + 3 < imax; i += 4) { + fw[-1] += (fw[-2] + fw[0]) * c; + fw[1] += (fw[0] + fw[2]) * c; + fw[3] += (fw[2] + fw[4]) * c; + fw[5] += (fw[4] + fw[6]) * c; + fw += 8; + } + for (; i < imax; ++i) { + fw[-1] += (fw[-2] + fw[0]) * c; + fw += 2; + } + } + if (m < end) { + assert(m + 1 == end); + fw[-1] += (2 * fw[-2]) * c; + } +} + +static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { - OPJ_INT32 i; - if (!cas) { - if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < dn; i++) { - OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993); + OPJ_FLOAT32* w = (OPJ_FLOAT32*)aIn; + OPJ_INT32 a, b; + assert(dn + sn > 1); + if (cas == 0) { + a = 0; + b = 1; + } else { + a = 1; + b = 0; + } + opj_dwt_encode_step2(w + a, w + b + 1, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_alpha); + opj_dwt_encode_step2(w + b, w + a + 1, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_beta); + opj_dwt_encode_step2(w + a, w + b + 1, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_gamma); + opj_dwt_encode_step2(w + b, w + a + 1, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_delta); +#if 0 + opj_dwt_encode_step1(w + b, (OPJ_UINT32)dn, + opj_K); + opj_dwt_encode_step1(w + a, (OPJ_UINT32)sn, + opj_invK); +#else + if (a == 0) { + opj_dwt_encode_step1_combined(w, + (OPJ_UINT32)sn, + (OPJ_UINT32)dn, + opj_invK, + opj_K); + } else { + opj_dwt_encode_step1_combined(w, + (OPJ_UINT32)dn, + (OPJ_UINT32)sn, + opj_K, + opj_invK); + } +#endif +} + +static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 
numbps, + opj_stepsize_t *bandno_stepsize) +{ + OPJ_INT32 p, n; + p = opj_int_floorlog2(stepsize) - 13; + n = 11 - opj_int_floorlog2(stepsize); + bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; + bandno_stepsize->expn = numbps - p; +} + +/* +========================================================== + DWT interface +========================================================== +*/ + +/** Process one line for the horizontal pass of the 5x3 forward transform */ +static +void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn, + void* tmpIn, + OPJ_UINT32 width, + OPJ_BOOL even) +{ + OPJ_INT32* OPJ_RESTRICT row = (OPJ_INT32*)rowIn; + OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn; + const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1); + const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); + + if (even) { + if (width > 1) { + OPJ_INT32 i; + for (i = 0; i < sn - 1; i++) { + tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1); } - for (i = 0; i < sn; i++) { - OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434); + if ((width % 2) == 0) { + tmp[sn + i] = row[2 * i + 1] - row[(i) * 2]; } - for (i = 0; i < dn; i++) { - OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233); + row[0] += (tmp[sn] + tmp[sn] + 2) >> 2; + for (i = 1; i < dn; i++) { + row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2); } - for (i = 0; i < sn; i++) { - OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633); + if ((width % 2) == 1) { + row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2); } - for (i = 0; i < dn; i++) { - OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */ + memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32)); + } + } else { + if (width == 1) { + row[0] *= 2; + } else { + OPJ_INT32 i; + tmp[sn + 0] = row[0] - row[1]; + for (i = 1; i < sn; i++) { + tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1); } - for (i = 0; i < sn; i++) { - 
OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */ + if ((width % 2) == 1) { + tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1]; + } + + for (i = 0; i < dn - 1; i++) { + row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2); + } + if ((width % 2) == 0) { + row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2); + } + memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32)); + } + } +} + +/** Process one line for the horizontal pass of the 9x7 forward transform */ +static +void opj_dwt_encode_and_deinterleave_h_one_row_real(void* rowIn, + void* tmpIn, + OPJ_UINT32 width, + OPJ_BOOL even) +{ + OPJ_FLOAT32* OPJ_RESTRICT row = (OPJ_FLOAT32*)rowIn; + OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32*)tmpIn; + const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1); + const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); + if (width == 1) { + return; + } + memcpy(tmp, row, width * sizeof(OPJ_FLOAT32)); + opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1); + opj_dwt_deinterleave_h((OPJ_INT32 * OPJ_RESTRICT)tmp, + (OPJ_INT32 * OPJ_RESTRICT)row, + dn, sn, even ? 0 : 1); +} + +typedef struct { + opj_dwt_t h; + OPJ_UINT32 rw; /* Width of the resolution to process */ + OPJ_UINT32 w; /* Width of tiledp */ + OPJ_INT32 * OPJ_RESTRICT tiledp; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; + opj_encode_and_deinterleave_h_one_row_fnptr_type p_function; +} opj_dwt_encode_h_job_t; + +static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt_encode_h_job_t* job; + (void)tls; + + job = (opj_dwt_encode_h_job_t*)user_data; + for (j = job->min_j; j < job->max_j; j++) { + OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j * job->w; + (*job->p_function)(aj, job->h.mem, job->rw, + job->h.cas == 0 ? 
OPJ_TRUE : OPJ_FALSE); + } + + opj_aligned_free(job->h.mem); + opj_free(job); +} + +typedef struct { + opj_dwt_t v; + OPJ_UINT32 rh; + OPJ_UINT32 w; + OPJ_INT32 * OPJ_RESTRICT tiledp; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; + opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v; +} opj_dwt_encode_v_job_t; + +static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt_encode_v_job_t* job; + (void)tls; + + job = (opj_dwt_encode_v_job_t*)user_data; + for (j = job->min_j; j + NB_ELTS_V8 - 1 < job->max_j; j += NB_ELTS_V8) { + (*job->p_encode_and_deinterleave_v)(job->tiledp + j, + job->v.mem, + job->rh, + job->v.cas == 0, + job->w, + NB_ELTS_V8); + } + if (j < job->max_j) { + (*job->p_encode_and_deinterleave_v)(job->tiledp + j, + job->v.mem, + job->rh, + job->v.cas == 0, + job->w, + job->max_j - j); + } + + opj_aligned_free(job->v.mem); + opj_free(job); +} + +/** Fetch up to cols <= NB_ELTS_V8 for each line, and put them in tmpOut */ +/* that has a NB_ELTS_V8 interleave factor. */ +static void opj_dwt_fetch_cols_vertical_pass(const void *arrayIn, + void *tmpOut, + OPJ_UINT32 height, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols) +{ + const OPJ_INT32* OPJ_RESTRICT array = (const OPJ_INT32 * OPJ_RESTRICT)arrayIn; + OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpOut; + if (cols == NB_ELTS_V8) { + OPJ_UINT32 k; + for (k = 0; k < height; ++k) { + memcpy(tmp + NB_ELTS_V8 * k, + array + k * stride_width, + NB_ELTS_V8 * sizeof(OPJ_INT32)); + } + } else { + OPJ_UINT32 k; + for (k = 0; k < height; ++k) { + OPJ_UINT32 c; + for (c = 0; c < cols; c++) { + tmp[NB_ELTS_V8 * k + c] = array[c + k * stride_width]; + } + for (; c < NB_ELTS_V8; c++) { + tmp[NB_ELTS_V8 * k + c] = 0; + } + } + } +} + +/* Deinterleave result of forward transform, where cols <= NB_ELTS_V8 */ +/* and src contains NB_ELTS_V8 consecutive values for up to NB_ELTS_V8 */ +/* columns. 
*/ +static INLINE void opj_dwt_deinterleave_v_cols( + const OPJ_INT32 * OPJ_RESTRICT src, + OPJ_INT32 * OPJ_RESTRICT dst, + OPJ_INT32 dn, + OPJ_INT32 sn, + OPJ_UINT32 stride_width, + OPJ_INT32 cas, + OPJ_UINT32 cols) +{ + OPJ_INT32 k; + OPJ_INT32 i = sn; + OPJ_INT32 * OPJ_RESTRICT l_dest = dst; + const OPJ_INT32 * OPJ_RESTRICT l_src = src + cas * NB_ELTS_V8; + OPJ_UINT32 c; + + for (k = 0; k < 2; k++) { + while (i--) { + if (cols == NB_ELTS_V8) { + memcpy(l_dest, l_src, NB_ELTS_V8 * sizeof(OPJ_INT32)); + } else { + c = 0; + switch (cols) { + case 7: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 6: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 5: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 4: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 3: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 2: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + default: + l_dest[c] = l_src[c]; + break; + } + } + l_dest += stride_width; + l_src += 2 * NB_ELTS_V8; + } + + l_dest = dst + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)stride_width; + l_src = src + (1 - cas) * NB_ELTS_V8; + i = dn; + } +} + + +/* Forward 5-3 transform, for the vertical pass, processing cols columns */ +/* where cols <= NB_ELTS_V8 */ +static void opj_dwt_encode_and_deinterleave_v( + void *arrayIn, + void *tmpIn, + OPJ_UINT32 height, + OPJ_BOOL even, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols) +{ + OPJ_INT32* OPJ_RESTRICT array = (OPJ_INT32 * OPJ_RESTRICT)arrayIn; + OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpIn; + const OPJ_UINT32 sn = (height + (even ? 
1 : 0)) >> 1; + const OPJ_UINT32 dn = height - sn; + + opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); + +#define OPJ_Sc(i) tmp[(i)*2* NB_ELTS_V8 + c] +#define OPJ_Dc(i) tmp[((1+(i)*2))* NB_ELTS_V8 + c] + +#ifdef __SSE2__ + if (height == 1) { + if (!even) { + OPJ_UINT32 c; + for (c = 0; c < NB_ELTS_V8; c++) { + tmp[c] *= 2; + } + } + } else if (even) { + OPJ_UINT32 c; + OPJ_UINT32 i; + i = 0; + if (i + 1 < sn) { + __m128i xmm_Si_0 = *(const __m128i*)(tmp + 4 * 0); + __m128i xmm_Si_1 = *(const __m128i*)(tmp + 4 * 1); + for (; i + 1 < sn; i++) { + __m128i xmm_Sip1_0 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Sip1_1 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Di_0 = _mm_sub_epi32(xmm_Di_0, + _mm_srai_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), 1)); + xmm_Di_1 = _mm_sub_epi32(xmm_Di_1, + _mm_srai_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), 1)); + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0; + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1; + xmm_Si_0 = xmm_Sip1_0; + xmm_Si_1 = xmm_Sip1_1; } } - } else { - if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < dn; i++) { - OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993); - } - for (i = 0; i < sn; i++) { - OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434); - } - for (i = 0; i < dn; i++) { - OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233); - } - for (i = 0; i < sn; i++) { - OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633); - } - for (i = 0; i < dn; i++) { - OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */ - } - for (i = 0; i < sn; i++) { - OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */ + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + 
OPJ_Dc(i) -= OPJ_Sc(i); + } + } + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2; + } + i = 1; + if (i < dn) { + __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 1); + const __m128i xmm_two = _mm_set1_epi32(2); + for (; i < dn; i++) { + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Si_0 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Si_1 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Si_0 = _mm_add_epi32(xmm_Si_0, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_0, xmm_Di_0), xmm_two), 2)); + xmm_Si_1 = _mm_add_epi32(xmm_Si_1, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_1, xmm_Di_1), xmm_two), 2)); + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0; + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1; + xmm_Dim1_0 = xmm_Di_0; + xmm_Dim1_1 = xmm_Di_1; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2; + } + } + } else { + OPJ_UINT32 c; + OPJ_UINT32 i; + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) -= OPJ_Dc(0); + } + i = 1; + if (i < sn) { + __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 1); + for (; i < sn; i++) { + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Si_0 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Si_1 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Si_0 = _mm_sub_epi32(xmm_Si_0, + 
_mm_srai_epi32(_mm_add_epi32(xmm_Di_0, xmm_Dim1_0), 1)); + xmm_Si_1 = _mm_sub_epi32(xmm_Si_1, + _mm_srai_epi32(_mm_add_epi32(xmm_Di_1, xmm_Dim1_1), 1)); + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0; + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1; + xmm_Dim1_0 = xmm_Di_0; + xmm_Dim1_1 = xmm_Di_1; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) -= OPJ_Dc(i - 1); + } + } + i = 0; + if (i + 1 < dn) { + __m128i xmm_Si_0 = *((const __m128i*)(tmp + 4 * 0)); + __m128i xmm_Si_1 = *((const __m128i*)(tmp + 4 * 1)); + const __m128i xmm_two = _mm_set1_epi32(2); + for (; i + 1 < dn; i++) { + __m128i xmm_Sip1_0 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Sip1_1 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Di_0 = _mm_add_epi32(xmm_Di_0, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), xmm_two), 2)); + xmm_Di_1 = _mm_add_epi32(xmm_Di_1, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), xmm_two), 2)); + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0; + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1; + xmm_Si_0 = xmm_Sip1_0; + xmm_Si_1 = xmm_Sip1_1; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2; + } + } + } +#else + if (even) { + OPJ_UINT32 c; + if (height > 1) { + OPJ_UINT32 i; + for (i = 0; i + 1 < sn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) -= (OPJ_Sc(i) + OPJ_Sc(i + 1)) >> 1; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) -= OPJ_Sc(i); + } + } + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2; + } + for (i = 1; i < dn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + 
OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i) + 2) >> 2; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2; + } + } + } + } else { + OPJ_UINT32 c; + if (height == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) *= 2; + } + } else { + OPJ_UINT32 i; + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) -= OPJ_Dc(0); + } + for (i = 1; i < sn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) -= (OPJ_Dc(i) + OPJ_Dc(i - 1)) >> 1; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) -= OPJ_Dc(i - 1); + } + } + for (i = 0; i + 1 < dn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i + 1) + 2) >> 2; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2; + } + } + } + } +#endif + + if (cols == NB_ELTS_V8) { + opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 0 : 1, NB_ELTS_V8); + } else { + opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 
0 : 1, cols); + } +} + +static void opj_v8dwt_encode_step1(OPJ_FLOAT32* fw, + OPJ_UINT32 end, + const OPJ_FLOAT32 cst) +{ + OPJ_UINT32 i; +#ifdef __SSE__ + __m128* vw = (__m128*) fw; + const __m128 vcst = _mm_set1_ps(cst); + for (i = 0; i < end; ++i) { + vw[0] = _mm_mul_ps(vw[0], vcst); + vw[1] = _mm_mul_ps(vw[1], vcst); + vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); + } +#else + OPJ_UINT32 c; + for (i = 0; i < end; ++i) { + for (c = 0; c < NB_ELTS_V8; c++) { + fw[i * 2 * NB_ELTS_V8 + c] *= cst; + } + } +#endif +} + +static void opj_v8dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw, + OPJ_UINT32 end, + OPJ_UINT32 m, + OPJ_FLOAT32 cst) +{ + OPJ_UINT32 i; + OPJ_UINT32 imax = opj_uint_min(end, m); +#ifdef __SSE__ + __m128* vw = (__m128*) fw; + __m128 vcst = _mm_set1_ps(cst); + if (imax > 0) { + __m128* vl = (__m128*) fl; + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), vcst)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), vcst)); + vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); + i = 1; + + for (; i < imax; ++i) { + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), vcst)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), vcst)); + vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); + } + } + if (m < end) { + assert(m + 1 == end); + vcst = _mm_add_ps(vcst, vcst); + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(vw[-4], vcst)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(vw[-3], vcst)); + } +#else + OPJ_INT32 c; + if (imax > 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + fw[-1 * NB_ELTS_V8 + c] += (fl[0 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) * + cst; + } + fw += 2 * NB_ELTS_V8; + i = 1; + for (; i < imax; ++i) { + for (c = 0; c < NB_ELTS_V8; c++) { + fw[-1 * NB_ELTS_V8 + c] += (fw[-2 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) * + cst; } + fw += 2 * NB_ELTS_V8; + } + } + if (m < end) { + assert(m + 1 == end); + for (c = 0; c < NB_ELTS_V8; c++) { + fw[-1 * 
NB_ELTS_V8 + c] += (2 * fw[-2 * NB_ELTS_V8 + c]) * cst; } } +#endif } -static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, - opj_stepsize_t *bandno_stepsize) +/* Forward 9-7 transform, for the vertical pass, processing cols columns */ +/* where cols <= NB_ELTS_V8 */ +static void opj_dwt_encode_and_deinterleave_v_real( + void *arrayIn, + void *tmpIn, + OPJ_UINT32 height, + OPJ_BOOL even, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols) { - OPJ_INT32 p, n; - p = opj_int_floorlog2(stepsize) - 13; - n = 11 - opj_int_floorlog2(stepsize); - bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; - bandno_stepsize->expn = numbps - p; -} + OPJ_FLOAT32* OPJ_RESTRICT array = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn; + OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn; + const OPJ_INT32 sn = (OPJ_INT32)((height + (even ? 1 : 0)) >> 1); + const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn); + OPJ_INT32 a, b; + + if (height == 1) { + return; + } + + opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); + + if (even) { + a = 0; + b = 1; + } else { + a = 1; + b = 0; + } + opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, + tmp + (b + 1) * NB_ELTS_V8, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_alpha); + opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8, + tmp + (a + 1) * NB_ELTS_V8, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_beta); + opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, + tmp + (b + 1) * NB_ELTS_V8, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_gamma); + opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8, + tmp + (a + 1) * NB_ELTS_V8, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_delta); + opj_v8dwt_encode_step1(tmp + b * NB_ELTS_V8, (OPJ_UINT32)dn, + opj_K); + opj_v8dwt_encode_step1(tmp + a * NB_ELTS_V8, (OPJ_UINT32)sn, + opj_invK); -/* -========================================================== - DWT interface 
-========================================================== -*/ + + if (cols == NB_ELTS_V8) { + opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp, + (OPJ_INT32*)array, + (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 0 : 1, NB_ELTS_V8); + } else { + opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp, + (OPJ_INT32*)array, + (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 0 : 1, cols); + } +} /* */ /* Forward 5-3 wavelet transform in 2-D. */ /* */ -static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, - void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)) +static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, + opj_tcd_tilecomp_t * tilec, + opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v, + opj_encode_and_deinterleave_h_one_row_fnptr_type + p_encode_and_deinterleave_h_one_row) { - OPJ_INT32 i, j, k; - OPJ_INT32 *a = 00; - OPJ_INT32 *aj = 00; + OPJ_INT32 i; OPJ_INT32 *bj = 00; - OPJ_INT32 w, l; + OPJ_UINT32 w; + OPJ_INT32 l; - OPJ_INT32 rw; /* width of the resolution level computed */ - OPJ_INT32 rh; /* height of the resolution level computed */ OPJ_SIZE_T l_data_size; opj_tcd_resolution_t * l_cur_res = 0; opj_tcd_resolution_t * l_last_res = 0; + const int num_threads = opj_thread_pool_get_thread_count(tp); + OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data; - w = tilec->x1 - tilec->x0; + w = (OPJ_UINT32)(tilec->x1 - tilec->x0); l = (OPJ_INT32)tilec->numresolutions - 1; - a = tilec->data; l_cur_res = tilec->resolutions + l; l_last_res = l_cur_res - 1; l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); /* overflow check */ - if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) { + if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) { /* FIXME event manager error callback */ return OPJ_FALSE; } - l_data_size *= sizeof(OPJ_INT32); - bj = (OPJ_INT32*)opj_malloc(l_data_size); + l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32); + bj = 
(OPJ_INT32*)opj_aligned_32_malloc(l_data_size); /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */ /* in that case, so do not error out */ if (l_data_size != 0 && ! bj) { @@ -1145,43 +1754,135 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, i = l; while (i--) { - OPJ_INT32 rw1; /* width of the resolution level once lower than computed one */ - OPJ_INT32 rh1; /* height of the resolution level once lower than computed one */ + OPJ_UINT32 j; + OPJ_UINT32 rw; /* width of the resolution level computed */ + OPJ_UINT32 rh; /* height of the resolution level computed */ + OPJ_UINT32 + rw1; /* width of the resolution level once lower than computed one */ + OPJ_UINT32 + rh1; /* height of the resolution level once lower than computed one */ OPJ_INT32 cas_col; /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */ OPJ_INT32 cas_row; /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */ OPJ_INT32 dn, sn; - rw = l_cur_res->x1 - l_cur_res->x0; - rh = l_cur_res->y1 - l_cur_res->y0; - rw1 = l_last_res->x1 - l_last_res->x0; - rh1 = l_last_res->y1 - l_last_res->y0; + rw = (OPJ_UINT32)(l_cur_res->x1 - l_cur_res->x0); + rh = (OPJ_UINT32)(l_cur_res->y1 - l_cur_res->y0); + rw1 = (OPJ_UINT32)(l_last_res->x1 - l_last_res->x0); + rh1 = (OPJ_UINT32)(l_last_res->y1 - l_last_res->y0); cas_row = l_cur_res->x0 & 1; cas_col = l_cur_res->y0 & 1; - sn = rh1; - dn = rh - rh1; - for (j = 0; j < rw; ++j) { - aj = a + j; - for (k = 0; k < rh; ++k) { - bj[k] = aj[k * w]; + sn = (OPJ_INT32)rh1; + dn = (OPJ_INT32)(rh - rh1); + + /* Perform vertical pass */ + if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) { + for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) { + p_encode_and_deinterleave_v(tiledp + j, + bj, + rh, + cas_col == 0, + w, + NB_ELTS_V8); + } + if (j < rw) { + p_encode_and_deinterleave_v(tiledp + j, + bj, + rh, + cas_col == 0, + w, + rw - j); + } 
+ } else { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; + + if (rw < num_jobs) { + num_jobs = rw; } + step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; - (*p_function)(bj, dn, sn, cas_col); + for (j = 0; j < num_jobs; j++) { + opj_dwt_encode_v_job_t* job; - opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); + job = (opj_dwt_encode_v_job_t*) opj_malloc(sizeof(opj_dwt_encode_v_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->v.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); + if (!job->v.mem) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->v.dn = dn; + job->v.sn = sn; + job->v.cas = cas_col; + job->rh = rh; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * step_j; + job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j; + job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v; + opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job); + } + opj_thread_pool_wait_completion(tp, 0); } - sn = rw1; - dn = rw - rw1; + sn = (OPJ_INT32)rw1; + dn = (OPJ_INT32)(rw - rw1); - for (j = 0; j < rh; j++) { - aj = a + j * w; - for (k = 0; k < rw; k++) { - bj[k] = aj[k]; + /* Perform horizontal pass */ + if (num_threads <= 1 || rh <= 1) { + for (j = 0; j < rh; j++) { + OPJ_INT32* OPJ_RESTRICT aj = tiledp + j * w; + (*p_encode_and_deinterleave_h_one_row)(aj, bj, rw, + cas_row == 0 ? 
OPJ_TRUE : OPJ_FALSE); + } + } else { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; + + if (rh < num_jobs) { + num_jobs = rh; + } + step_j = (rh / num_jobs); + + for (j = 0; j < num_jobs; j++) { + opj_dwt_encode_h_job_t* job; + + job = (opj_dwt_encode_h_job_t*) opj_malloc(sizeof(opj_dwt_encode_h_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->h.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); + if (!job->h.mem) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->h.dn = dn; + job->h.sn = sn; + job->h.cas = cas_row; + job->rw = rw; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * step_j; + job->max_j = (j + 1U) * step_j; /* this can overflow */ + if (j == (num_jobs - 1U)) { /* this will take care of the overflow */ + job->max_j = rh; + } + job->p_function = p_encode_and_deinterleave_h_one_row; + opj_thread_pool_submit_job(tp, opj_dwt_encode_h_func, job); } - (*p_function)(bj, dn, sn, cas_row); - opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); + opj_thread_pool_wait_completion(tp, 0); } l_cur_res = l_last_res; @@ -1189,15 +1890,18 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, --l_last_res; } - opj_free(bj); + opj_aligned_free(bj); return OPJ_TRUE; } /* Forward 5-3 wavelet transform in 2-D. */ /* */ -OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) +OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec) { - return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1); + return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, + opj_dwt_encode_and_deinterleave_v, + opj_dwt_encode_and_deinterleave_h_one_row); } /* */ @@ -1213,21 +1917,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec, } } - -/* */ -/* Get gain of 5-3 wavelet transform. 
*/ -/* */ -OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) -{ - if (orient == 0) { - return 0; - } - if (orient == 1 || orient == 2) { - return 1; - } - return 2; -} - /* */ /* Get norm of 5-3 wavelet. */ /* */ @@ -1247,18 +1936,12 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) /* */ /* Forward 9-7 wavelet transform in 2-D. */ /* */ -OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec) -{ - return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1_real); -} - -/* */ -/* Get gain of 9-7 wavelet transform. */ -/* */ -OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) +OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec) { - (void)orient; - return 0; + return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, + opj_dwt_encode_and_deinterleave_v_real, + opj_dwt_encode_and_deinterleave_h_one_row_real); } /* */ @@ -1293,7 +1976,7 @@ void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { stepsize = 1.0; } else { - OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; + OPJ_FLOAT64 norm = opj_dwt_getnorm_real(level, orient); stepsize = (1 << (gain)) / norm; } opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), @@ -1328,15 +2011,15 @@ typedef struct { OPJ_INT32 * OPJ_RESTRICT tiledp; OPJ_UINT32 min_j; OPJ_UINT32 max_j; -} opj_dwd_decode_h_job_t; +} opj_dwt_decode_h_job_t; static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) { OPJ_UINT32 j; - opj_dwd_decode_h_job_t* job; + opj_dwt_decode_h_job_t* job; (void)tls; - job = (opj_dwd_decode_h_job_t*)user_data; + job = (opj_dwt_decode_h_job_t*)user_data; for (j = job->min_j; j < job->max_j; j++) { opj_idwt53_h(&job->h, &job->tiledp[j * job->w]); } @@ -1352,15 +2035,15 @@ typedef struct { OPJ_INT32 * OPJ_RESTRICT tiledp; OPJ_UINT32 min_j; OPJ_UINT32 max_j; -} opj_dwd_decode_v_job_t; +} opj_dwt_decode_v_job_t; static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) { OPJ_UINT32 j; - 
opj_dwd_decode_v_job_t* job; + opj_dwt_decode_v_job_t* job; (void)tls; - job = (opj_dwd_decode_v_job_t*)user_data; + job = (opj_dwt_decode_v_job_t*)user_data; for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; j += PARALLEL_COLS_53) { opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, @@ -1447,9 +2130,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, step_j = (rh / num_jobs); for (j = 0; j < num_jobs; j++) { - opj_dwd_decode_h_job_t* job; + opj_dwt_decode_h_job_t* job; - job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); + job = (opj_dwt_decode_h_job_t*) opj_malloc(sizeof(opj_dwt_decode_h_job_t)); if (!job) { /* It would be nice to fallback to single thread case, but */ /* unfortunately some jobs may be launched and have modified */ @@ -1502,9 +2185,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, step_j = (rw / num_jobs); for (j = 0; j < num_jobs; j++) { - opj_dwd_decode_v_job_t* job; + opj_dwt_decode_v_job_t* job; - job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); + job = (opj_dwt_decode_v_job_t*) opj_malloc(sizeof(opj_dwt_decode_v_job_t)); if (!job) { /* It would be nice to fallback to single thread case, but */ /* unfortunately some jobs may be launched and have modified */ @@ -2168,7 +2851,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( return OPJ_TRUE; } -static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, +static void opj_v8dwt_interleave_h(opj_v8dwt_t* OPJ_RESTRICT dwt, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_UINT32 width, OPJ_UINT32 remaining_height) @@ -2179,39 +2862,69 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 x1 = dwt->win_l_x1; for (k = 0; k < 2; ++k) { - if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 && - ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) { + if (remaining_height >= NB_ELTS_V8 && ((OPJ_SIZE_T) a & 0x0f) == 0 && + ((OPJ_SIZE_T) bi & 0x0f) == 0) { /* Fast code path */ for (i = x0; i 
< x1; ++i) { OPJ_UINT32 j = i; - bi[i * 8 ] = a[j]; + OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8; + dst[0] = a[j]; j += width; - bi[i * 8 + 1] = a[j]; + dst[1] = a[j]; j += width; - bi[i * 8 + 2] = a[j]; + dst[2] = a[j]; j += width; - bi[i * 8 + 3] = a[j]; + dst[3] = a[j]; + j += width; + dst[4] = a[j]; + j += width; + dst[5] = a[j]; + j += width; + dst[6] = a[j]; + j += width; + dst[7] = a[j]; } } else { /* Slow code path */ for (i = x0; i < x1; ++i) { OPJ_UINT32 j = i; - bi[i * 8 ] = a[j]; + OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8; + dst[0] = a[j]; j += width; if (remaining_height == 1) { continue; } - bi[i * 8 + 1] = a[j]; + dst[1] = a[j]; j += width; if (remaining_height == 2) { continue; } - bi[i * 8 + 2] = a[j]; + dst[2] = a[j]; j += width; if (remaining_height == 3) { continue; } - bi[i * 8 + 3] = a[j]; /* This one*/ + dst[3] = a[j]; + j += width; + if (remaining_height == 4) { + continue; + } + dst[4] = a[j]; + j += width; + if (remaining_height == 5) { + continue; + } + dst[5] = a[j]; + j += width; + if (remaining_height == 6) { + continue; + } + dst[6] = a[j]; + j += width; + if (remaining_height == 7) { + continue; + } + dst[7] = a[j]; } } @@ -2222,7 +2935,7 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, } } -static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, +static void opj_v8dwt_interleave_partial_h(opj_v8dwt_t* dwt, opj_sparse_array_int32_t* sa, OPJ_UINT32 sa_line, OPJ_UINT32 remaining_height) @@ -2235,25 +2948,25 @@ static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, dwt->win_l_x1, sa_line + i + 1, /* Nasty cast from float* to int32* */ (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, - 8, 0, OPJ_TRUE); + 2 * NB_ELTS_V8, 0, OPJ_TRUE); assert(ret); ret = opj_sparse_array_int32_read(sa, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_line + i, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, sa_line + i + 1, /* Nasty cast from float* to int32* */ (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 
2 * dwt->win_h_x0) + i, - 8, 0, OPJ_TRUE); + 2 * NB_ELTS_V8, 0, OPJ_TRUE); assert(ret); OPJ_UNUSED(ret); } } -static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, - OPJ_FLOAT32* OPJ_RESTRICT a, - OPJ_UINT32 width, - OPJ_UINT32 nb_elts_read) +static INLINE void opj_v8dwt_interleave_v(opj_v8dwt_t* OPJ_RESTRICT dwt, + OPJ_FLOAT32* OPJ_RESTRICT a, + OPJ_UINT32 width, + OPJ_UINT32 nb_elts_read) { - opj_v4_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas; + opj_v8_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas; OPJ_UINT32 i; for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) { @@ -2270,7 +2983,7 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, } } -static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, +static void opj_v8dwt_interleave_partial_v(opj_v8dwt_t* OPJ_RESTRICT dwt, opj_sparse_array_int32_t* sa, OPJ_UINT32 sa_col, OPJ_UINT32 nb_elts_read) @@ -2280,44 +2993,36 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, sa_col, dwt->win_l_x0, sa_col + nb_elts_read, dwt->win_l_x1, (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0), - 1, 8, OPJ_TRUE); + 1, 2 * NB_ELTS_V8, OPJ_TRUE); assert(ret); ret = opj_sparse_array_int32_read(sa, sa_col, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_col + nb_elts_read, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0), - 1, 8, OPJ_TRUE); + 1, 2 * NB_ELTS_V8, OPJ_TRUE); assert(ret); OPJ_UNUSED(ret); } #ifdef __SSE__ -static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, +static void opj_v8dwt_decode_step1_sse(opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, const __m128 c) { __m128* OPJ_RESTRICT vw = (__m128*) w; - OPJ_UINT32 i; - /* 4x unrolled loop */ - vw += 2 * start; - for (i = start; i + 3 < end; i += 4, vw += 8) { - __m128 xmm0 = _mm_mul_ps(vw[0], c); - __m128 xmm2 = _mm_mul_ps(vw[2], c); - __m128 xmm4 = _mm_mul_ps(vw[4], c); - __m128 xmm6 = _mm_mul_ps(vw[6], c); - vw[0] = xmm0; - vw[2] = xmm2; - vw[4] = xmm4; - vw[6] 
= xmm6; - } - for (; i < end; ++i, vw += 2) { + OPJ_UINT32 i = start; + /* To be adapted if NB_ELTS_V8 changes */ + vw += 4 * start; + /* Note: attempt at loop unrolling x2 doesn't help */ + for (; i < end; ++i, vw += 4) { vw[0] = _mm_mul_ps(vw[0], c); + vw[1] = _mm_mul_ps(vw[1], c); } } -static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, +static void opj_v8dwt_decode_step2_sse(opj_v8_t* l, opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, OPJ_UINT32 m, @@ -2325,74 +3030,58 @@ static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, { __m128* OPJ_RESTRICT vl = (__m128*) l; __m128* OPJ_RESTRICT vw = (__m128*) w; + /* To be adapted if NB_ELTS_V8 changes */ OPJ_UINT32 i; OPJ_UINT32 imax = opj_uint_min(end, m); - __m128 tmp1, tmp2, tmp3; if (start == 0) { - tmp1 = vl[0]; + if (imax >= 1) { + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), c)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), c)); + vw += 4; + start = 1; + } } else { - vw += start * 2; - tmp1 = vw[-3]; + vw += start * 4; } i = start; - - /* 4x loop unrolling */ - for (; i + 3 < imax; i += 4) { - __m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; - tmp2 = vw[-1]; - tmp3 = vw[ 0]; - tmp4 = vw[ 1]; - tmp5 = vw[ 2]; - tmp6 = vw[ 3]; - tmp7 = vw[ 4]; - tmp8 = vw[ 5]; - tmp9 = vw[ 6]; - vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); - vw[ 1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c)); - vw[ 3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c)); - vw[ 5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c)); - tmp1 = tmp9; - vw += 8; - } - + /* Note: attempt at loop unrolling x2 doesn't help */ for (; i < imax; ++i) { - tmp2 = vw[-1]; - tmp3 = vw[ 0]; - vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); - tmp1 = tmp3; - vw += 2; + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), c)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), c)); + vw += 4; } if (m < end) { 
assert(m + 1 == end); c = _mm_add_ps(c, c); - c = _mm_mul_ps(c, vw[-2]); - vw[-1] = _mm_add_ps(vw[-1], c); + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(c, vw[-4])); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(c, vw[-3])); } } #else -static void opj_v4dwt_decode_step1(opj_v4_t* w, +static void opj_v8dwt_decode_step1(opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, const OPJ_FLOAT32 c) { OPJ_FLOAT32* OPJ_RESTRICT fw = (OPJ_FLOAT32*) w; OPJ_UINT32 i; + /* To be adapted if NB_ELTS_V8 changes */ for (i = start; i < end; ++i) { - OPJ_FLOAT32 tmp1 = fw[i * 8 ]; - OPJ_FLOAT32 tmp2 = fw[i * 8 + 1]; - OPJ_FLOAT32 tmp3 = fw[i * 8 + 2]; - OPJ_FLOAT32 tmp4 = fw[i * 8 + 3]; - fw[i * 8 ] = tmp1 * c; - fw[i * 8 + 1] = tmp2 * c; - fw[i * 8 + 2] = tmp3 * c; - fw[i * 8 + 3] = tmp4 * c; + fw[i * 2 * 8 ] = fw[i * 2 * 8 ] * c; + fw[i * 2 * 8 + 1] = fw[i * 2 * 8 + 1] * c; + fw[i * 2 * 8 + 2] = fw[i * 2 * 8 + 2] * c; + fw[i * 2 * 8 + 3] = fw[i * 2 * 8 + 3] * c; + fw[i * 2 * 8 + 4] = fw[i * 2 * 8 + 4] * c; + fw[i * 2 * 8 + 5] = fw[i * 2 * 8 + 5] * c; + fw[i * 2 * 8 + 6] = fw[i * 2 * 8 + 6] * c; + fw[i * 2 * 8 + 7] = fw[i * 2 * 8 + 7] * c; } } -static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, +static void opj_v8dwt_decode_step2(opj_v8_t* l, opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, OPJ_UINT32 m, @@ -2403,36 +3092,33 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_UINT32 i; OPJ_UINT32 imax = opj_uint_min(end, m); if (start > 0) { - fw += 8 * start; - fl = fw - 8; + fw += 2 * NB_ELTS_V8 * start; + fl = fw - 2 * NB_ELTS_V8; } + /* To be adapted if NB_ELTS_V8 changes */ for (i = start; i < imax; ++i) { - OPJ_FLOAT32 tmp1_1 = fl[0]; - OPJ_FLOAT32 tmp1_2 = fl[1]; - OPJ_FLOAT32 tmp1_3 = fl[2]; - OPJ_FLOAT32 tmp1_4 = fl[3]; - OPJ_FLOAT32 tmp2_1 = fw[-4]; - OPJ_FLOAT32 tmp2_2 = fw[-3]; - OPJ_FLOAT32 tmp2_3 = fw[-2]; - OPJ_FLOAT32 tmp2_4 = fw[-1]; - OPJ_FLOAT32 tmp3_1 = fw[0]; - OPJ_FLOAT32 tmp3_2 = fw[1]; - OPJ_FLOAT32 tmp3_3 = fw[2]; - OPJ_FLOAT32 tmp3_4 = fw[3]; - 
fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); - fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); - fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); - fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); + fw[-8] = fw[-8] + ((fl[0] + fw[0]) * c); + fw[-7] = fw[-7] + ((fl[1] + fw[1]) * c); + fw[-6] = fw[-6] + ((fl[2] + fw[2]) * c); + fw[-5] = fw[-5] + ((fl[3] + fw[3]) * c); + fw[-4] = fw[-4] + ((fl[4] + fw[4]) * c); + fw[-3] = fw[-3] + ((fl[5] + fw[5]) * c); + fw[-2] = fw[-2] + ((fl[6] + fw[6]) * c); + fw[-1] = fw[-1] + ((fl[7] + fw[7]) * c); fl = fw; - fw += 8; + fw += 2 * NB_ELTS_V8; } if (m < end) { assert(m + 1 == end); c += c; - fw[-4] = fw[-4] + fl[0] * c; - fw[-3] = fw[-3] + fl[1] * c; - fw[-2] = fw[-2] + fl[2] * c; - fw[-1] = fw[-1] + fl[3] * c; + fw[-8] = fw[-8] + fl[0] * c; + fw[-7] = fw[-7] + fl[1] * c; + fw[-6] = fw[-6] + fl[2] * c; + fw[-5] = fw[-5] + fl[3] * c; + fw[-4] = fw[-4] + fl[4] * c; + fw[-3] = fw[-3] + fl[5] * c; + fw[-2] = fw[-2] + fl[6] * c; + fw[-1] = fw[-1] + fl[7] * c; } } @@ -2441,9 +3127,17 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, /* */ /* Inverse 9-7 wavelet transform in 1-D. 
*/ /* */ -static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) +static void opj_v8dwt_decode(opj_v8dwt_t* OPJ_RESTRICT dwt) { OPJ_INT32 a, b; + /* BUG_WEIRD_TWO_INVK (look for this identifier in tcd.c) */ + /* Historic value for 2 / opj_invK */ + /* Normally, we should use invK, but if we do so, we have failures in the */ + /* conformance test, due to MSE and peak errors significantly higher than */ + /* accepted value */ + /* Due to using two_invK instead of invK, we have to compensate in tcd.c */ + /* the computation of the stepsize for the non LL subbands */ + const float two_invK = 1.625732422f; if (dwt->cas == 0) { if (!((dwt->dn > 0) || (dwt->sn > 1))) { return; @@ -2458,60 +3152,147 @@ static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) b = 0; } #ifdef __SSE__ - opj_v4dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, + opj_v8dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, _mm_set1_ps(opj_K)); - opj_v4dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, - _mm_set1_ps(opj_c13318)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, + opj_v8dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, + _mm_set1_ps(two_invK)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - _mm_set1_ps(opj_dwt_delta)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, + _mm_set1_ps(-opj_dwt_delta)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - _mm_set1_ps(opj_dwt_gamma)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, + _mm_set1_ps(-opj_dwt_gamma)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - _mm_set1_ps(opj_dwt_beta)); - 
opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, + _mm_set1_ps(-opj_dwt_beta)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - _mm_set1_ps(opj_dwt_alpha)); + _mm_set1_ps(-opj_dwt_alpha)); #else - opj_v4dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, + opj_v8dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, opj_K); - opj_v4dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, - opj_c13318); - opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, + opj_v8dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, + two_invK); + opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - opj_dwt_delta); - opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, + -opj_dwt_delta); + opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - opj_dwt_gamma); - opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, + -opj_dwt_gamma); + opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - opj_dwt_beta); - opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, + -opj_dwt_beta); + opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - opj_dwt_alpha); + -opj_dwt_alpha); #endif } +typedef struct { + opj_v8dwt_t h; + OPJ_UINT32 rw; + OPJ_UINT32 w; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + OPJ_UINT32 nb_rows; +} opj_dwt97_decode_h_job_t; + +static void opj_dwt97_decode_h_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt97_decode_h_job_t* job; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + OPJ_UINT32 w; + (void)tls; + + job = 
(opj_dwt97_decode_h_job_t*)user_data; + w = job->w; + + assert((job->nb_rows % NB_ELTS_V8) == 0); + + aj = job->aj; + for (j = 0; j + NB_ELTS_V8 <= job->nb_rows; j += NB_ELTS_V8) { + OPJ_UINT32 k; + opj_v8dwt_interleave_h(&job->h, aj, job->w, NB_ELTS_V8); + opj_v8dwt_decode(&job->h); + + /* To be adapted if NB_ELTS_V8 changes */ + for (k = 0; k < job->rw; k++) { + aj[k ] = job->h.wavelet[k].f[0]; + aj[k + (OPJ_SIZE_T)w ] = job->h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w * 2] = job->h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 3] = job->h.wavelet[k].f[3]; + } + for (k = 0; k < job->rw; k++) { + aj[k + (OPJ_SIZE_T)w * 4] = job->h.wavelet[k].f[4]; + aj[k + (OPJ_SIZE_T)w * 5] = job->h.wavelet[k].f[5]; + aj[k + (OPJ_SIZE_T)w * 6] = job->h.wavelet[k].f[6]; + aj[k + (OPJ_SIZE_T)w * 7] = job->h.wavelet[k].f[7]; + } + + aj += w * NB_ELTS_V8; + } + + opj_aligned_free(job->h.wavelet); + opj_free(job); +} + + +typedef struct { + opj_v8dwt_t v; + OPJ_UINT32 rh; + OPJ_UINT32 w; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + OPJ_UINT32 nb_columns; +} opj_dwt97_decode_v_job_t; + +static void opj_dwt97_decode_v_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt97_decode_v_job_t* job; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + (void)tls; + + job = (opj_dwt97_decode_v_job_t*)user_data; + + assert((job->nb_columns % NB_ELTS_V8) == 0); + + aj = job->aj; + for (j = 0; j + NB_ELTS_V8 <= job->nb_columns; j += NB_ELTS_V8) { + OPJ_UINT32 k; + + opj_v8dwt_interleave_v(&job->v, aj, job->w, NB_ELTS_V8); + opj_v8dwt_decode(&job->v); + + for (k = 0; k < job->rh; ++k) { + memcpy(&aj[k * (OPJ_SIZE_T)job->w], &job->v.wavelet[k], + NB_ELTS_V8 * sizeof(OPJ_FLOAT32)); + } + aj += NB_ELTS_V8; + } + + opj_aligned_free(job->v.wavelet); + opj_free(job); +} + /* */ /* Inverse 9-7 wavelet transform in 2-D. 
*/ /* */ static -OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, +OPJ_BOOL opj_dwt_decode_tile_97(opj_thread_pool_t* tp, + opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { - opj_v4dwt_t h; - opj_v4dwt_t v; + opj_v8dwt_t h; + opj_v8dwt_t v; opj_tcd_resolution_t* res = tilec->resolutions; @@ -2525,20 +3306,19 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); OPJ_SIZE_T l_data_size; + const int num_threads = opj_thread_pool_get_thread_count(tp); - l_data_size = opj_dwt_max_resolution(res, numres); - /* overflow check */ - if (l_data_size > (SIZE_MAX - 5U)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; + if (numres == 1) { + return OPJ_TRUE; } - l_data_size += 5U; + + l_data_size = opj_dwt_max_resolution(res, numres); /* overflow check */ - if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { + if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) { /* FIXME event manager error callback */ return OPJ_FALSE; } - h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); + h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); if (!h.wavelet) { /* FIXME event manager error callback */ return OPJ_FALSE; @@ -2566,35 +3346,80 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, h.win_l_x1 = (OPJ_UINT32)h.sn; h.win_h_x0 = 0; h.win_h_x1 = (OPJ_UINT32)h.dn; - for (j = 0; j + 3 < rh; j += 4) { - OPJ_UINT32 k; - opj_v4dwt_interleave_h(&h, aj, w, rh - j); - opj_v4dwt_decode(&h); - for (k = 0; k < rw; k++) { - aj[k ] = h.wavelet[k].f[0]; - aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; - aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; - aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; + if (num_threads <= 1 || rh < 2 * NB_ELTS_V8) { + for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) { + OPJ_UINT32 k; + opj_v8dwt_interleave_h(&h, aj, w, NB_ELTS_V8); + opj_v8dwt_decode(&h); + + /* To be 
adapted if NB_ELTS_V8 changes */ + for (k = 0; k < rw; k++) { + aj[k ] = h.wavelet[k].f[0]; + aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; + } + for (k = 0; k < rw; k++) { + aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4]; + aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5]; + aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6]; + aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7]; + } + + aj += w * NB_ELTS_V8; + } + } else { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; + + if ((rh / NB_ELTS_V8) < num_jobs) { + num_jobs = rh / NB_ELTS_V8; } + step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; + for (j = 0; j < num_jobs; j++) { + opj_dwt97_decode_h_job_t* job; - aj += w * 4; + job = (opj_dwt97_decode_h_job_t*) opj_malloc(sizeof(opj_dwt97_decode_h_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->h.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); + if (!job->h.wavelet) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->h.dn = h.dn; + job->h.sn = h.sn; + job->h.cas = h.cas; + job->h.win_l_x0 = h.win_l_x0; + job->h.win_l_x1 = h.win_l_x1; + job->h.win_h_x0 = h.win_h_x0; + job->h.win_h_x1 = h.win_h_x1; + job->rw = rw; + job->w = w; + job->aj = aj; + job->nb_rows = (j + 1 == num_jobs) ? 
(rh & (OPJ_UINT32)~ + (NB_ELTS_V8 - 1)) - j * step_j : step_j; + aj += w * job->nb_rows; + opj_thread_pool_submit_job(tp, opj_dwt97_decode_h_func, job); + } + opj_thread_pool_wait_completion(tp, 0); + j = rh & (OPJ_UINT32)~(NB_ELTS_V8 - 1); } if (j < rh) { OPJ_UINT32 k; - opj_v4dwt_interleave_h(&h, aj, w, rh - j); - opj_v4dwt_decode(&h); + opj_v8dwt_interleave_h(&h, aj, w, rh - j); + opj_v8dwt_decode(&h); for (k = 0; k < rw; k++) { - switch (rh - j) { - case 3: - aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; - /* FALLTHRU */ - case 2: - aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; - /* FALLTHRU */ - case 1: - aj[k] = h.wavelet[k].f[0]; + OPJ_UINT32 l; + for (l = 0; l < rh - j; l++) { + aj[k + (OPJ_SIZE_T)w * l ] = h.wavelet[k].f[l]; } } } @@ -2607,25 +3432,71 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, v.win_h_x1 = (OPJ_UINT32)v.dn; aj = (OPJ_FLOAT32*) tilec->data; - for (j = rw; j > 3; j -= 4) { - OPJ_UINT32 k; + if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) { + for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) { + OPJ_UINT32 k; - opj_v4dwt_interleave_v(&v, aj, w, 4); - opj_v4dwt_decode(&v); + opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8); + opj_v8dwt_decode(&v); - for (k = 0; k < rh; ++k) { - memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); + for (k = 0; k < rh; ++k) { + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32)); + } + aj += NB_ELTS_V8; + } + } else { + /* "bench_dwt -I" shows that scaling is poor, likely due to RAM + transfer being the limiting factor. So limit the number of + threads. 
+ */ + OPJ_UINT32 num_jobs = opj_uint_max((OPJ_UINT32)num_threads / 2, 2U); + OPJ_UINT32 step_j; + + if ((rw / NB_ELTS_V8) < num_jobs) { + num_jobs = rw / NB_ELTS_V8; + } + step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; + for (j = 0; j < num_jobs; j++) { + opj_dwt97_decode_v_job_t* job; + + job = (opj_dwt97_decode_v_job_t*) opj_malloc(sizeof(opj_dwt97_decode_v_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->v.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); + if (!job->v.wavelet) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->v.dn = v.dn; + job->v.sn = v.sn; + job->v.cas = v.cas; + job->v.win_l_x0 = v.win_l_x0; + job->v.win_l_x1 = v.win_l_x1; + job->v.win_h_x0 = v.win_h_x0; + job->v.win_h_x1 = v.win_h_x1; + job->rh = rh; + job->w = w; + job->aj = aj; + job->nb_columns = (j + 1 == num_jobs) ? (rw & (OPJ_UINT32)~ + (NB_ELTS_V8 - 1)) - j * step_j : step_j; + aj += job->nb_columns; + opj_thread_pool_submit_job(tp, opj_dwt97_decode_v_func, job); } - aj += 4; + opj_thread_pool_wait_completion(tp, 0); } - if (rw & 0x03) { + if (rw & (NB_ELTS_V8 - 1)) { OPJ_UINT32 k; - j = rw & 0x03; + j = rw & (NB_ELTS_V8 - 1); - opj_v4dwt_interleave_v(&v, aj, w, j); - opj_v4dwt_decode(&v); + opj_v8dwt_interleave_v(&v, aj, w, j); + opj_v8dwt_decode(&v); for (k = 0; k < rh; ++k) { memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], @@ -2643,8 +3514,8 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { opj_sparse_array_int32_t* sa; - opj_v4dwt_t h; - opj_v4dwt_t v; + opj_v8dwt_t h; + opj_v8dwt_t v; OPJ_UINT32 resno; /* This value matches the maximum left/right extension given in tables */ /* F.2 and F.3 of the standard. 
Note: in opj_tcd_is_subband_area_of_interest() */ @@ -2694,19 +3565,12 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, l_data_size = opj_dwt_max_resolution(tr, numres); /* overflow check */ - if (l_data_size > (SIZE_MAX - 5U)) { - /* FIXME event manager error callback */ - opj_sparse_array_int32_free(sa); - return OPJ_FALSE; - } - l_data_size += 5U; - /* overflow check */ - if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { + if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); return OPJ_FALSE; } - h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); + h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); if (!h.wavelet) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); @@ -2801,17 +3665,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, h.win_l_x1 = win_ll_x1; h.win_h_x0 = win_hl_x0; h.win_h_x1 = win_hl_x1; - for (j = 0; j + 3 < rh; j += 4) { - if ((j + 3 >= win_ll_y0 && j < win_ll_y1) || - (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && + for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) { + if ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) || + (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { - opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j)); - opj_v4dwt_decode(&h); + opj_v8dwt_interleave_partial_h(&h, sa, j, opj_uint_min(NB_ELTS_V8, rh - j)); + opj_v8dwt_decode(&h); if (!opj_sparse_array_int32_write(sa, win_tr_x0, j, - win_tr_x1, j + 4, + win_tr_x1, j + NB_ELTS_V8, (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], - 4, 1, OPJ_TRUE)) { + NB_ELTS_V8, 1, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.wavelet); @@ -2821,16 +3685,16 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, } if (j < rh && - ((j + 3 >= win_ll_y0 
&& j < win_ll_y1) || - (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && + ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) || + (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn))) { - opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j); - opj_v4dwt_decode(&h); + opj_v8dwt_interleave_partial_h(&h, sa, j, rh - j); + opj_v8dwt_decode(&h); if (!opj_sparse_array_int32_write(sa, win_tr_x0, j, win_tr_x1, rh, (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], - 4, 1, OPJ_TRUE)) { + NB_ELTS_V8, 1, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.wavelet); @@ -2842,17 +3706,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, v.win_l_x1 = win_ll_y1; v.win_h_x0 = win_lh_y0; v.win_h_x1 = win_lh_y1; - for (j = win_tr_x0; j < win_tr_x1; j += 4) { - OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j); + for (j = win_tr_x0; j < win_tr_x1; j += NB_ELTS_V8) { + OPJ_UINT32 nb_elts = opj_uint_min(NB_ELTS_V8, win_tr_x1 - j); - opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts); - opj_v4dwt_decode(&v); + opj_v8dwt_interleave_partial_v(&v, sa, j, nb_elts); + opj_v8dwt_decode(&v); if (!opj_sparse_array_int32_write(sa, j, win_tr_y0, j + nb_elts, win_tr_y1, (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0], - 1, 4, OPJ_TRUE)) { + 1, NB_ELTS_V8, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.wavelet); @@ -2885,7 +3749,7 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, OPJ_UINT32 numres) { if (p_tcd->whole_tile_decoding) { - return opj_dwt_decode_tile_97(tilec, numres); + return opj_dwt_decode_tile_97(p_tcd->thread_pool, tilec, numres); } else { return opj_dwt_decode_partial_97(tilec, numres); } diff --git a/openjpeg/src/lib/openjp2/dwt.h b/openjpeg/src/lib/openjp2/dwt.h index 4f63e524a..215061e6b 100644 --- a/openjpeg/src/lib/openjp2/dwt.h +++ b/openjpeg/src/lib/openjp2/dwt.h @@ -56,9 +56,11 @@ DWT.C are used by some function in 
TCD.C. /** Forward 5-3 wavelet transform in 2-D. Apply a reversible DWT transform to a component of an image. +@param p_tcd TCD handle @param tilec Tile component information (current tile) */ -OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec); +OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec); /** Inverse 5-3 wavelet transform in 2-D. @@ -71,12 +73,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres); -/** -Get the gain of a subband for the reversible 5-3 DWT. -@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) -@return Returns 0 if orient = 0, returns 1 if orient = 1 or 2, returns 2 otherwise -*/ -OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) ; /** Get the norm of a wavelet function of a subband at a specified level for the reversible 5-3 DWT. @param level Level of the wavelet function @@ -87,9 +83,11 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient); /** Forward 9-7 wavelet transform in 2-D. Apply an irreversible DWT transform to a component of an image. +@param p_tcd TCD handle @param tilec Tile component information (current tile) */ -OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec); +OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec); /** Inverse 9-7 wavelet transform in 2-D. Apply an irreversible inverse DWT transform to a component of an image. @@ -101,12 +99,6 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres); -/** -Get the gain of a subband for the irreversible 9-7 DWT. 
-@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) -@return Returns the gain of the 9-7 wavelet transform -*/ -OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient); /** Get the norm of a wavelet function of a subband at a specified level for the irreversible 9-7 DWT @param level Level of the wavelet function diff --git a/openjpeg/src/lib/openjp2/j2k.c b/openjpeg/src/lib/openjp2/j2k.c index 4a4b3494d..8e343ab2e 100644 --- a/openjpeg/src/lib/openjp2/j2k.c +++ b/openjpeg/src/lib/openjp2/j2k.c @@ -400,14 +400,14 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager); static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager); @@ -832,14 +832,14 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k, * * @param p_j2k J2K codec. * @param p_data Output buffer - * @param p_total_data_size Output buffer size + * @param total_data_size Output buffer size * @param p_data_written Number of bytes written into stream * @param p_stream the stream to write data to. * @param p_manager the user event manager. */ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, OPJ_UINT32 * p_data_written, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager); @@ -879,11 +879,13 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, /** * Writes the SOD marker (Start of data) * + * This also writes optional PLT markers (before SOD) + * * @param p_j2k J2K codec. 
* @param p_tile_coder FIXME DOC * @param p_data FIXME DOC * @param p_data_written FIXME DOC - * @param p_total_data_size FIXME DOC + * @param total_data_size FIXME DOC * @param p_stream the stream to write data to. * @param p_manager the user event manager. */ @@ -891,7 +893,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, opj_tcd_t * p_tile_coder, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager); @@ -1219,6 +1221,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k, * A nice message is outputted at errors. * * @param p_pocs the progression order changes. + * @param tileno the tile number of interest * @param p_nb_pocs the number of progression order changes. * @param p_nb_resolutions the number of resolutions. * @param numcomps the number of components @@ -1228,6 +1231,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k, * @return true if the pocs are valid. */ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + OPJ_UINT32 tileno, OPJ_UINT32 p_nb_pocs, OPJ_UINT32 p_nb_resolutions, OPJ_UINT32 numcomps, @@ -1282,6 +1286,13 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters, static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz, opj_event_mgr_t *p_manager); +static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters, + opj_image_t *image, opj_event_mgr_t *p_manager); + +static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters, + opj_image_t *image, + opj_event_mgr_t *p_manager); + /** * Checks for invalid number of tile-parts in SOT marker (TPsot==TNsot). See issue 254. 
* @@ -1615,6 +1626,7 @@ const char *opj_j2k_convert_progression_order(OPJ_PROG_ORDER prg_order) } static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + OPJ_UINT32 tileno, OPJ_UINT32 p_nb_pocs, OPJ_UINT32 p_nb_resolutions, OPJ_UINT32 p_num_comps, @@ -1628,7 +1640,8 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, OPJ_UINT32 step_r = p_num_comps * step_c; OPJ_UINT32 step_l = p_nb_resolutions * step_r; OPJ_BOOL loss = OPJ_FALSE; - OPJ_UINT32 layno0 = 0; + + assert(p_nb_pocs > 0); packet_array = (OPJ_UINT32*) opj_calloc(step_l * p_num_layers, sizeof(OPJ_UINT32)); @@ -1638,63 +1651,37 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, return OPJ_FALSE; } - if (p_nb_pocs == 0) { - opj_free(packet_array); - return OPJ_TRUE; - } - - index = step_r * p_pocs->resno0; - /* take each resolution for each poc */ - for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) { - OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c; - - /* take each comp of each resolution for each poc */ - for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) { - OPJ_UINT32 comp_index = res_index + layno0 * step_l; + /* iterate through all the pocs that match our tile of interest. */ + for (i = 0; i < p_nb_pocs; ++i) { + const opj_poc_t *poc = &p_pocs[i]; + if (tileno + 1 == poc->tile) { + index = step_r * poc->resno0; - /* and finally take each layer of each res of ... 
*/ - for (layno = layno0; layno < p_pocs->layno1 ; ++layno) { - /*index = step_r * resno + step_c * compno + step_l * layno;*/ - packet_array[comp_index] = 1; - comp_index += step_l; - } + /* take each resolution for each poc */ + for (resno = poc->resno0 ; + resno < opj_uint_min(poc->resno1, p_nb_resolutions); ++resno) { + OPJ_UINT32 res_index = index + poc->compno0 * step_c; - res_index += step_c; - } + /* take each comp of each resolution for each poc */ + for (compno = poc->compno0 ; + compno < opj_uint_min(poc->compno1, p_num_comps); ++compno) { + /* The layer index always starts at zero for every progression. */ + const OPJ_UINT32 layno0 = 0; + OPJ_UINT32 comp_index = res_index + layno0 * step_l; - index += step_r; - } - ++p_pocs; - - /* iterate through all the pocs */ - for (i = 1; i < p_nb_pocs ; ++i) { - OPJ_UINT32 l_last_layno1 = (p_pocs - 1)->layno1 ; - - layno0 = (p_pocs->layno1 > l_last_layno1) ? l_last_layno1 : 0; - index = step_r * p_pocs->resno0; - - /* take each resolution for each poc */ - for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) { - OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c; - - /* take each comp of each resolution for each poc */ - for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) { - OPJ_UINT32 comp_index = res_index + layno0 * step_l; + /* and finally take each layer of each res of ... */ + for (layno = layno0; layno < opj_uint_min(poc->layno1, p_num_layers); + ++layno) { + packet_array[comp_index] = 1; + comp_index += step_l; + } - /* and finally take each layer of each res of ... 
*/ - for (layno = layno0; layno < p_pocs->layno1 ; ++layno) { - /*index = step_r * resno + step_c * compno + step_l * layno;*/ - packet_array[comp_index] = 1; - comp_index += step_l; + res_index += step_c; } - res_index += step_c; + index += step_r; } - - index += step_r; } - - ++p_pocs; } index = 0; @@ -1702,7 +1689,13 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, for (resno = 0; resno < p_nb_resolutions; ++resno) { for (compno = 0; compno < p_num_comps; ++compno) { loss |= (packet_array[index] != 1); - /*index = step_r * resno + step_c * compno + step_l * layno;*/ +#ifdef DEBUG_VERBOSE + if (packet_array[index] != 1) { + fprintf(stderr, + "Missing packet in POC: layno=%d resno=%d compno=%d\n", + layno, resno, compno); + } +#endif index += step_c; } } @@ -2714,6 +2707,12 @@ static OPJ_BOOL opj_j2k_read_cod(opj_j2k_t *p_j2k, opj_read_bytes(p_header_data, &l_tcp->mct, 1); /* SGcod (C) */ ++p_header_data; + if (l_tcp->mct > 1) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid multiple component transformation\n"); + return OPJ_FALSE; + } + p_header_size -= 5; for (i = 0; i < l_image->numcomps; ++i) { l_tcp->tccps[i].csty = l_tcp->csty & J2K_CCP_CSTY_PRT; @@ -3452,6 +3451,28 @@ static OPJ_UINT32 opj_j2k_get_specific_header_sizes(opj_j2k_t *p_j2k) l_nb_bytes += opj_j2k_get_max_poc_size(p_j2k); + if (p_j2k->m_specific_param.m_encoder.m_PLT) { + /* Reserve space for PLT markers */ + + OPJ_UINT32 i; + const opj_cp_t * l_cp = &(p_j2k->m_cp); + OPJ_UINT32 l_max_packet_count = 0; + for (i = 0; i < l_cp->th * l_cp->tw; ++i) { + l_max_packet_count = opj_uint_max(l_max_packet_count, + opj_get_encoding_packet_count(p_j2k->m_private_image, l_cp, i)); + } + /* Minimum 6 bytes per PLT marker, and at a minimum (taking a pessimistic */ + /* estimate of 4 bytes for a packet size), one can write */ + /* (65536-6) / 4 = 16382 paquet sizes per PLT marker */ + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT = + 6 * opj_uint_ceildiv(l_max_packet_count, 
16382); + /* Maximum 5 bytes per packet to encode a full UINT32 */ + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += + l_nb_bytes += 5 * l_max_packet_count; + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += 1; + l_nb_bytes += p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT; + } + /*** DEVELOPER CORNER, Add room for your headers ***/ return l_nb_bytes; @@ -4205,7 +4226,7 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, OPJ_UINT32 * p_data_written, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager @@ -4218,7 +4239,7 @@ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, OPJ_UNUSED(p_stream); - if (p_total_data_size < 12) { + if (total_data_size < 12) { opj_event_msg(p_manager, EVT_ERROR, "Not enough bytes in output buffer to write SOT marker\n"); return OPJ_FALSE; @@ -4611,17 +4632,105 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, return OPJ_TRUE; } +/** + * Write one or more PLT markers in the provided buffer + */ +static OPJ_BOOL opj_j2k_write_plt_in_memory(opj_j2k_t *p_j2k, + opj_tcd_marker_info_t* marker_info, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, + opj_event_mgr_t * p_manager) +{ + OPJ_BYTE Zplt = 0; + OPJ_UINT16 Lplt; + OPJ_BYTE* p_data_start = p_data; + OPJ_BYTE* p_data_Lplt = p_data + 2; + OPJ_UINT32 i; + + OPJ_UNUSED(p_j2k); + + opj_write_bytes(p_data, J2K_MS_PLT, 2); + p_data += 2; + + /* Reserve space for Lplt */ + p_data += 2; + + opj_write_bytes(p_data, Zplt, 1); + p_data += 1; + + Lplt = 3; + + for (i = 0; i < marker_info->packet_count; i++) { + OPJ_BYTE var_bytes[5]; + OPJ_UINT8 var_bytes_size = 0; + OPJ_UINT32 packet_size = marker_info->p_packet_size[i]; + + /* Packet size written in variable-length way, starting with LSB */ + var_bytes[var_bytes_size] = (OPJ_BYTE)(packet_size & 0x7f); + var_bytes_size ++; + packet_size >>= 7; + while 
(packet_size > 0) { + var_bytes[var_bytes_size] = (OPJ_BYTE)((packet_size & 0x7f) | 0x80); + var_bytes_size ++; + packet_size >>= 7; + } + + /* Check if that can fit in the current PLT marker. If not, finish */ + /* current one, and start a new one */ + if (Lplt + var_bytes_size > 65535) { + if (Zplt == 255) { + opj_event_msg(p_manager, EVT_ERROR, + "More than 255 PLT markers would be needed for current tile-part !\n"); + return OPJ_FALSE; + } + + /* Patch Lplt */ + opj_write_bytes(p_data_Lplt, Lplt, 2); + + /* Start new segment */ + opj_write_bytes(p_data, J2K_MS_PLT, 2); + p_data += 2; + + /* Reserve space for Lplt */ + p_data_Lplt = p_data; + p_data += 2; + + Zplt ++; + opj_write_bytes(p_data, Zplt, 1); + p_data += 1; + + Lplt = 3; + } + + Lplt = (OPJ_UINT16)(Lplt + var_bytes_size); + + /* Serialize variable-length packet size, starting with MSB */ + for (; var_bytes_size > 0; --var_bytes_size) { + opj_write_bytes(p_data, var_bytes[var_bytes_size - 1], 1); + p_data += 1; + } + } + + *p_data_written = (OPJ_UINT32)(p_data - p_data_start); + + /* Patch Lplt */ + opj_write_bytes(p_data_Lplt, Lplt, 2); + + return OPJ_TRUE; +} + static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, opj_tcd_t * p_tile_coder, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager ) { opj_codestream_info_t *l_cstr_info = 00; OPJ_UINT32 l_remaining_data; + opj_tcd_marker_info_t* marker_info = NULL; /* preconditions */ assert(p_j2k != 00); @@ -4630,7 +4739,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, OPJ_UNUSED(p_stream); - if (p_total_data_size < 4) { + if (total_data_size < 4) { opj_event_msg(p_manager, EVT_ERROR, "Not enough bytes in output buffer to write SOD marker\n"); return OPJ_FALSE; @@ -4638,10 +4747,9 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, opj_write_bytes(p_data, J2K_MS_SOD, 2); /* SOD */ - p_data += 2; /* make room for the EOF marker 
*/ - l_remaining_data = p_total_data_size - 4; + l_remaining_data = total_data_size - 4; /* update tile coder */ p_tile_coder->tp_num = @@ -4688,15 +4796,69 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, *p_data_written = 0; - if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, p_data, + if (p_j2k->m_specific_param.m_encoder.m_PLT) { + marker_info = opj_tcd_marker_info_create( + p_j2k->m_specific_param.m_encoder.m_PLT); + if (marker_info == NULL) { + opj_event_msg(p_manager, EVT_ERROR, + "Cannot encode tile: opj_tcd_marker_info_create() failed\n"); + return OPJ_FALSE; + } + } + + if (l_remaining_data < + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT) { + opj_event_msg(p_manager, EVT_ERROR, + "Not enough bytes in output buffer to write SOD marker\n"); + opj_tcd_marker_info_destroy(marker_info); + return OPJ_FALSE; + } + l_remaining_data -= p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT; + + if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, + p_data + 2, p_data_written, l_remaining_data, l_cstr_info, + marker_info, p_manager)) { opj_event_msg(p_manager, EVT_ERROR, "Cannot encode tile\n"); + opj_tcd_marker_info_destroy(marker_info); return OPJ_FALSE; } + /* For SOD */ *p_data_written += 2; + if (p_j2k->m_specific_param.m_encoder.m_PLT) { + OPJ_UINT32 l_data_written_PLT = 0; + OPJ_BYTE* p_PLT_buffer = (OPJ_BYTE*)opj_malloc( + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT); + if (!p_PLT_buffer) { + opj_event_msg(p_manager, EVT_ERROR, "Cannot allocate memory\n"); + opj_tcd_marker_info_destroy(marker_info); + return OPJ_FALSE; + } + if (!opj_j2k_write_plt_in_memory(p_j2k, + marker_info, + p_PLT_buffer, + &l_data_written_PLT, + p_manager)) { + opj_tcd_marker_info_destroy(marker_info); + opj_free(p_PLT_buffer); + return OPJ_FALSE; + } + + assert(l_data_written_PLT <= + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT); + + /* Move PLT marker(s) before SOD */ + memmove(p_data + 
l_data_written_PLT, p_data, *p_data_written); + memcpy(p_data, p_PLT_buffer, l_data_written_PLT); + opj_free(p_PLT_buffer); + *p_data_written += l_data_written_PLT; + } + + opj_tcd_marker_info_destroy(marker_info); + return OPJ_TRUE; } @@ -5046,7 +5208,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, OPJ_FLOAT32 * l_rates = 0; OPJ_FLOAT32 l_sot_remove; OPJ_UINT32 l_bits_empty, l_size_pixel; - OPJ_UINT32 l_tile_size = 0; + OPJ_UINT64 l_tile_size = 0; OPJ_UINT32 l_last_res; OPJ_FLOAT32(* l_tp_stride_func)(opj_tcp_t *) = 00; @@ -5090,25 +5252,12 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, l_rates = l_tcp->rates; /* Modification of the RATE >> */ - if (*l_rates > 0.0f) { - *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) * - (OPJ_UINT32)(l_y1 - l_y0))) - / - ((*l_rates) * (OPJ_FLOAT32)l_bits_empty) - ) - - - l_offset; - } - - ++l_rates; - - for (k = 1; k < l_tcp->numlayers; ++k) { + for (k = 0; k < l_tcp->numlayers; ++k) { if (*l_rates > 0.0f) { - *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) * - (OPJ_UINT32)(l_y1 - l_y0))) - / - ((*l_rates) * (OPJ_FLOAT32)l_bits_empty) - ) + *l_rates = (OPJ_FLOAT32)(((OPJ_FLOAT64)l_size_pixel * (OPJ_UINT32)( + l_x1 - l_x0) * + (OPJ_UINT32)(l_y1 - l_y0)) + / ((*l_rates) * (OPJ_FLOAT32)l_bits_empty)) - l_offset; } @@ -5168,12 +5317,11 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, l_tile_size = 0; for (i = 0; i < l_image->numcomps; ++i) { - l_tile_size += (opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx) - * - opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy) - * - l_img_comp->prec - ); + l_tile_size += (OPJ_UINT64)opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx) + * + opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy) + * + l_img_comp->prec; ++l_img_comp; } @@ -5184,7 +5332,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, /* bin/test_tile_encoder 1 256 256 32 32 8 0 reversible_with_precinct.j2k 4 4 3 0 0 1 16 16 */ /* TODO revise this to take into account the overhead 
linked to the */ /* number of packets and number of code blocks in packets */ - l_tile_size = (OPJ_UINT32)(l_tile_size * 1.4 / 8); + l_tile_size = (OPJ_UINT64)((double)l_tile_size * 1.4 / 8); /* Arbitrary amount to make the following work: */ /* bin/test_tile_encoder 1 256 256 17 16 8 0 reversible_no_precinct.j2k 4 4 3 0 0 1 */ @@ -5192,14 +5340,21 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, l_tile_size += opj_j2k_get_specific_header_sizes(p_j2k); - p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = l_tile_size; + if (l_tile_size > UINT_MAX) { + l_tile_size = UINT_MAX; + } + + p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = (OPJ_UINT32)l_tile_size; p_j2k->m_specific_param.m_encoder.m_encoded_tile_data = (OPJ_BYTE *) opj_malloc(p_j2k->m_specific_param.m_encoder.m_encoded_tile_size); if (p_j2k->m_specific_param.m_encoder.m_encoded_tile_data == 00) { + opj_event_msg(p_manager, EVT_ERROR, + "Not enough memory to allocate m_encoded_tile_data. %u MB required\n", + (OPJ_UINT32)(l_tile_size / 1024 / 1024)); return OPJ_FALSE; } - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_buffer = (OPJ_BYTE *) opj_malloc(5 * p_j2k->m_specific_param.m_encoder.m_total_tile_parts); @@ -6627,7 +6782,7 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters, } /* Precincts */ - parameters->csty |= 0x01; + parameters->csty |= J2K_CP_CSTY_PRT; if (parameters->numresolution == 1) { parameters->res_spec = 1; parameters->prcw_init[0] = 128; @@ -6753,6 +6908,589 @@ static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz, return OPJ_TRUE; } +static int opj_j2k_get_imf_max_NL(opj_cparameters_t *parameters, + opj_image_t *image) +{ + /* Decomposition levels */ + const OPJ_UINT16 rsiz = parameters->rsiz; + const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); + const OPJ_UINT32 XTsiz = parameters->tile_size_on ? 
(OPJ_UINT32) + parameters->cp_tdx : image->x1; + switch (profile) { + case OPJ_PROFILE_IMF_2K: + return 5; + case OPJ_PROFILE_IMF_4K: + return 6; + case OPJ_PROFILE_IMF_8K: + return 7; + case OPJ_PROFILE_IMF_2K_R: { + if (XTsiz >= 2048) { + return 5; + } else if (XTsiz >= 1024) { + return 4; + } + break; + } + case OPJ_PROFILE_IMF_4K_R: { + if (XTsiz >= 4096) { + return 6; + } else if (XTsiz >= 2048) { + return 5; + } else if (XTsiz >= 1024) { + return 4; + } + break; + } + case OPJ_PROFILE_IMF_8K_R: { + if (XTsiz >= 8192) { + return 7; + } else if (XTsiz >= 4096) { + return 6; + } else if (XTsiz >= 2048) { + return 5; + } else if (XTsiz >= 1024) { + return 4; + } + break; + } + default: + break; + } + return -1; +} + +static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters, + opj_image_t *image, opj_event_mgr_t *p_manager) +{ + const OPJ_UINT16 rsiz = parameters->rsiz; + const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); + + OPJ_UNUSED(p_manager); + + /* Override defaults set by opj_set_default_encoder_parameters */ + if (parameters->cblockw_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKW && + parameters->cblockh_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKH) { + parameters->cblockw_init = 32; + parameters->cblockh_init = 32; + } + + /* One tile part for each component */ + parameters->tp_flag = 'C'; + parameters->tp_on = 1; + + if (parameters->prog_order == OPJ_COMP_PARAM_DEFAULT_PROG_ORDER) { + parameters->prog_order = OPJ_CPRL; + } + + if (profile == OPJ_PROFILE_IMF_2K || + profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K) { + /* 9-7 transform */ + parameters->irreversible = 1; + } + + /* Adjust the number of resolutions if set to its defaults */ + if (parameters->numresolution == OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION && + image->x0 == 0 && + image->y0 == 0) { + const int max_NL = opj_j2k_get_imf_max_NL(parameters, image); + if (max_NL >= 0 && parameters->numresolution > max_NL) { + parameters->numresolution = max_NL + 1; + } + + /* Note: below 
is generic logic */ + if (!parameters->tile_size_on) { + while (parameters->numresolution > 0) { + if (image->x1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) { + parameters->numresolution --; + continue; + } + if (image->y1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) { + parameters->numresolution --; + continue; + } + break; + } + } + } + + /* Set defaults precincts */ + if (parameters->csty == 0) { + parameters->csty |= J2K_CP_CSTY_PRT; + if (parameters->numresolution == 1) { + parameters->res_spec = 1; + parameters->prcw_init[0] = 128; + parameters->prch_init[0] = 128; + } else { + int i; + parameters->res_spec = parameters->numresolution - 1; + for (i = 0; i < parameters->res_spec; i++) { + parameters->prcw_init[i] = 256; + parameters->prch_init[i] = 256; + } + } + } +} + +/* Table A.53 from JPEG2000 standard */ +static const OPJ_UINT16 tabMaxSubLevelFromMainLevel[] = { + 15, /* unspecified */ + 1, + 1, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 +}; + +static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters, + opj_image_t *image, + opj_event_mgr_t *p_manager) +{ + OPJ_UINT32 i; + const OPJ_UINT16 rsiz = parameters->rsiz; + const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); + const OPJ_UINT16 mainlevel = OPJ_GET_IMF_MAINLEVEL(rsiz); + const OPJ_UINT16 sublevel = OPJ_GET_IMF_SUBLEVEL(rsiz); + const int NL = parameters->numresolution - 1; + const OPJ_UINT32 XTsiz = parameters->tile_size_on ? 
(OPJ_UINT32) + parameters->cp_tdx : image->x1; + OPJ_BOOL ret = OPJ_TRUE; + + /* Validate mainlevel */ + if (mainlevel > OPJ_IMF_MAINLEVEL_MAX) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require mainlevel <= 11.\n" + "-> %d is thus not compliant\n" + "-> Non-IMF codestream will be generated\n", + mainlevel); + ret = OPJ_FALSE; + } + + /* Validate sublevel */ + assert(sizeof(tabMaxSubLevelFromMainLevel) == + (OPJ_IMF_MAINLEVEL_MAX + 1) * sizeof(tabMaxSubLevelFromMainLevel[0])); + if (sublevel > tabMaxSubLevelFromMainLevel[mainlevel]) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require sublevel <= %d for mainlevel = %d.\n" + "-> %d is thus not compliant\n" + "-> Non-IMF codestream will be generated\n", + tabMaxSubLevelFromMainLevel[mainlevel], + mainlevel, + sublevel); + ret = OPJ_FALSE; + } + + /* Number of components */ + if (image->numcomps > 3) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require at most 3 components.\n" + "-> Number of components of input image (%d) is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->numcomps); + ret = OPJ_FALSE; + } + + if (image->x0 != 0 || image->y0 != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require image origin to be at 0,0.\n" + "-> %d,%d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->x0, image->y0 != 0); + ret = OPJ_FALSE; + } + + if (parameters->cp_tx0 != 0 || parameters->cp_ty0 != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require tile origin to be at 0,0.\n" + "-> %d,%d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + parameters->cp_tx0, parameters->cp_ty0); + ret = OPJ_FALSE; + } + + if (parameters->tile_size_on) { + if (profile == OPJ_PROFILE_IMF_2K || + profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K) { + if ((OPJ_UINT32)parameters->cp_tdx < image->x1 || + (OPJ_UINT32)parameters->cp_tdy < image->y1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 
2K/4K/8K single tile profiles require tile to be greater or equal to image size.\n" + "-> %d,%d is lesser than %d,%d\n" + "-> Non-IMF codestream will be generated\n", + parameters->cp_tdx, + parameters->cp_tdy, + image->x1, + image->y1); + ret = OPJ_FALSE; + } + } else { + if ((OPJ_UINT32)parameters->cp_tdx >= image->x1 && + (OPJ_UINT32)parameters->cp_tdy >= image->y1) { + /* ok */ + } else if (parameters->cp_tdx == 1024 && + parameters->cp_tdy == 1024) { + /* ok */ + } else if (parameters->cp_tdx == 2048 && + parameters->cp_tdy == 2048 && + (profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K)) { + /* ok */ + } else if (parameters->cp_tdx == 4096 && + parameters->cp_tdy == 4096 && + profile == OPJ_PROFILE_IMF_8K) { + /* ok */ + } else { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K_R/4K_R/8K_R single/multiple tile profiles " + "require tile to be greater or equal to image size,\n" + "or to be (1024,1024), or (2048,2048) for 4K_R/8K_R " + "or (4096,4096) for 8K_R.\n" + "-> %d,%d is non conformant\n" + "-> Non-IMF codestream will be generated\n", + parameters->cp_tdx, + parameters->cp_tdy); + ret = OPJ_FALSE; + } + } + } + + /* Bitdepth */ + for (i = 0; i < image->numcomps; i++) { + if (!(image->comps[i].bpp >= 8 && image->comps[i].bpp <= 16) || + (image->comps[i].sgnd)) { + char signed_str[] = "signed"; + char unsigned_str[] = "unsigned"; + char *tmp_str = image->comps[i].sgnd ? signed_str : unsigned_str; + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require precision of each component to b in [8-16] bits unsigned" + "-> At least component %d of input image (%d bits, %s) is not compliant\n" + "-> Non-IMF codestream will be generated\n", + i, image->comps[i].bpp, tmp_str); + ret = OPJ_FALSE; + } + } + + /* Sub-sampling */ + for (i = 0; i < image->numcomps; i++) { + if (i == 0 && image->comps[i].dx != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require XRSiz1 == 1. 
Here it is set to %d.\n" + "-> Non-IMF codestream will be generated\n", + image->comps[i].dx); + ret = OPJ_FALSE; + } + if (i == 1 && image->comps[i].dx != 1 && image->comps[i].dx != 2) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require XRSiz2 == 1 or 2. Here it is set to %d.\n" + "-> Non-IMF codestream will be generated\n", + image->comps[i].dx); + ret = OPJ_FALSE; + } + if (i > 1 && image->comps[i].dx != image->comps[i - 1].dx) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require XRSiz%d to be the same as XRSiz2. " + "Here it is set to %d instead of %d.\n" + "-> Non-IMF codestream will be generated\n", + i + 1, image->comps[i].dx, image->comps[i - 1].dx); + ret = OPJ_FALSE; + } + if (image->comps[i].dy != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require YRsiz == 1. " + "Here it is set to %d for component i.\n" + "-> Non-IMF codestream will be generated\n", + image->comps[i].dy, i); + ret = OPJ_FALSE; + } + } + + /* Image size */ + switch (profile) { + case OPJ_PROFILE_IMF_2K: + case OPJ_PROFILE_IMF_2K_R: + if (((image->comps[0].w > 2048) | (image->comps[0].h > 1556))) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/2K_R profile require:\n" + "width <= 2048 and height <= 1556\n" + "-> Input image size %d x %d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->comps[0].w, image->comps[0].h); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_4K: + case OPJ_PROFILE_IMF_4K_R: + if (((image->comps[0].w > 4096) | (image->comps[0].h > 3112))) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K/4K_R profile require:\n" + "width <= 4096 and height <= 3112\n" + "-> Input image size %d x %d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->comps[0].w, image->comps[0].h); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_8K: + case OPJ_PROFILE_IMF_8K_R: + if (((image->comps[0].w > 8192) | (image->comps[0].h > 6224))) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 
8K/8K_R profile require:\n" + "width <= 8192 and height <= 6224\n" + "-> Input image size %d x %d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->comps[0].w, image->comps[0].h); + ret = OPJ_FALSE; + } + break; + default : + assert(0); + return OPJ_FALSE; + } + + if (parameters->roi_compno != -1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile forbid RGN / region of interest marker.\n" + "-> Compression parameters specify a ROI\n" + "-> Non-IMF codestream will be generated\n"); + ret = OPJ_FALSE; + } + + if (parameters->cblockw_init != 32 || parameters->cblockh_init != 32) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require code block size to be 32x32.\n" + "-> Compression parameters set it to %dx%d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->cblockw_init, + parameters->cblockh_init); + ret = OPJ_FALSE; + } + + if (parameters->prog_order != OPJ_CPRL) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require progression order to be CPRL.\n" + "-> Compression parameters set it to %d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->prog_order); + ret = OPJ_FALSE; + } + + if (parameters->numpocs != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile forbid POC markers.\n" + "-> Compression parameters set %d POC.\n" + "-> Non-IMF codestream will be generated\n", + parameters->numpocs); + ret = OPJ_FALSE; + } + + /* Codeblock style: no mode switch enabled */ + if (parameters->mode != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile forbid mode switch in code block style.\n" + "-> Compression parameters set code block style to %d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->mode); + ret = OPJ_FALSE; + } + + if (profile == OPJ_PROFILE_IMF_2K || + profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K) { + /* Expect 9-7 transform */ + if (parameters->irreversible != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K profiles 
require 9-7 Irreversible Transform.\n" + "-> Compression parameters set it to reversible.\n" + "-> Non-IMF codestream will be generated\n"); + ret = OPJ_FALSE; + } + } else { + /* Expect 5-3 transform */ + if (parameters->irreversible != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K profiles require 5-3 reversible Transform.\n" + "-> Compression parameters set it to irreversible.\n" + "-> Non-IMF codestream will be generated\n"); + ret = OPJ_FALSE; + } + } + + /* Number of layers */ + if (parameters->tcp_numlayers != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K profiles require 1 single quality layer.\n" + "-> Number of layers is %d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->tcp_numlayers); + ret = OPJ_FALSE; + } + + /* Decomposition levels */ + switch (profile) { + case OPJ_PROFILE_IMF_2K: + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K profile requires 1 <= NL <= 5:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_4K: + if (!(NL >= 1 && NL <= 6)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K profile requires 1 <= NL <= 6:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_8K: + if (!(NL >= 1 && NL <= 7)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 8K profile requires 1 <= NL <= 7:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_2K_R: { + if (XTsiz >= 2048) { + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K_R profile requires 1 <= NL <= 5 for XTsiz >= 2048:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 1024) { + 
if (!(NL >= 1 && NL <= 4)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + break; + } + case OPJ_PROFILE_IMF_4K_R: { + if (XTsiz >= 4096) { + if (!(NL >= 1 && NL <= 6)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz >= 4096:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 2048) { + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 1024) { + if (!(NL >= 1 && NL <= 4)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + break; + } + case OPJ_PROFILE_IMF_8K_R: { + if (XTsiz >= 8192) { + if (!(NL >= 1 && NL <= 7)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 7 for XTsiz >= 8192:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 4096) { + if (!(NL >= 1 && NL <= 6)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz in [4096,8192[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 2048) { + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n" + "-> Number of 
decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 1024) { + if (!(NL >= 1 && NL <= 4)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + break; + } + default: + break; + } + + if (parameters->numresolution == 1) { + if (parameters->res_spec != 1 || + parameters->prcw_init[0] != 128 || + parameters->prch_init[0] != 128) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n" + "-> Supplied values are different from that.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else { + int i; + for (i = 0; i < parameters->res_spec; i++) { + if (parameters->prcw_init[i] != 256 || + parameters->prch_init[i] != 256) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n" + "-> Supplied values are different from that.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + } + + return ret; +} + + OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, opj_cparameters_t *parameters, opj_image_t *image, @@ -6945,6 +7683,15 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, } else { OPJ_FLOAT32 temp_rate; OPJ_BOOL cap = OPJ_FALSE; + + if (OPJ_IS_IMF(parameters->rsiz) && parameters->max_cs_size > 0 && + parameters->tcp_numlayers == 1 && parameters->tcp_rates[0] == 0) { + parameters->tcp_rates[0] = (OPJ_FLOAT32)(image->numcomps * image->comps[0].w * + image->comps[0].h * image->comps[0].prec) / + (OPJ_FLOAT32)(((OPJ_UINT32)parameters->max_cs_size) * 8 * image->comps[0].dx * + image->comps[0].dy); + } + temp_rate = (OPJ_FLOAT32)(((double)image->numcomps * image->comps[0].w * image->comps[0].h * image->comps[0].prec) / (((double)parameters->max_cs_size) * 8 * 
image->comps[0].dx * @@ -6985,9 +7732,10 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, "JPEG 2000 Broadcast profiles not yet supported\n"); parameters->rsiz = OPJ_PROFILE_NONE; } else if (OPJ_IS_IMF(parameters->rsiz)) { - opj_event_msg(p_manager, EVT_WARNING, - "JPEG 2000 IMF profiles not yet supported\n"); - parameters->rsiz = OPJ_PROFILE_NONE; + opj_j2k_set_imf_parameters(parameters, image, p_manager); + if (!opj_j2k_is_imf_compliant(parameters, image, p_manager)) { + parameters->rsiz = OPJ_PROFILE_NONE; + } } else if (OPJ_IS_PART2(parameters->rsiz)) { if (parameters->rsiz == ((OPJ_PROFILE_PART2) | (OPJ_EXTENSION_NONE))) { opj_event_msg(p_manager, EVT_WARNING, @@ -7079,6 +7827,14 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, */ if (parameters->tile_size_on) { + if (cp->tdx == 0) { + opj_event_msg(p_manager, EVT_ERROR, "Invalid tile width\n"); + return OPJ_FALSE; + } + if (cp->tdy == 0) { + opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n"); + return OPJ_FALSE; + } cp->tw = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->x1 - cp->tx0), (OPJ_INT32)cp->tdx); cp->th = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->y1 - cp->ty0), @@ -7157,20 +7913,13 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, "Not enough memory to allocate tile coding parameters\n"); return OPJ_FALSE; } - if (parameters->numpocs) { - /* initialisation of POC */ - opj_j2k_check_poc_val(parameters->POC, parameters->numpocs, - (OPJ_UINT32)parameters->numresolution, image->numcomps, - (OPJ_UINT32)parameters->tcp_numlayers, p_manager); - /* TODO MSD use the return value*/ - } for (tileno = 0; tileno < cp->tw * cp->th; tileno++) { opj_tcp_t *tcp = &cp->tcps[tileno]; tcp->numlayers = (OPJ_UINT32)parameters->tcp_numlayers; for (j = 0; j < tcp->numlayers; j++) { - if (OPJ_IS_CINEMA(cp->rsiz)) { + if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) { if (cp->m_specific_param.m_enc.m_fixed_quality) { tcp->distoratio[j] = parameters->tcp_distoratio[j]; } @@ -7197,16 +7946,22 @@ 
OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, if (parameters->numpocs) { /* initialisation of POC */ - tcp->POC = 1; for (i = 0; i < parameters->numpocs; i++) { if (tileno + 1 == parameters->POC[i].tile) { opj_poc_t *tcp_poc = &tcp->pocs[numpocs_tile]; + if (parameters->POC[numpocs_tile].compno0 >= image->numcomps) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid compno0 for POC %d\n", i); + return OPJ_FALSE; + } + tcp_poc->resno0 = parameters->POC[numpocs_tile].resno0; tcp_poc->compno0 = parameters->POC[numpocs_tile].compno0; tcp_poc->layno1 = parameters->POC[numpocs_tile].layno1; tcp_poc->resno1 = parameters->POC[numpocs_tile].resno1; - tcp_poc->compno1 = parameters->POC[numpocs_tile].compno1; + tcp_poc->compno1 = opj_uint_min(parameters->POC[numpocs_tile].compno1, + image->numcomps); tcp_poc->prg1 = parameters->POC[numpocs_tile].prg1; tcp_poc->tile = parameters->POC[numpocs_tile].tile; @@ -7214,7 +7969,16 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, } } - tcp->numpocs = numpocs_tile - 1 ; + if (numpocs_tile) { + + /* TODO MSD use the return value*/ + opj_j2k_check_poc_val(parameters->POC, tileno, parameters->numpocs, + (OPJ_UINT32)parameters->numresolution, image->numcomps, + (OPJ_UINT32)parameters->tcp_numlayers, p_manager); + + tcp->POC = 1; + tcp->numpocs = numpocs_tile - 1 ; + } } else { tcp->numpocs = 0; } @@ -7542,6 +8306,8 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream, /*Allocate and initialize some elements of codestrem index*/ if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { + opj_image_destroy(*p_image); + *p_image = NULL; return OPJ_FALSE; } @@ -8628,6 +9394,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, OPJ_UINT32 l_marker_size; const opj_dec_memory_marker_handler_t * l_marker_handler = 00; opj_tcp_t * l_tcp = NULL; + const OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th; /* preconditions */ assert(p_stream != 00); @@ -8803,7 +9570,6 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, return 
OPJ_FALSE; } if (l_correction_needed) { - OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th; OPJ_UINT32 l_tile_no; p_j2k->m_specific_param.m_decoder.m_can_decode = 0; @@ -8818,27 +9584,42 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, "Non conformant codestream TPsot==TNsot.\n"); } } - if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { - /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */ - if (opj_stream_read_data(p_stream, - p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { - opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); - return OPJ_FALSE; - } - - /* Read 2 bytes from buffer as the new marker ID */ - opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, - &l_current_marker, 2); - } } else { /* Indicate we will try to read a new tile-part header*/ p_j2k->m_specific_param.m_decoder.m_skip_data = 0; p_j2k->m_specific_param.m_decoder.m_can_decode = 0; p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT; + } + if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */ if (opj_stream_read_data(p_stream, p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { + + /* Deal with likely non conformant SPOT6 files, where the last */ + /* row of tiles have TPsot == 0 and TNsot == 0, and missing EOC, */ + /* but no other tile-parts were found. */ + if (p_j2k->m_current_tile_number + 1 == l_nb_tiles) { + OPJ_UINT32 l_tile_no; + for (l_tile_no = 0U; l_tile_no < l_nb_tiles; ++l_tile_no) { + if (p_j2k->m_cp.tcps[l_tile_no].m_current_tile_part_number == 0 && + p_j2k->m_cp.tcps[l_tile_no].m_nb_tile_parts == 0) { + break; + } + } + if (l_tile_no < l_nb_tiles) { + opj_event_msg(p_manager, EVT_INFO, + "Tile %u has TPsot == 0 and TNsot == 0, " + "but no other tile-parts were found. 
" + "EOC is also missing.\n", + l_tile_no); + p_j2k->m_current_tile_number = l_tile_no; + l_current_marker = J2K_MS_EOC; + p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_EOC; + break; + } + } + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); return OPJ_FALSE; } @@ -8857,9 +9638,8 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, } } - /* FIXME DOC ???*/ + /* Deal with tiles that have a single tile-part with TPsot == 0 and TNsot == 0 */ if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { - OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw; l_tcp = p_j2k->m_cp.tcps + p_j2k->m_current_tile_number; while ((p_j2k->m_current_tile_number < l_nb_tiles) && (l_tcp->m_data == 00)) { @@ -9762,9 +10542,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, - 1); /* SPcox (D) */ - ++l_tccp->numresolutions; /* tccp->numresolutions = read() + 1 */ + /* SPcod (D) / SPcoc (A) */ + opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, 1); + ++l_tccp->numresolutions; /* tccp->numresolutions = read() + 1 */ if (l_tccp->numresolutions > OPJ_J2K_MAXRLVLS) { opj_event_msg(p_manager, EVT_ERROR, "Invalid value for numresolutions : %d, max value is set in openjpeg.h at %d\n", @@ -9785,11 +10565,13 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1); /* SPcoc (E) */ + /* SPcod (E) / SPcoc (B) */ + opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1); ++l_current_ptr; l_tccp->cblkw += 2; - opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1); /* SPcoc (F) */ + /* SPcod (F) / SPcoc (C) */ + opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1); ++l_current_ptr; l_tccp->cblkh += 2; @@ -9800,8 +10582,8 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - - opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1); /* SPcoc (G) */ + /* SPcod (G) / SPcoc (D) */ + 
opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1); ++l_current_ptr; if (l_tccp->cblksty & 0xC0U) { /* 2 msb are reserved, assume we can't read */ opj_event_msg(p_manager, EVT_ERROR, @@ -9809,9 +10591,16 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); /* SPcoc (H) */ + /* SPcod (H) / SPcoc (E) */ + opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); ++l_current_ptr; + if (l_tccp->qmfbid > 1) { + opj_event_msg(p_manager, EVT_ERROR, + "Error reading SPCod SPCoc element, Invalid transformation found\n"); + return OPJ_FALSE; + } + *p_header_size = *p_header_size - 5; /* use custom precinct size ? */ @@ -9821,8 +10610,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } + /* SPcod (I_i) / SPcoc (F_i) */ for (i = 0; i < l_tccp->numresolutions; ++i) { - opj_read_bytes(l_current_ptr, &l_tmp, 1); /* SPcoc (I_i) */ + opj_read_bytes(l_current_ptr, &l_tmp, 1); ++l_current_ptr; /* Precinct exponent 0 is only allowed for lowest resolution level (Table A.21) */ if ((i != 0) && (((l_tmp & 0xf) == 0) || ((l_tmp >> 4) == 0))) { @@ -11234,6 +12024,42 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k, return OPJ_FALSE; } +/* ----------------------------------------------------------------------- */ + +OPJ_BOOL opj_j2k_encoder_set_extra_options( + opj_j2k_t *p_j2k, + const char* const* p_options, + opj_event_mgr_t * p_manager) +{ + const char* const* p_option_iter; + + if (p_options == NULL) { + return OPJ_TRUE; + } + + for (p_option_iter = p_options; *p_option_iter != NULL; ++p_option_iter) { + if (strncmp(*p_option_iter, "PLT=", 4) == 0) { + if (strcmp(*p_option_iter, "PLT=YES") == 0) { + p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_TRUE; + } else if (strcmp(*p_option_iter, "PLT=NO") == 0) { + p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_FALSE; + } else { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for option: %s.\n", *p_option_iter); + 
return OPJ_FALSE; + } + } else { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid option: %s.\n", *p_option_iter); + return OPJ_FALSE; + } + } + + return OPJ_TRUE; +} + +/* ----------------------------------------------------------------------- */ + OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager) @@ -11291,7 +12117,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, } } } - l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd); + l_current_tile_size = opj_tcd_get_encoder_input_buffer_size(p_j2k->m_tcd); if (!l_reuse_data) { if (l_current_tile_size > l_max_tile_size) { OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, @@ -11619,7 +12445,7 @@ static OPJ_BOOL opj_j2k_setup_end_compress(opj_j2k_t *p_j2k, return OPJ_FALSE; } - if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) { + if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) { if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, (opj_procedure)opj_j2k_write_updated_tlm, p_manager)) { return OPJ_FALSE; @@ -11702,7 +12528,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, return OPJ_FALSE; } - if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) { + if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) { if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, (opj_procedure)opj_j2k_write_tlm, p_manager)) { return OPJ_FALSE; @@ -11729,7 +12555,8 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, } /* DEVELOPER CORNER, insert your custom procedures */ - if (p_j2k->m_cp.rsiz & OPJ_EXTENSION_MCT) { + if ((p_j2k->m_cp.rsiz & (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) == + (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) { if (! 
opj_procedure_list_add_procedure(p_j2k->m_procedure_list, (opj_procedure)opj_j2k_write_mct_data_group, p_manager)) { return OPJ_FALSE; @@ -11759,7 +12586,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager) { @@ -11783,7 +12610,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, l_current_nb_bytes_written = 0; l_begin_data = p_data; - if (! opj_j2k_write_sot(p_j2k, p_data, p_total_data_size, + if (! opj_j2k_write_sot(p_j2k, p_data, total_data_size, &l_current_nb_bytes_written, p_stream, p_manager)) { return OPJ_FALSE; @@ -11791,7 +12618,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; if (!OPJ_IS_CINEMA(l_cp->rsiz)) { #if 0 @@ -11801,29 +12628,29 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, p_manager); l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_current_nb_bytes_written = 0; opj_j2k_write_qcc_in_memory(p_j2k, compno, p_data, &l_current_nb_bytes_written, p_manager); l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; } #endif - if (l_cp->tcps[p_j2k->m_current_tile_number].numpocs) { + if (l_cp->tcps[p_j2k->m_current_tile_number].POC) { l_current_nb_bytes_written = 0; opj_j2k_write_poc_in_memory(p_j2k, p_data, &l_current_nb_bytes_written, p_manager); l_nb_bytes_written += 
l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; } } l_current_nb_bytes_written = 0; if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, - p_total_data_size, p_stream, p_manager)) { + total_data_size, p_stream, p_manager)) { return OPJ_FALSE; } @@ -11834,7 +12661,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, opj_write_bytes(l_begin_data + 6, l_nb_bytes_written, 4); /* PSOT */ - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { opj_j2k_update_tlm(p_j2k, l_nb_bytes_written); } @@ -11844,7 +12671,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager ) @@ -11877,7 +12704,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_begin_data = p_data; if (! opj_j2k_write_sot(p_j2k, p_data, - p_total_data_size, + total_data_size, &l_current_nb_bytes_written, p_stream, p_manager)) { @@ -11886,25 +12713,25 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; l_current_nb_bytes_written = 0; if (! 
opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, - p_total_data_size, p_stream, p_manager)) { + total_data_size, p_stream, p_manager)) { return OPJ_FALSE; } p_data += l_current_nb_bytes_written; l_nb_bytes_written += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; /* Writing Psot in SOT marker */ opj_write_bytes(l_begin_data + 6, l_part_tile_size, 4); /* PSOT */ - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { opj_j2k_update_tlm(p_j2k, l_part_tile_size); } @@ -11923,7 +12750,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_begin_data = p_data; if (! opj_j2k_write_sot(p_j2k, p_data, - p_total_data_size, + total_data_size, &l_current_nb_bytes_written, p_stream, p_manager)) { return OPJ_FALSE; @@ -11931,26 +12758,26 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; l_current_nb_bytes_written = 0; if (! 
opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, - p_total_data_size, p_stream, p_manager)) { + total_data_size, p_stream, p_manager)) { return OPJ_FALSE; } l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; /* Writing Psot in SOT marker */ opj_write_bytes(l_begin_data + 6, l_part_tile_size, 4); /* PSOT */ - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { opj_j2k_update_tlm(p_j2k, l_part_tile_size); } diff --git a/openjpeg/src/lib/openjp2/j2k.h b/openjpeg/src/lib/openjp2/j2k.h index 5d393c981..9eb50b50d 100644 --- a/openjpeg/src/lib/openjp2/j2k.h +++ b/openjpeg/src/lib/openjp2/j2k.h @@ -531,8 +531,14 @@ typedef struct opj_j2k_enc { OPJ_BYTE * m_header_tile_data; /* size of the encoded_data */ + OPJ_UINT32 m_header_tile_data_size; + /* whether to generate PLT markers */ + OPJ_BOOL m_PLT; + + /* reserved bytes in m_encoded_tile_size for PLT markers */ + OPJ_UINT32 m_reserved_bytes_for_PLT; } opj_j2k_enc_t; @@ -577,15 +583,16 @@ typedef struct opj_j2k { /** the current tile coder/decoder **/ struct opj_tcd * m_tcd; - /** Number of threads to use */ - int m_num_threads; - /** Thread pool */ opj_thread_pool_t* m_tp; + /** Image width coming from JP2 IHDR box. 0 from a pure codestream */ OPJ_UINT32 ihdr_w; + + /** Image height coming from JP2 IHDR box. 0 from a pure codestream */ OPJ_UINT32 ihdr_h; - OPJ_UINT32 enumcs; + + /** Set to 1 by the decoder initialization if OPJ_DPARAMETERS_DUMP_FLAG is set */ unsigned int dump_state; } opj_j2k_t; @@ -827,6 +834,19 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k, OPJ_UINT32 res_factor, opj_event_mgr_t * p_manager); +/** + * Specify extra options for the encoder. + * + * @param p_j2k the jpeg2000 codec. 
+ * @param p_options options + * @param p_manager the user event manager + * + * @see opj_encoder_set_extra_options() for more details. + */ +OPJ_BOOL opj_j2k_encoder_set_extra_options( + opj_j2k_t *p_j2k, + const char* const* p_options, + opj_event_mgr_t * p_manager); /** * Writes a tile. diff --git a/openjpeg/src/lib/openjp2/jp2.c b/openjpeg/src/lib/openjp2/jp2.c index 4402ffe3c..7c065ba74 100644 --- a/openjpeg/src/lib/openjp2/jp2.c +++ b/openjpeg/src/lib/openjp2/jp2.c @@ -586,6 +586,12 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2, opj_read_bytes(p_image_header_data, &(jp2->numcomps), 2); /* NC */ p_image_header_data += 2; + if (jp2->h < 1 || jp2->w < 1 || jp2->numcomps < 1) { + opj_event_msg(p_manager, EVT_ERROR, + "Wrong values for: w(%d) h(%d) numcomps(%d) (ihdr)\n", + jp2->w, jp2->h, jp2->numcomps); + return OPJ_FALSE; + } if ((jp2->numcomps - 1U) >= 16384U) { /* unsigned underflow is well defined: 1U <= jp2->numcomps <= 16384U */ opj_event_msg(p_manager, EVT_ERROR, "Invalid number of components (ihdr)\n"); @@ -1584,9 +1590,7 @@ static OPJ_BOOL opj_jp2_read_colr(opj_jp2_t *jp2, "COLR BOX meth value is not a regular value (%d), " "so we will ignore the entire Colour Specification box. 
\n", jp2->meth); } - if (jp2->color.jp2_has_colr) { - jp2->j2k->enumcs = jp2->enumcs; - } + return OPJ_TRUE; } @@ -3236,6 +3240,18 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2, return opj_j2k_set_decoded_resolution_factor(p_jp2->j2k, res_factor, p_manager); } +/* ----------------------------------------------------------------------- */ + +OPJ_BOOL opj_jp2_encoder_set_extra_options( + opj_jp2_t *p_jp2, + const char* const* p_options, + opj_event_mgr_t * p_manager) +{ + return opj_j2k_encoder_set_extra_options(p_jp2->j2k, p_options, p_manager); +} + +/* ----------------------------------------------------------------------- */ + /* JPIP specific */ #ifdef USE_JPIP diff --git a/openjpeg/src/lib/openjp2/jp2.h b/openjpeg/src/lib/openjp2/jp2.h index 34abd5118..9e7fa5667 100644 --- a/openjpeg/src/lib/openjp2/jp2.h +++ b/openjpeg/src/lib/openjp2/jp2.h @@ -459,6 +459,20 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2, OPJ_UINT32 res_factor, opj_event_mgr_t * p_manager); +/** + * Specify extra options for the encoder. + * + * @param p_jp2 the jpeg2000 codec. + * @param p_options options + * @param p_manager the user event manager + * + * @see opj_encoder_set_extra_options() for more details. + */ +OPJ_BOOL opj_jp2_encoder_set_extra_options( + opj_jp2_t *p_jp2, + const char* const* p_options, + opj_event_mgr_t * p_manager); + /* TODO MSD: clean these 3 functions */ /** diff --git a/openjpeg/src/lib/openjp2/mct.c b/openjpeg/src/lib/openjp2/mct.c index b79d4b87c..88c8f4092 100644 --- a/openjpeg/src/lib/openjp2/mct.c +++ b/openjpeg/src/lib/openjp2/mct.c @@ -183,7 +183,7 @@ void opj_mct_decode( OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n) { - OPJ_UINT32 i; + OPJ_SIZE_T i; for (i = 0; i < n; ++i) { OPJ_INT32 y = c0[i]; OPJ_INT32 u = c1[i]; @@ -209,175 +209,72 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) /* */ /* Forward irreversible MCT. 
*/ /* */ -#ifdef __SSE4_1__ void opj_mct_encode_real( - OPJ_INT32* OPJ_RESTRICT c0, - OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, + OPJ_FLOAT32* OPJ_RESTRICT c0, + OPJ_FLOAT32* OPJ_RESTRICT c1, + OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n) { OPJ_SIZE_T i; - const OPJ_SIZE_T len = n; - - const __m128i ry = _mm_set1_epi32(2449); - const __m128i gy = _mm_set1_epi32(4809); - const __m128i by = _mm_set1_epi32(934); - const __m128i ru = _mm_set1_epi32(1382); - const __m128i gu = _mm_set1_epi32(2714); - /* const __m128i bu = _mm_set1_epi32(4096); */ - /* const __m128i rv = _mm_set1_epi32(4096); */ - const __m128i gv = _mm_set1_epi32(3430); - const __m128i bv = _mm_set1_epi32(666); - const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), - _MM_SHUFFLE(1, 0, 1, 0)); - - for (i = 0; i < (len & ~3U); i += 4) { - __m128i lo, hi; - __m128i y, u, v; - __m128i r = _mm_load_si128((const __m128i *) & (c0[i])); - __m128i g = _mm_load_si128((const __m128i *) & (c1[i])); - __m128i b = _mm_load_si128((const __m128i *) & (c2[i])); - - lo = r; - hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, ry); - hi = _mm_mul_epi32(hi, ry); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - y = _mm_blend_epi16(lo, hi, 0xCC); - - lo = g; - hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, gy); - hi = _mm_mul_epi32(hi, gy); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); - - lo = b; - hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, by); - hi = _mm_mul_epi32(hi, by); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); - 
_mm_store_si128((__m128i *) & (c0[i]), y); - - /*lo = b; - hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, mulround); - hi = _mm_mul_epi32(hi, mulround);*/ - lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0))); - hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1))); - lo = _mm_slli_epi64(lo, 12); - hi = _mm_slli_epi64(hi, 12); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - u = _mm_blend_epi16(lo, hi, 0xCC); - - lo = r; - hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, ru); - hi = _mm_mul_epi32(hi, ru); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); - - lo = g; - hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, gu); - hi = _mm_mul_epi32(hi, gu); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); - _mm_store_si128((__m128i *) & (c1[i]), u); +#ifdef __SSE__ + const __m128 YR = _mm_set1_ps(0.299f); + const __m128 YG = _mm_set1_ps(0.587f); + const __m128 YB = _mm_set1_ps(0.114f); + const __m128 UR = _mm_set1_ps(-0.16875f); + const __m128 UG = _mm_set1_ps(-0.331260f); + const __m128 UB = _mm_set1_ps(0.5f); + const __m128 VR = _mm_set1_ps(0.5f); + const __m128 VG = _mm_set1_ps(-0.41869f); + const __m128 VB = _mm_set1_ps(-0.08131f); + for (i = 0; i < (n >> 3); i ++) { + __m128 r, g, b, y, u, v; + + r = _mm_load_ps(c0); + g = _mm_load_ps(c1); + b = _mm_load_ps(c2); + y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)), + _mm_mul_ps(b, YB)); + u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)), + _mm_mul_ps(b, UB)); + v = 
_mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)), + _mm_mul_ps(b, VB)); + _mm_store_ps(c0, y); + _mm_store_ps(c1, u); + _mm_store_ps(c2, v); + c0 += 4; + c1 += 4; + c2 += 4; - /*lo = r; - hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, mulround); - hi = _mm_mul_epi32(hi, mulround);*/ - lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0))); - hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1))); - lo = _mm_slli_epi64(lo, 12); - hi = _mm_slli_epi64(hi, 12); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - v = _mm_blend_epi16(lo, hi, 0xCC); - - lo = g; - hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, gv); - hi = _mm_mul_epi32(hi, gv); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); - - lo = b; - hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, bv); - hi = _mm_mul_epi32(hi, bv); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); - _mm_store_si128((__m128i *) & (c2[i]), v); - } - for (; i < len; ++i) { - OPJ_INT32 r = c0[i]; - OPJ_INT32 g = c1[i]; - OPJ_INT32 b = c2[i]; - OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, - 4809) + opj_int_fix_mul(b, 934); - OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, - 2714) + opj_int_fix_mul(b, 4096); - OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, - 3430) - opj_int_fix_mul(b, 666); - c0[i] = y; - c1[i] = u; - c2[i] = v; + r = _mm_load_ps(c0); + g = _mm_load_ps(c1); + b = _mm_load_ps(c2); + y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)), + _mm_mul_ps(b, YB)); + u = 
_mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)), + _mm_mul_ps(b, UB)); + v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)), + _mm_mul_ps(b, VB)); + _mm_store_ps(c0, y); + _mm_store_ps(c1, u); + _mm_store_ps(c2, v); + c0 += 4; + c1 += 4; + c2 += 4; } -} -#else -void opj_mct_encode_real( - OPJ_INT32* OPJ_RESTRICT c0, - OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, - OPJ_SIZE_T n) -{ - OPJ_UINT32 i; + n &= 7; +#endif for (i = 0; i < n; ++i) { - OPJ_INT32 r = c0[i]; - OPJ_INT32 g = c1[i]; - OPJ_INT32 b = c2[i]; - OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, - 4809) + opj_int_fix_mul(b, 934); - OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, - 2714) + opj_int_fix_mul(b, 4096); - OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, - 3430) - opj_int_fix_mul(b, 666); + OPJ_FLOAT32 r = c0[i]; + OPJ_FLOAT32 g = c1[i]; + OPJ_FLOAT32 b = c2[i]; + OPJ_FLOAT32 y = 0.299f * r + 0.587f * g + 0.114f * b; + OPJ_FLOAT32 u = -0.16875f * r - 0.331260f * g + 0.5f * b; + OPJ_FLOAT32 v = 0.5f * r - 0.41869f * g - 0.08131f * b; c0[i] = y; c1[i] = u; c2[i] = v; } } -#endif /* */ /* Inverse irreversible MCT. 
*/ @@ -388,7 +285,7 @@ void opj_mct_decode_real( OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n) { - OPJ_UINT32 i; + OPJ_SIZE_T i; #ifdef __SSE__ __m128 vrv, vgu, vgv, vbu; vrv = _mm_set1_ps(1.402f); diff --git a/openjpeg/src/lib/openjp2/mct.h b/openjpeg/src/lib/openjp2/mct.h index 2e37ce733..3e1f5e494 100644 --- a/openjpeg/src/lib/openjp2/mct.h +++ b/openjpeg/src/lib/openjp2/mct.h @@ -85,8 +85,9 @@ Apply an irreversible multi-component transform to an image @param c2 Samples blue component @param n Number of samples for each component */ -void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); +void opj_mct_encode_real(OPJ_FLOAT32* OPJ_RESTRICT c0, + OPJ_FLOAT32* OPJ_RESTRICT c1, + OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Apply an irreversible multi-component inverse transform to an image @param c0 Samples for luminance component diff --git a/openjpeg/src/lib/openjp2/mqc.c b/openjpeg/src/lib/openjp2/mqc.c index 6299b171d..4cbfabd03 100644 --- a/openjpeg/src/lib/openjp2/mqc.c +++ b/openjpeg/src/lib/openjp2/mqc.c @@ -46,27 +46,6 @@ /** @name Local static functions */ /*@{*/ -/** -Output a byte, doing bit-stuffing if necessary. -After a 0xff byte, the next byte must be smaller than 0x90. 
-@param mqc MQC handle -*/ -static void opj_mqc_byteout(opj_mqc_t *mqc); -/** -Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000 -@param mqc MQC handle -*/ -static void opj_mqc_renorme(opj_mqc_t *mqc); -/** -Encode the most probable symbol -@param mqc MQC handle -*/ -static void opj_mqc_codemps(opj_mqc_t *mqc); -/** -Encode the most least symbol -@param mqc MQC handle -*/ -static void opj_mqc_codelps(opj_mqc_t *mqc); /** Fill mqc->c with 1's for flushing @param mqc MQC handle @@ -182,80 +161,6 @@ static const opj_mqc_state_t mqc_states[47 * 2] = { ========================================================== */ -static void opj_mqc_byteout(opj_mqc_t *mqc) -{ - /* bp is initialized to start - 1 in opj_mqc_init_enc() */ - /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */ - assert(mqc->bp >= mqc->start - 1); - if (*mqc->bp == 0xff) { - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); - mqc->c &= 0xfffff; - mqc->ct = 7; - } else { - if ((mqc->c & 0x8000000) == 0) { - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); - mqc->c &= 0x7ffff; - mqc->ct = 8; - } else { - (*mqc->bp)++; - if (*mqc->bp == 0xff) { - mqc->c &= 0x7ffffff; - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); - mqc->c &= 0xfffff; - mqc->ct = 7; - } else { - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); - mqc->c &= 0x7ffff; - mqc->ct = 8; - } - } - } -} - -static void opj_mqc_renorme(opj_mqc_t *mqc) -{ - do { - mqc->a <<= 1; - mqc->c <<= 1; - mqc->ct--; - if (mqc->ct == 0) { - opj_mqc_byteout(mqc); - } - } while ((mqc->a & 0x8000) == 0); -} - -static void opj_mqc_codemps(opj_mqc_t *mqc) -{ - mqc->a -= (*mqc->curctx)->qeval; - if ((mqc->a & 0x8000) == 0) { - if (mqc->a < (*mqc->curctx)->qeval) { - mqc->a = (*mqc->curctx)->qeval; - } else { - mqc->c += (*mqc->curctx)->qeval; - } - *mqc->curctx = (*mqc->curctx)->nmps; - opj_mqc_renorme(mqc); - } else { - mqc->c += (*mqc->curctx)->qeval; - } -} - -static void opj_mqc_codelps(opj_mqc_t *mqc) -{ 
- mqc->a -= (*mqc->curctx)->qeval; - if (mqc->a < (*mqc->curctx)->qeval) { - mqc->c += (*mqc->curctx)->qeval; - } else { - mqc->a = (*mqc->curctx)->qeval; - } - *mqc->curctx = (*mqc->curctx)->nlps; - opj_mqc_renorme(mqc); -} - static void opj_mqc_setbits(opj_mqc_t *mqc) { OPJ_UINT32 tempc = mqc->c + mqc->a; @@ -303,14 +208,6 @@ void opj_mqc_init_enc(opj_mqc_t *mqc, OPJ_BYTE *bp) mqc->end_of_byte_stream_counter = 0; } -void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d) -{ - if ((*mqc->curctx)->mps == d) { - opj_mqc_codemps(mqc); - } else { - opj_mqc_codelps(mqc); - } -} void opj_mqc_flush(opj_mqc_t *mqc) { @@ -329,8 +226,6 @@ void opj_mqc_flush(opj_mqc_t *mqc) } } -#define BYPASS_CT_INIT 0xDEADBEEF - void opj_mqc_bypass_init_enc(opj_mqc_t *mqc) { /* This function is normally called after at least one opj_mqc_flush() */ @@ -475,6 +370,43 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc) } } +static INLINE void opj_mqc_renorme(opj_mqc_t *mqc) +{ + opj_mqc_renorme_macro(mqc, mqc->a, mqc->c, mqc->ct); +} + +/** +Encode the most probable symbol +@param mqc MQC handle +*/ +static INLINE void opj_mqc_codemps(opj_mqc_t *mqc) +{ + opj_mqc_codemps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct); +} + +/** +Encode the most least symbol +@param mqc MQC handle +*/ +static INLINE void opj_mqc_codelps(opj_mqc_t *mqc) +{ + opj_mqc_codelps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct); +} + +/** +Encode a symbol using the MQ-coder +@param mqc MQC handle +@param d The symbol to be encoded (0 or 1) +*/ +static INLINE void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d) +{ + if ((*mqc->curctx)->mps == d) { + opj_mqc_codemps(mqc); + } else { + opj_mqc_codelps(mqc); + } +} + void opj_mqc_segmark_enc(opj_mqc_t *mqc) { OPJ_UINT32 i; @@ -557,4 +489,36 @@ void opj_mqc_setstate(opj_mqc_t *mqc, OPJ_UINT32 ctxno, OPJ_UINT32 msb, mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)]; } - +void opj_mqc_byteout(opj_mqc_t *mqc) +{ + /* bp is initialized to start - 1 in opj_mqc_init_enc() */ + /* 
but this is safe, see opj_tcd_code_block_enc_allocate_data() */ + assert(mqc->bp >= mqc->start - 1); + if (*mqc->bp == 0xff) { + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); + mqc->c &= 0xfffff; + mqc->ct = 7; + } else { + if ((mqc->c & 0x8000000) == 0) { + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); + mqc->c &= 0x7ffff; + mqc->ct = 8; + } else { + (*mqc->bp)++; + if (*mqc->bp == 0xff) { + mqc->c &= 0x7ffffff; + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); + mqc->c &= 0xfffff; + mqc->ct = 7; + } else { + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); + mqc->c &= 0x7ffff; + mqc->ct = 8; + } + } + } +} \ No newline at end of file diff --git a/openjpeg/src/lib/openjp2/mqc.h b/openjpeg/src/lib/openjp2/mqc.h index 69a2a79dc..9850fed03 100644 --- a/openjpeg/src/lib/openjp2/mqc.h +++ b/openjpeg/src/lib/openjp2/mqc.h @@ -96,6 +96,8 @@ typedef struct opj_mqc { OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA]; } opj_mqc_t; +#define BYPASS_CT_INIT 0xDEADBEEF + #include "mqc_inl.h" /** @name Exported functions */ @@ -135,12 +137,7 @@ Set the current context used for coding/decoding @param ctxno Number that identifies the context */ #define opj_mqc_setcurctx(mqc, ctxno) (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] -/** -Encode a symbol using the MQ-coder -@param mqc MQC handle -@param d The symbol to be encoded (0 or 1) -*/ -void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d); + /** Flush the encoder, so that all remaining data is written @param mqc MQC handle diff --git a/openjpeg/src/lib/openjp2/mqc_inl.h b/openjpeg/src/lib/openjp2/mqc_inl.h index 310a3287f..0031b94be 100644 --- a/openjpeg/src/lib/openjp2/mqc_inl.h +++ b/openjpeg/src/lib/openjp2/mqc_inl.h @@ -156,13 +156,13 @@ static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc) } \ } -#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ +#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \ register const opj_mqc_state_t **curctx = mqc->curctx; \ register OPJ_UINT32 c = mqc->c; \ register 
OPJ_UINT32 a = mqc->a; \ register OPJ_UINT32 ct = mqc->ct -#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ +#define UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \ mqc->curctx = curctx; \ mqc->c = c; \ mqc->a = a; \ @@ -193,4 +193,90 @@ Decode a symbol #define opj_mqc_decode(d, mqc) \ opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct) +/** +Output a byte, doing bit-stuffing if necessary. +After a 0xff byte, the next byte must be smaller than 0x90. +@param mqc MQC handle +*/ +void opj_mqc_byteout(opj_mqc_t *mqc); + +/** +Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000 +@param mqc MQC handle +@param a_ value of mqc->a +@param c_ value of mqc->c_ +@param ct_ value of mqc->ct_ +*/ +#define opj_mqc_renorme_macro(mqc, a_, c_, ct_) \ +{ \ + do { \ + a_ <<= 1; \ + c_ <<= 1; \ + ct_--; \ + if (ct_ == 0) { \ + mqc->c = c_; \ + opj_mqc_byteout(mqc); \ + c_ = mqc->c; \ + ct_ = mqc->ct; \ + } \ + } while( (a_ & 0x8000) == 0); \ +} + +#define opj_mqc_codemps_macro(mqc, curctx, a, c, ct) \ +{ \ + a -= (*curctx)->qeval; \ + if ((a & 0x8000) == 0) { \ + if (a < (*curctx)->qeval) { \ + a = (*curctx)->qeval; \ + } else { \ + c += (*curctx)->qeval; \ + } \ + *curctx = (*curctx)->nmps; \ + opj_mqc_renorme_macro(mqc, a, c, ct); \ + } else { \ + c += (*curctx)->qeval; \ + } \ +} + +#define opj_mqc_codelps_macro(mqc, curctx, a, c, ct) \ +{ \ + a -= (*curctx)->qeval; \ + if (a < (*curctx)->qeval) { \ + c += (*curctx)->qeval; \ + } else { \ + a = (*curctx)->qeval; \ + } \ + *curctx = (*curctx)->nlps; \ + opj_mqc_renorme_macro(mqc, a, c, ct); \ +} + +#define opj_mqc_encode_macro(mqc, curctx, a, c, ct, d) \ +{ \ + if ((*curctx)->mps == (d)) { \ + opj_mqc_codemps_macro(mqc, curctx, a, c, ct); \ + } else { \ + opj_mqc_codelps_macro(mqc, curctx, a, c, ct); \ + } \ +} + + +#define opj_mqc_bypass_enc_macro(mqc, c, ct, d) \ +{\ + if (ct == BYPASS_CT_INIT) {\ + ct = 8;\ + }\ + ct--;\ + c = c + ((d) << ct);\ + if (ct == 0) {\ + 
*mqc->bp = (OPJ_BYTE)c;\ + ct = 8;\ + /* If the previous byte was 0xff, make sure that the next msb is 0 */ \ + if (*mqc->bp == 0xff) {\ + ct = 7;\ + }\ + mqc->bp++;\ + c = 0;\ + }\ +} + #endif /* OPJ_MQC_INL_H */ diff --git a/openjpeg/src/lib/openjp2/openjpeg.c b/openjpeg/src/lib/openjp2/openjpeg.c index 7b1230342..9c9b6eb0c 100644 --- a/openjpeg/src/lib/openjp2/openjpeg.c +++ b/openjpeg/src/lib/openjp2/openjpeg.c @@ -652,6 +652,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format) struct opj_image *, struct opj_event_mgr *)) opj_j2k_setup_encoder; + l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL( + *)(void *, + const char* const*, + struct opj_event_mgr *)) opj_j2k_encoder_set_extra_options; + + l_codec->opj_set_threads = + (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_j2k_set_threads; + l_codec->m_codec = opj_j2k_create_compress(); if (! l_codec->m_codec) { opj_free(l_codec); @@ -690,6 +698,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format) struct opj_image *, struct opj_event_mgr *)) opj_jp2_setup_encoder; + l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL( + *)(void *, + const char* const*, + struct opj_event_mgr *)) opj_jp2_encoder_set_extra_options; + + l_codec->opj_set_threads = + (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_jp2_set_threads; + l_codec->m_codec = opj_jp2_create(OPJ_FALSE); if (! 
l_codec->m_codec) { opj_free(l_codec); @@ -718,11 +734,11 @@ void OPJ_CALLCONV opj_set_default_encoder_parameters(opj_cparameters_t parameters->cp_cinema = OPJ_OFF; /* DEPRECATED */ parameters->rsiz = OPJ_PROFILE_NONE; parameters->max_comp_size = 0; - parameters->numresolution = 6; + parameters->numresolution = OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION; parameters->cp_rsiz = OPJ_STD_RSIZ; /* DEPRECATED */ - parameters->cblockw_init = 64; - parameters->cblockh_init = 64; - parameters->prog_order = OPJ_LRCP; + parameters->cblockw_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKW; + parameters->cblockh_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKH; + parameters->prog_order = OPJ_COMP_PARAM_DEFAULT_PROG_ORDER; parameters->roi_compno = -1; /* no ROI */ parameters->subsampling_dx = 1; parameters->subsampling_dy = 1; @@ -788,6 +804,27 @@ OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec, return OPJ_FALSE; } +/* ----------------------------------------------------------------------- */ + +OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options(opj_codec_t *p_codec, + const char* const* options) +{ + if (p_codec) { + opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec; + + if (! l_codec->is_decompressor) { + return l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options( + l_codec->m_codec, + options, + &(l_codec->m_event_mgr)); + } + } + + return OPJ_FALSE; +} + +/* ----------------------------------------------------------------------- */ + OPJ_BOOL OPJ_CALLCONV opj_start_compress(opj_codec_t *p_codec, opj_image_t * p_image, opj_stream_t *p_stream) diff --git a/openjpeg/src/lib/openjp2/openjpeg.h b/openjpeg/src/lib/openjp2/openjpeg.h index 53a0e10c5..269ac329a 100644 --- a/openjpeg/src/lib/openjp2/openjpeg.h +++ b/openjpeg/src/lib/openjp2/openjpeg.h @@ -78,7 +78,7 @@ Most compilers implement their own version of this keyword ... 
#if defined(OPJ_STATIC) || !defined(_WIN32) /* http://gcc.gnu.org/wiki/Visibility */ -# if __GNUC__ >= 4 +# if !defined(_WIN32) && __GNUC__ >= 4 # if defined(OPJ_STATIC) /* static library uses "hidden" */ # define OPJ_API __attribute__ ((visibility ("hidden"))) # else @@ -204,11 +204,11 @@ typedef size_t OPJ_SIZE_T; #define OPJ_PROFILE_BC_MULTI 0x0200 /** Multi Tile Broadcast profile defined in 15444-1 AMD3 */ #define OPJ_PROFILE_BC_MULTI_R 0x0300 /** Multi Tile Reversible Broadcast profile defined in 15444-1 AMD3 */ #define OPJ_PROFILE_IMF_2K 0x0400 /** 2K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_4K 0x0401 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_8K 0x0402 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_2K_R 0x0403 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_4K 0x0500 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_8K 0x0600 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_2K_R 0x0700 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ #define OPJ_PROFILE_IMF_4K_R 0x0800 /** 4K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_8K_R 0x0801 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_8K_R 0x0900 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ /** * JPEG 2000 Part-2 extensions @@ -225,6 +225,36 @@ typedef size_t OPJ_SIZE_T; #define OPJ_IS_IMF(v) (((v) >= OPJ_PROFILE_IMF_2K)&&((v) <= ((OPJ_PROFILE_IMF_8K_R) | (0x009b)))) #define OPJ_IS_PART2(v) ((v) & OPJ_PROFILE_PART2) +#define OPJ_GET_IMF_PROFILE(v) ((v) & 0xff00) /** Extract IMF profile without mainlevel/sublevel */ +#define OPJ_GET_IMF_MAINLEVEL(v) ((v) & 0xf) /** Extract IMF main level */ 
+#define OPJ_GET_IMF_SUBLEVEL(v) (((v) >> 4) & 0xf) /** Extract IMF sub level */ + +#define OPJ_IMF_MAINLEVEL_MAX 11 /** Maximum main level */ + +/** Max. Components Sampling Rate (MSamples/sec) per IMF main level */ +#define OPJ_IMF_MAINLEVEL_1_MSAMPLESEC 65 /** MSamples/sec for IMF main level 1 */ +#define OPJ_IMF_MAINLEVEL_2_MSAMPLESEC 130 /** MSamples/sec for IMF main level 2 */ +#define OPJ_IMF_MAINLEVEL_3_MSAMPLESEC 195 /** MSamples/sec for IMF main level 3 */ +#define OPJ_IMF_MAINLEVEL_4_MSAMPLESEC 260 /** MSamples/sec for IMF main level 4 */ +#define OPJ_IMF_MAINLEVEL_5_MSAMPLESEC 520 /** MSamples/sec for IMF main level 5 */ +#define OPJ_IMF_MAINLEVEL_6_MSAMPLESEC 1200 /** MSamples/sec for IMF main level 6 */ +#define OPJ_IMF_MAINLEVEL_7_MSAMPLESEC 2400 /** MSamples/sec for IMF main level 7 */ +#define OPJ_IMF_MAINLEVEL_8_MSAMPLESEC 4800 /** MSamples/sec for IMF main level 8 */ +#define OPJ_IMF_MAINLEVEL_9_MSAMPLESEC 9600 /** MSamples/sec for IMF main level 9 */ +#define OPJ_IMF_MAINLEVEL_10_MSAMPLESEC 19200 /** MSamples/sec for IMF main level 10 */ +#define OPJ_IMF_MAINLEVEL_11_MSAMPLESEC 38400 /** MSamples/sec for IMF main level 11 */ + +/** Max. 
compressed Bit Rate (Mbits/s) per IMF sub level */ +#define OPJ_IMF_SUBLEVEL_1_MBITSSEC 200 /** Mbits/s for IMF sub level 1 */ +#define OPJ_IMF_SUBLEVEL_2_MBITSSEC 400 /** Mbits/s for IMF sub level 2 */ +#define OPJ_IMF_SUBLEVEL_3_MBITSSEC 800 /** Mbits/s for IMF sub level 3 */ +#define OPJ_IMF_SUBLEVEL_4_MBITSSEC 1600 /** Mbits/s for IMF sub level 4 */ +#define OPJ_IMF_SUBLEVEL_5_MBITSSEC 3200 /** Mbits/s for IMF sub level 5 */ +#define OPJ_IMF_SUBLEVEL_6_MBITSSEC 6400 /** Mbits/s for IMF sub level 6 */ +#define OPJ_IMF_SUBLEVEL_7_MBITSSEC 12800 /** Mbits/s for IMF sub level 7 */ +#define OPJ_IMF_SUBLEVEL_8_MBITSSEC 25600 /** Mbits/s for IMF sub level 8 */ +#define OPJ_IMF_SUBLEVEL_9_MBITSSEC 51200 /** Mbits/s for IMF sub level 9 */ + /** * JPEG 2000 codestream and component size limits in cinema profiles * */ @@ -318,6 +348,10 @@ typedef void (*opj_msg_callback)(const char *msg, void *client_data); ========================================================== */ +#ifndef OPJ_UINT32_SEMANTICALLY_BUT_INT32 +#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_INT32 +#endif + /** * Progression order changes * @@ -333,10 +367,10 @@ typedef struct opj_poc { OPJ_PROG_ORDER prg1, prg; /** Progression order string*/ OPJ_CHAR progorder[5]; - /** Tile number */ + /** Tile number (starting at 1) */ OPJ_UINT32 tile; /** Start and end values for Tile width and height*/ - OPJ_INT32 tx0, tx1, ty0, ty1; + OPJ_UINT32_SEMANTICALLY_BUT_INT32 tx0, tx1, ty0, ty1; /** Start value, initialised in pi_initialise_encode*/ OPJ_UINT32 layS, resS, compS, prcS; /** End value, initialised in pi_initialise_encode */ @@ -1314,15 +1348,14 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, * number, or "ALL_CPUS". If OPJ_NUM_THREADS is set and this function is called, * this function will override the behaviour of the environment variable. * - * Currently this function must be called after opj_setup_decoder() and - * before opj_read_header(). 
- * - * Note: currently only has effect on the decompressor. + * This function must be called after opj_setup_decoder() and + * before opj_read_header() for the decoding side, or after opj_setup_encoder() + * and before opj_start_compress() for the encoding side. * - * @param p_codec decompressor handler + * @param p_codec decompressor or compressor handler * @param num_threads number of threads. * - * @return OPJ_TRUE if the decoder is correctly set + * @return OPJ_TRUE if the function is successful. */ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, int num_threads); @@ -1546,6 +1579,33 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec, opj_cparameters_t *parameters, opj_image_t *image); + +/** + * Specify extra options for the encoder. + * + * This may be called after opj_setup_encoder() and before opj_start_compress() + * + * This is the way to add new options in a fully ABI compatible way, without + * extending the opj_cparameters_t structure. + * + * Currently supported options are: + *
    + *
  • PLT=YES/NO. Defaults to NO. If set to YES, PLT marker segments, + * indicating the length of each packet in the tile-part header, will be + * written. Since 2.3.2
  • + *
+ * + * @param p_codec Compressor handle + * @param p_options Compression options. This should be a NULL terminated + * array of strings. Each string is of the form KEY=VALUE. + * + * @return OPJ_TRUE in case of success. + * @since 2.3.2 + */ +OPJ_API OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options( + opj_codec_t *p_codec, + const char* const* p_options); + /** * Start to compress the current image. * @param p_codec Compressor handle diff --git a/openjpeg/src/lib/openjp2/opj_codec.h b/openjpeg/src/lib/openjp2/opj_codec.h index b962b1216..8a8af9119 100644 --- a/openjpeg/src/lib/openjp2/opj_codec.h +++ b/openjpeg/src/lib/openjp2/opj_codec.h @@ -148,6 +148,11 @@ typedef struct opj_codec_private { opj_cparameters_t * p_param, struct opj_image * p_image, struct opj_event_mgr * p_manager); + + OPJ_BOOL(* opj_encoder_set_extra_options)(void * p_codec, + const char* const* p_options, + struct opj_event_mgr * p_manager); + } m_compression; } m_codec_data; /** FIXME DOC*/ diff --git a/openjpeg/src/lib/openjp2/opj_common.h b/openjpeg/src/lib/openjp2/opj_common.h index a05133915..ee8adf472 100644 --- a/openjpeg/src/lib/openjp2/opj_common.h +++ b/openjpeg/src/lib/openjp2/opj_common.h @@ -38,4 +38,10 @@ */ #define OPJ_COMMON_CBLK_DATA_EXTRA 2 /**< Margin for a fake FFFF marker */ + +#define OPJ_COMP_PARAM_DEFAULT_CBLOCKW 64 +#define OPJ_COMP_PARAM_DEFAULT_CBLOCKH 64 +#define OPJ_COMP_PARAM_DEFAULT_PROG_ORDER OPJ_LRCP +#define OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION 6 + #endif /* OPJ_COMMMON_H */ diff --git a/openjpeg/src/lib/openjp2/opj_intmath.h b/openjpeg/src/lib/openjp2/opj_intmath.h index 754b5512f..afe69d90c 100644 --- a/openjpeg/src/lib/openjp2/opj_intmath.h +++ b/openjpeg/src/lib/openjp2/opj_intmath.h @@ -208,6 +208,16 @@ static INLINE OPJ_INT32 opj_int_floordivpow2(OPJ_INT32 a, OPJ_INT32 b) { return a >> b; } + +/** +Divide an integer by a power of 2 and round downwards +@return Returns a divided by 2^b +*/ +static INLINE OPJ_UINT32 opj_uint_floordivpow2(OPJ_UINT32 a, 
OPJ_UINT32 b) +{ + return a >> b; +} + /** Get logarithm of an integer and round downwards @return Returns log2(a) diff --git a/openjpeg/src/lib/openjp2/pi.c b/openjpeg/src/lib/openjp2/pi.c index 4a6ed68e2..4f7dd50f1 100644 --- a/openjpeg/src/lib/openjp2/pi.c +++ b/openjpeg/src/lib/openjp2/pi.c @@ -36,6 +36,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_UINT32 + #include "opj_includes.h" /** @defgroup PI PI - Implementation of a packet iterator */ @@ -91,10 +93,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi); */ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -118,10 +120,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, OPJ_UINT32 p_num_comps, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -144,10 +146,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, static void opj_get_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -176,10 +178,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, static void opj_get_all_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * 
p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -192,10 +194,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, * @param p_image the image used to initialize the packet iterator (in fact only the number of components is relevant. * @param p_cp the coding parameters. * @param tileno the index of the tile from which creating the packet iterator. + * @param manager Event manager */ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *p_image, const opj_cp_t *p_cp, - OPJ_UINT32 tileno); + OPJ_UINT32 tileno, + opj_event_mgr_t* manager); /** * FIXME DOC */ @@ -230,18 +234,19 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos, ========================================================== */ -static void opj_pi_emit_error(opj_pi_iterator_t * pi, const char* msg) -{ - (void)pi; - (void)msg; -} - static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi) { opj_pi_comp_t *comp = NULL; opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_lrcp(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; res = &comp->resolutions[pi->resno]; @@ -272,7 +277,7 @@ static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi) /* include should be resized when a POC arises, or */ /* the POC should be rejected */ if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -295,6 +300,13 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + 
opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_rlcp(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; res = &comp->resolutions[pi->resno]; @@ -318,7 +330,7 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi) index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -341,6 +353,13 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_rpcl(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { goto LABEL_SKIP; } else { @@ -376,16 +395,16 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) pi->poc.tx1 = pi->tx1; } for (pi->resno = pi->poc.resno0; pi->resno < pi->poc.resno1; pi->resno++) { - for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; - pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { - for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; - pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { + for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; + pi->y += (pi->dy - (pi->y % pi->dy))) { + for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; + pi->x += (pi->dx - (pi->x % pi->dx))) { for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) { OPJ_UINT32 levelno; - OPJ_INT32 trx0, try0; - OPJ_INT32 trx1, try1; + OPJ_UINT32 trx0, try0; + OPJ_UINT32 trx1, try1; OPJ_UINT32 rpx, rpy; - OPJ_INT32 prci, prcj; + OPJ_UINT32 prci, prcj; comp = &pi->comps[pi->compno]; if (pi->resno >= comp->numresolutions) { continue; @@ -404,10 
+423,10 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) (comp->dy << levelno) > INT_MAX) { continue; } - trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); - try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); - trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); - try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); + trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); + try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); + trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); + try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); rpx = res->pdx + levelno; rpy = res->pdy + levelno; @@ -421,12 +440,12 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) } /* See ISO-15441. B.12.1.3 Resolution level-position-component-layer progression */ - if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && - ((try0 << levelno) % (1 << rpy))))) { + if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && + ((try0 << levelno) % (1U << rpy))))) { continue; } - if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && - ((trx0 << levelno) % (1 << rpx))))) { + if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && + ((trx0 << levelno) % (1U << rpx))))) { continue; } @@ -438,18 +457,18 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) continue; } - prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, - (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) - - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); - prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, - (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) - - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); - pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); + prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, + (comp->dx << levelno)), res->pdx) + - opj_uint_floordivpow2(trx0, res->pdx); + prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, + 
(comp->dy << levelno)), res->pdy) + - opj_uint_floordivpow2(try0, res->pdy); + pi->precno = prci + prcj * res->pw; for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -473,6 +492,13 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_pcrl(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; goto LABEL_SKIP; @@ -508,19 +534,19 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) pi->poc.ty1 = pi->ty1; pi->poc.tx1 = pi->tx1; } - for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; - pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { - for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; - pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { + for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; + pi->y += (pi->dy - (pi->y % pi->dy))) { + for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; + pi->x += (pi->dx - (pi->x % pi->dx))) { for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) { comp = &pi->comps[pi->compno]; for (pi->resno = pi->poc.resno0; pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) { OPJ_UINT32 levelno; - OPJ_INT32 trx0, try0; - OPJ_INT32 trx1, try1; + OPJ_UINT32 trx0, try0; + OPJ_UINT32 trx1, try1; OPJ_UINT32 rpx, rpy; - OPJ_INT32 prci, prcj; + OPJ_UINT32 prci, prcj; res = &comp->resolutions[pi->resno]; levelno = comp->numresolutions - 
1 - pi->resno; /* Avoids division by zero */ @@ -535,10 +561,10 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) (comp->dy << levelno) > INT_MAX) { continue; } - trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); - try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); - trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); - try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); + trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); + try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); + trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); + try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); rpx = res->pdx + levelno; rpy = res->pdy + levelno; @@ -552,12 +578,12 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) } /* See ISO-15441. B.12.1.4 Position-component-resolution level-layer progression */ - if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && - ((try0 << levelno) % (1 << rpy))))) { + if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && + ((try0 << levelno) % (1U << rpy))))) { continue; } - if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && - ((trx0 << levelno) % (1 << rpx))))) { + if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && + ((trx0 << levelno) % (1U << rpx))))) { continue; } @@ -569,18 +595,18 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) continue; } - prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, - (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) - - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); - prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, - (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) - - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); - pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); + prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, + (comp->dx << levelno)), res->pdx) + - opj_uint_floordivpow2(trx0, res->pdx); + prcj 
= opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, + (comp->dy << levelno)), res->pdy) + - opj_uint_floordivpow2(try0, res->pdy); + pi->precno = prci + prcj * res->pw; for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -604,6 +630,13 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_cprl(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; goto LABEL_SKIP; @@ -639,17 +672,17 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) pi->poc.ty1 = pi->ty1; pi->poc.tx1 = pi->tx1; } - for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; - pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { - for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; - pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { + for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; + pi->y += (pi->dy - (pi->y % pi->dy))) { + for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; + pi->x += (pi->dx - (pi->x % pi->dx))) { for (pi->resno = pi->poc.resno0; pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) { OPJ_UINT32 levelno; - OPJ_INT32 trx0, try0; - OPJ_INT32 trx1, try1; + OPJ_UINT32 trx0, try0; + OPJ_UINT32 trx1, try1; OPJ_UINT32 rpx, rpy; - OPJ_INT32 prci, prcj; + OPJ_UINT32 prci, prcj; res = &comp->resolutions[pi->resno]; levelno = comp->numresolutions - 1 - pi->resno; /* Avoids division by zero on 
id_000004,sig_06,src_000679,op_arith8,pos_49,val_-17 */ @@ -663,10 +696,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) (comp->dy << levelno) > INT_MAX) { continue; } - trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); - try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); - trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); - try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); + trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); + try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); + trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); + try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); rpx = res->pdx + levelno; rpy = res->pdy + levelno; @@ -680,12 +713,12 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) } /* See ISO-15441. B.12.1.5 Component-position-resolution level-layer progression */ - if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && - ((try0 << levelno) % (1 << rpy))))) { + if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && + ((try0 << levelno) % (1U << rpy))))) { continue; } - if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && - ((trx0 << levelno) % (1 << rpx))))) { + if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && + ((trx0 << levelno) % (1U << rpx))))) { continue; } @@ -697,18 +730,18 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) continue; } - prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, - (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) - - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); - prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, - (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) - - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); - pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); + prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, + (comp->dx << levelno)), res->pdx) + - opj_uint_floordivpow2(trx0, 
res->pdx); + prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, + (comp->dy << levelno)), res->pdy) + - opj_uint_floordivpow2(try0, res->pdy); + pi->precno = (OPJ_UINT32)(prci + prcj * res->pw); for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -729,10 +762,10 @@ LABEL_SKIP: static void opj_get_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 p_tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -768,12 +801,12 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, /* find extent of tile */ l_tx0 = p_cp->tx0 + p * p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */ - *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0); - *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); + *p_tx0 = opj_uint_max(l_tx0, p_image->x0); + *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); l_ty0 = p_cp->ty0 + q * p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */ - *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0); - *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); + *p_ty0 = opj_uint_max(l_ty0, p_image->y0); + *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); /* max precision is 0 (can only grow) */ *p_max_prec = 0; @@ -786,17 +819,17 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, for (compno = 0; compno < p_image->numcomps; ++compno) { /* 
arithmetic variables to calculate */ OPJ_UINT32 l_level_no; - OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1; - OPJ_INT32 l_px0, l_py0, l_px1, py1; + OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1; + OPJ_UINT32 l_px0, l_py0, l_px1, py1; OPJ_UINT32 l_pdx, l_pdy; OPJ_UINT32 l_pw, l_ph; OPJ_UINT32 l_product; - OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; + OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; - l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx); - l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy); - l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx); - l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy); + l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx); + l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy); + l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx); + l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy); if (l_tccp->numresolutions > *p_max_res) { *p_max_res = l_tccp->numresolutions; @@ -820,19 +853,19 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, /* various calculations of extents */ l_level_no = l_tccp->numresolutions - 1 - resno; - l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no); - l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no); - l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no); - l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no); + l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no); + l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no); + l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no); + l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no); - l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx; - l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy; - l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx; + l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx; + l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy; + l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx; - py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy; + py1 = 
opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy; - l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx); - l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy); + l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx); + l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy); l_product = l_pw * l_ph; @@ -850,10 +883,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, static void opj_get_all_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -894,12 +927,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, /* here calculation of tx0, tx1, ty0, ty1, maxprec, l_dx and l_dy */ l_tx0 = p_cp->tx0 + p * p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */ - *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0); - *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); + *p_tx0 = opj_uint_max(l_tx0, p_image->x0); + *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); l_ty0 = p_cp->ty0 + q * p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */ - *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0); - *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); + *p_ty0 = opj_uint_max(l_ty0, p_image->y0); + *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); /* max precision and resolution is 0 (can only grow)*/ *p_max_prec = 0; @@ -912,18 +945,18 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, for (compno = 0; compno < p_image->numcomps; ++compno) { /* aritmetic variables to calculate*/ OPJ_UINT32 l_level_no; - OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1; - OPJ_INT32 l_px0, l_py0, l_px1, py1; + 
OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1; + OPJ_UINT32 l_px0, l_py0, l_px1, py1; OPJ_UINT32 l_product; - OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; + OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; OPJ_UINT32 l_pdx, l_pdy, l_pw, l_ph; - lResolutionPtr = p_resolutions[compno]; + lResolutionPtr = p_resolutions ? p_resolutions[compno] : NULL; - l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx); - l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy); - l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx); - l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy); + l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx); + l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy); + l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx); + l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy); if (l_tccp->numresolutions > *p_max_res) { *p_max_res = l_tccp->numresolutions; @@ -939,33 +972,37 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, /* precinct width and height*/ l_pdx = l_tccp->prcw[resno]; l_pdy = l_tccp->prch[resno]; - *lResolutionPtr++ = l_pdx; - *lResolutionPtr++ = l_pdy; + if (lResolutionPtr) { + *lResolutionPtr++ = l_pdx; + *lResolutionPtr++ = l_pdy; + } if (l_pdx + l_level_no < 32 && l_img_comp->dx <= UINT_MAX / (1u << (l_pdx + l_level_no))) { l_dx = l_img_comp->dx * (1u << (l_pdx + l_level_no)); /* take the minimum size for l_dx for each comp and resolution*/ - *p_dx_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dx_min, (OPJ_INT32)l_dx); + *p_dx_min = opj_uint_min(*p_dx_min, l_dx); } if (l_pdy + l_level_no < 32 && l_img_comp->dy <= UINT_MAX / (1u << (l_pdy + l_level_no))) { l_dy = l_img_comp->dy * (1u << (l_pdy + l_level_no)); - *p_dy_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dy_min, (OPJ_INT32)l_dy); + *p_dy_min = opj_uint_min(*p_dy_min, l_dy); } /* various calculations of extents*/ - l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no); - l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no); - l_rx1 = 
opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no); - l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no); - l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx; - l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy; - l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx; - py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy; - l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx); - l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy); - *lResolutionPtr++ = l_pw; - *lResolutionPtr++ = l_ph; + l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no); + l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no); + l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no); + l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no); + l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx; + l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy; + l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx; + py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy; + l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx); + l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy); + if (lResolutionPtr) { + *lResolutionPtr++ = l_pw; + *lResolutionPtr++ = l_ph; + } l_product = l_pw * l_ph; /* update precision*/ @@ -981,7 +1018,8 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, const opj_cp_t *cp, - OPJ_UINT32 tileno) + OPJ_UINT32 tileno, + opj_event_mgr_t* manager) { /* loop*/ OPJ_UINT32 pino, compno; @@ -1015,6 +1053,8 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, l_current_pi = l_pi; for (pino = 0; pino < l_poc_bound ; ++pino) { + l_current_pi->manager = manager; + l_current_pi->comps = (opj_pi_comp_t*) opj_calloc(image->numcomps, sizeof(opj_pi_comp_t)); if (! 
l_current_pi->comps) { @@ -1045,10 +1085,10 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -1125,10 +1165,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, OPJ_UINT32 p_num_comps, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -1167,10 +1207,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, l_current_poc->prg = l_tcp->prg; l_current_poc->prcS = 0; l_current_poc->prcE = p_max_prec; - l_current_poc->txS = (OPJ_UINT32)p_tx0; - l_current_poc->txE = (OPJ_UINT32)p_tx1; - l_current_poc->tyS = (OPJ_UINT32)p_ty0; - l_current_poc->tyE = (OPJ_UINT32)p_ty1; + l_current_poc->txS = p_tx0; + l_current_poc->txE = p_tx1; + l_current_poc->tyS = p_ty0; + l_current_poc->tyE = p_ty1; l_current_poc->dx = p_dx_min; l_current_poc->dy = p_dy_min; ++ l_current_poc; @@ -1352,7 +1392,8 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos, */ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, opj_cp_t *p_cp, - OPJ_UINT32 p_tile_no) + OPJ_UINT32 p_tile_no, + opj_event_mgr_t* manager) { OPJ_UINT32 numcomps = p_image->numcomps; @@ -1367,7 +1408,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, /* encoding prameters to set */ OPJ_UINT32 l_max_res; OPJ_UINT32 l_max_prec; - OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; OPJ_UINT32 l_dx_min, l_dy_min; OPJ_UINT32 l_bound; OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ; @@ 
-1407,7 +1448,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, } /* memory allocation for pi */ - l_pi = opj_pi_create(p_image, p_cp, p_tile_no); + l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager); if (!l_pi) { opj_free(l_tmp_data); opj_free(l_tmp_ptr); @@ -1548,11 +1589,34 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, } +OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 p_tile_no) +{ + OPJ_UINT32 l_max_res; + OPJ_UINT32 l_max_prec; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_dx_min, l_dy_min; + + /* preconditions in debug*/ + assert(p_cp != 00); + assert(p_image != 00); + assert(p_tile_no < p_cp->tw * p_cp->th); + + /* get encoding parameters*/ + opj_get_all_encoding_parameters(p_image, p_cp, p_tile_no, &l_tx0, &l_tx1, + &l_ty0, &l_ty1, &l_dx_min, &l_dy_min, &l_max_prec, &l_max_res, NULL); + + return p_cp->tcps[p_tile_no].numlayers * l_max_prec * p_image->numcomps * + l_max_res; +} + opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, opj_cp_t *p_cp, OPJ_UINT32 p_tile_no, - J2K_T2_MODE p_t2_mode) + J2K_T2_MODE p_t2_mode, + opj_event_mgr_t* manager) { OPJ_UINT32 numcomps = p_image->numcomps; @@ -1567,7 +1631,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, /* encoding prameters to set*/ OPJ_UINT32 l_max_res; OPJ_UINT32 l_max_prec; - OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; OPJ_UINT32 l_dx_min, l_dy_min; OPJ_UINT32 l_bound; OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ; @@ -1606,7 +1670,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, } /* memory allocation for pi*/ - l_pi = opj_pi_create(p_image, p_cp, p_tile_no); + l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager); if (!l_pi) { opj_free(l_tmp_data); opj_free(l_tmp_ptr); @@ -1761,7 +1825,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.prg = tcp->prg; if 
(!(cp->m_specific_param.m_enc.m_tp_on && ((!OPJ_IS_CINEMA(cp->rsiz) && - (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz)))) { + !OPJ_IS_IMF(cp->rsiz) && + (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)))) { pi[pino].poc.resno0 = tcp->resS; pi[pino].poc.resno1 = tcp->resE; pi[pino].poc.compno0 = tcp->compS; @@ -1770,10 +1835,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.layno1 = tcp->layE; pi[pino].poc.precno0 = tcp->prcS; pi[pino].poc.precno1 = tcp->prcE; - pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS; - pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS; - pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE; - pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE; + pi[pino].poc.tx0 = tcp->txS; + pi[pino].poc.ty0 = tcp->tyS; + pi[pino].poc.tx1 = tcp->txE; + pi[pino].poc.ty1 = tcp->tyE; } else { for (i = tppos + 1; i < 4; i++) { switch (prog[i]) { @@ -1797,10 +1862,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.precno1 = tcp->prcE; break; default: - pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS; - pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS; - pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE; - pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE; + pi[pino].poc.tx0 = tcp->txS; + pi[pino].poc.ty0 = tcp->tyS; + pi[pino].poc.tx1 = tcp->txE; + pi[pino].poc.ty1 = tcp->tyE; break; } break; @@ -1840,10 +1905,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, default: tcp->tx0_t = tcp->txS; tcp->ty0_t = tcp->tyS; - pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; - pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); - pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; - pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); + pi[pino].poc.tx0 = tcp->tx0_t; + pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); + pi[pino].poc.ty0 = tcp->ty0_t; + pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; break; @@ -1875,10 +1940,10 @@ 
void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.precno1 = tcp->prc_t; break; default: - pi[pino].poc.tx0 = (OPJ_INT32)(tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx)); - pi[pino].poc.tx1 = (OPJ_INT32)tcp->tx0_t ; - pi[pino].poc.ty0 = (OPJ_INT32)(tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy)); - pi[pino].poc.ty1 = (OPJ_INT32)tcp->ty0_t ; + pi[pino].poc.tx0 = tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx); + pi[pino].poc.tx1 = tcp->tx0_t ; + pi[pino].poc.ty0 = tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy); + pi[pino].poc.ty1 = tcp->ty0_t ; break; } break; @@ -1965,8 +2030,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, if (tcp->ty0_t >= tcp->tyE) { if (opj_pi_check_next_level(i - 1, cp, tileno, pino, prog)) { tcp->ty0_t = tcp->tyS; - pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; - pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); + pi[pino].poc.ty0 = tcp->ty0_t; + pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; incr_top = 1; resetX = 1; @@ -1975,21 +2040,21 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, resetX = 0; } } else { - pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; - pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); + pi[pino].poc.ty0 = tcp->ty0_t; + pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; incr_top = 0; resetX = 1; } if (resetX == 1) { tcp->tx0_t = tcp->txS; - pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; - pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); + pi[pino].poc.tx0 = tcp->tx0_t; + pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; } } else { - pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; - pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); + pi[pino].poc.tx0 = tcp->tx0_t; + pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); 
tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; incr_top = 0; } @@ -2042,7 +2107,7 @@ void opj_pi_update_encoding_parameters(const opj_image_t *p_image, /* encoding parameters to set */ OPJ_UINT32 l_max_res; OPJ_UINT32 l_max_prec; - OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; OPJ_UINT32 l_dx_min, l_dy_min; /* pointers */ diff --git a/openjpeg/src/lib/openjp2/pi.h b/openjpeg/src/lib/openjp2/pi.h index 8c0dc25c1..0320523b7 100644 --- a/openjpeg/src/lib/openjp2/pi.h +++ b/openjpeg/src/lib/openjp2/pi.h @@ -102,11 +102,13 @@ typedef struct opj_pi_iterator { /** Components*/ opj_pi_comp_t *comps; /** FIXME DOC*/ - OPJ_INT32 tx0, ty0, tx1, ty1; + OPJ_UINT32 tx0, ty0, tx1, ty1; /** FIXME DOC*/ - OPJ_INT32 x, y; + OPJ_UINT32 x, y; /** FIXME DOC*/ OPJ_UINT32 dx, dy; + /** event manager */ + opj_event_mgr_t* manager; } opj_pi_iterator_t; /** @name Exported functions */ @@ -119,13 +121,15 @@ typedef struct opj_pi_iterator { * @param cp the coding parameters. * @param tileno index of the tile being encoded. * @param t2_mode the type of pass for generating the packet iterator + * @param manager Event manager * * @return a list of packet iterator that points to the first packet of the tile (not true). */ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *image, opj_cp_t *cp, OPJ_UINT32 tileno, - J2K_T2_MODE t2_mode); + J2K_T2_MODE t2_mode, + opj_event_mgr_t* manager); /** * Updates the encoding parameters of the codec. 
@@ -161,12 +165,14 @@ Create a packet iterator for Decoder @param image Raw image for which the packets will be listed @param cp Coding parameters @param tileno Number that identifies the tile for which to list the packets +@param manager Event manager @return Returns a packet iterator that points to the first packet of the tile @see opj_pi_destroy */ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t * image, opj_cp_t * cp, - OPJ_UINT32 tileno); + OPJ_UINT32 tileno, + opj_event_mgr_t* manager); /** * Destroys a packet iterator array. * @@ -182,6 +188,17 @@ Modify the packet iterator to point to the next packet @return Returns false if pi pointed to the last packet or else returns true */ OPJ_BOOL opj_pi_next(opj_pi_iterator_t * pi); + +/** + * Return the number of packets in the tile. + * @param image the image being encoded. + * @param cp Coding parameters + * @param tileno Number that identifies the tile. + */ +OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 p_tile_no); + /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/openjpeg/src/lib/openjp2/t1.c b/openjpeg/src/lib/openjp2/t1.c index f6f767119..1bea54b0d 100644 --- a/openjpeg/src/lib/openjp2/t1.c +++ b/openjpeg/src/lib/openjp2/t1.c @@ -61,6 +61,13 @@ #define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] +/* Macros to deal with signed integer with just MSB bit set for + * negative values (smr = signed magnitude representation) */ +#define opj_smr_abs(x) (((OPJ_UINT32)(x)) & 0x7FFFFFFFU) +#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31) +#define opj_to_smr(x) ((x) >= 0 ? 
(OPJ_UINT32)(x) : ((OPJ_UINT32)(-x) | 0x80000000U)) + + /** @name Local static functions */ /*@{*/ @@ -177,18 +184,18 @@ static OPJ_FLOAT64 opj_t1_getwmsedec( const OPJ_FLOAT64 * mct_norms, OPJ_UINT32 mct_numcomps); -static void opj_t1_encode_cblk(opj_t1_t *t1, - opj_tcd_cblk_enc_t* cblk, - OPJ_UINT32 orient, - OPJ_UINT32 compno, - OPJ_UINT32 level, - OPJ_UINT32 qmfbid, - OPJ_FLOAT64 stepsize, - OPJ_UINT32 cblksty, - OPJ_UINT32 numcomps, - opj_tcd_tile_t * tile, - const OPJ_FLOAT64 * mct_norms, - OPJ_UINT32 mct_numcomps); +/** Return "cumwmsedec" that should be used to increase tile->distotile */ +static double opj_t1_encode_cblk(opj_t1_t *t1, + opj_tcd_cblk_enc_t* cblk, + OPJ_UINT32 orient, + OPJ_UINT32 compno, + OPJ_UINT32 level, + OPJ_UINT32 qmfbid, + OPJ_FLOAT64 stepsize, + OPJ_UINT32 cblksty, + OPJ_UINT32 numcomps, + const OPJ_FLOAT64 * mct_norms, + OPJ_UINT32 mct_numcomps); /** Decode 1 code-block @@ -329,61 +336,53 @@ static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, /** Encode significant pass */ -static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 ci, - OPJ_UINT32 vsc) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 const flags = *flagsp; - - if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && - (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); - v = (opj_int_abs(*datap) & one) ? 1 : 0; -#ifdef DEBUG_ENC_SIG - fprintf(stderr, " ctxt1=%d\n", ctxt1); -#endif - opj_mqc_setcurctx(mqc, ctxt1); - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - opj_mqc_encode(mqc, v); - } - if (v) { - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, - flagsp[-1], flagsp[1], - ci); - OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); - v = *datap < 0 ? 
1U : 0U; - *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)bpno); -#ifdef DEBUG_ENC_SIG - fprintf(stderr, " ctxt2=%d\n", ctxt2); -#endif - opj_mqc_setcurctx(mqc, ctxt2); - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - OPJ_UINT32 spb = opj_t1_getspb(lu); -#ifdef DEBUG_ENC_SIG - fprintf(stderr, " spb=%d\n", spb); -#endif - opj_mqc_encode(mqc, v ^ spb); - } - opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); - } - *flagsp |= T1_PI_THIS << (ci * 3U); - } +#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \ +{ \ + OPJ_UINT32 v; \ + const OPJ_UINT32 ci = (ciIn); \ + const OPJ_UINT32 vsc = (vscIn); \ + const OPJ_INT32* l_datap = (datapIn); \ + opj_flag_t* flagsp = (flagspIn); \ + OPJ_UINT32 const flags = *flagsp; \ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \ + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \ + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \ + v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 
1 : 0; \ +/* #ifdef DEBUG_ENC_SIG */ \ +/* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt1); \ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ + opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ + } else { \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ + } \ + if (v) { \ + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ + *flagsp, \ + flagsp[-1], flagsp[1], \ + ci); \ + OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \ + v = opj_smr_sign(*l_datap); \ + *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \ + (OPJ_UINT32)bpno); \ +/* #ifdef DEBUG_ENC_SIG */ \ +/* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt2); \ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ + opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ + } else { \ + OPJ_UINT32 spb = opj_t1_getspb(lu); \ +/* #ifdef DEBUG_ENC_SIG */ \ +/* fprintf(stderr, " spb=%d\n", spb); */ \ +/* #endif */ \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \ + } \ + opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \ + } \ + *flagsp |= T1_PI_THIS << (ci * 3U); \ + } \ } static INLINE void opj_t1_dec_sigpass_step_raw( @@ -464,63 +463,64 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_t* f = &T1_FLAGS(0, 0); OPJ_UINT32 const extra = 2; + opj_mqc_t* mqc = &(t1->mqc); + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + const OPJ_INT32* datap = t1->data; *nmsedec = 0; #ifdef DEBUG_ENC_SIG fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno); #endif - for (k = 0; k < (t1->h & ~3U); k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { + const OPJ_UINT32 w = t1->w; #ifdef DEBUG_ENC_SIG fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < w; ++i, ++f, datap += 4) { #ifdef DEBUG_ENC_SIG fprintf(stderr, " i=%d\n", i); #endif if (*f == 0U) { /* Nothing to do for any of the 4 data points */ - f++; continue; } - 
opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 0) * t1->data_stride) + i], + &datap[0], bpno, one, nmsedec, type, 0, cblksty & J2K_CCP_CBLKSTY_VSC); - opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 1) * t1->data_stride) + i], + &datap[1], bpno, one, nmsedec, type, 1, 0); - opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 2) * t1->data_stride) + i], + &datap[2], bpno, one, nmsedec, type, 2, 0); - opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 3) * t1->data_stride) + i], + &datap[3], bpno, one, nmsedec, type, 3, 0); - ++f; } - f += extra; } if (k < t1->h) { @@ -528,20 +528,20 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, #ifdef DEBUG_ENC_SIG fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, ++f) { #ifdef DEBUG_ENC_SIG fprintf(stderr, " i=%d\n", i); #endif if (*f == 0U) { /* Nothing to do for any of the 4 data points */ - f++; + datap += (t1->h - k); continue; } - for (j = k; j < t1->h; ++j) { - opj_t1_enc_sigpass_step( - t1, + for (j = k; j < t1->h; ++j, ++datap) { + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[(j * t1->data_stride) + i], + &datap[0], bpno, one, nmsedec, @@ -549,9 +549,10 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, j - k, (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); } - ++f; } } + + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); } static void opj_t1_dec_sigpass_raw( @@ -626,7 +627,7 @@ static void opj_t1_dec_sigpass_raw( register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \ const OPJ_UINT32 l_w = w; \ opj_mqc_t* mqc = &(t1->mqc); \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ register OPJ_UINT32 v; \ one = 1 << bpno; \ half = one >> 1; \ @@ -651,7 +652,7 
@@ static void opj_t1_dec_sigpass_raw( } \ } \ } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ if( k < h ) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (j = 0; j < h - k; ++j) { \ @@ -715,38 +716,27 @@ static void opj_t1_dec_sigpass_mqc( /** Encode refinement pass step */ -static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 ci) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 const shift_flags = - (*flagsp >> (ci * 3U)); - - if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { - OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); - *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)bpno); - v = (opj_int_abs(*datap) & one) ? 1 : 0; -#ifdef DEBUG_ENC_REF - fprintf(stderr, " ctxt=%d\n", ctxt); -#endif - opj_mqc_setcurctx(mqc, ctxt); - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - opj_mqc_encode(mqc, v); - } - *flagsp |= T1_MU_THIS << (ci * 3U); - } +#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \ +{\ + OPJ_UINT32 v; \ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \ + const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \ + OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \ + OPJ_UINT32 abs_data = opj_smr_abs(*datap); \ + *nmsedec += opj_t1_getnmsedec_ref(abs_data, \ + (OPJ_UINT32)bpno); \ + v = ((OPJ_INT32)abs_data & one) ? 
1 : 0; \ +/* #ifdef DEBUG_ENC_REF */ \ +/* fprintf(stderr, " ctxt=%d\n", ctxt); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt); \ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ + opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ + } else { \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ + } \ + flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \ + } \ } @@ -807,100 +797,104 @@ static void opj_t1_enc_refpass( const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_t* f = &T1_FLAGS(0, 0); const OPJ_UINT32 extra = 2U; + opj_mqc_t* mqc = &(t1->mqc); + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + const OPJ_INT32* datap = t1->data; *nmsedec = 0; #ifdef DEBUG_ENC_REF fprintf(stderr, "enc_refpass: bpno=%d\n", bpno); #endif - for (k = 0; k < (t1->h & ~3U); k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { #ifdef DEBUG_ENC_REF fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, f++, datap += 4) { + const OPJ_UINT32 flags = *f; + OPJ_UINT32 flagsUpdated = flags; #ifdef DEBUG_ENC_REF fprintf(stderr, " i=%d\n", i); #endif - if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { /* none significant */ - f++; continue; } - if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == + if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { /* all processed by sigpass */ - f++; continue; } - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 0) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[0], bpno, one, nmsedec, type, 0); - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 1) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[1], bpno, one, nmsedec, type, 1); - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 2) * 
t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[2], bpno, one, nmsedec, type, 2); - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 3) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[3], bpno, one, nmsedec, type, 3); - ++f; + *f = flagsUpdated; } - f += extra; } if (k < t1->h) { OPJ_UINT32 j; + const OPJ_UINT32 remaining_lines = t1->h - k; #ifdef DEBUG_ENC_REF fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, ++f) { #ifdef DEBUG_ENC_REF fprintf(stderr, " i=%d\n", i); #endif if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { /* none significant */ - f++; + datap += remaining_lines; continue; } - for (j = k; j < t1->h; ++j) { - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[(j * t1->data_stride) + i], + for (j = 0; j < remaining_lines; ++j, datap ++) { + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + *f, *f, + &datap[0], bpno, one, nmsedec, type, - j - k); + j); } - ++f; } } + + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); } @@ -968,7 +962,7 @@ static void opj_t1_dec_refpass_raw( register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ const OPJ_UINT32 l_w = w; \ opj_mqc_t* mqc = &(t1->mqc); \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ register OPJ_UINT32 v; \ one = 1 << bpno; \ poshalf = one >> 1; \ @@ -992,7 +986,7 @@ static void opj_t1_dec_refpass_raw( } \ } \ } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ if( k < h ) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (j = 0; j < h - k; ++j) { \ @@ -1030,86 +1024,71 @@ static void opj_t1_dec_refpass_mqc( /** Encode clean-up pass step */ -static void opj_t1_enc_clnpass_step( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - 
OPJ_INT32 *nmsedec, - OPJ_UINT32 agg, - OPJ_UINT32 runlen, - OPJ_UINT32 lim, - OPJ_UINT32 cblksty) -{ - OPJ_UINT32 v; - OPJ_UINT32 ci; - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | - T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - - if ((*flagsp & check) == check) { - if (runlen == 0) { - *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 1) { - *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 2) { - *flagsp &= ~(T1_PI_2 | T1_PI_3); - } else if (runlen == 3) { - *flagsp &= ~(T1_PI_3); - } - return; - } - - for (ci = runlen; ci < lim; ++ci) { - OPJ_UINT32 vsc; - opj_flag_t flags; - OPJ_UINT32 ctxt1; - - flags = *flagsp; - - if ((agg != 0) && (ci == runlen)) { - goto LABEL_PARTIAL; - } - - if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { - ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); -#ifdef DEBUG_ENC_CLN - printf(" ctxt1=%d\n", ctxt1); -#endif - opj_mqc_setcurctx(mqc, ctxt1); - v = (opj_int_abs(*datap) & one) ? 1 : 0; - opj_mqc_encode(mqc, v); - if (v) { - OPJ_UINT32 ctxt2, spb; - OPJ_UINT32 lu; -LABEL_PARTIAL: - lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, - flagsp[-1], flagsp[1], - ci); - *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)bpno); - ctxt2 = opj_t1_getctxno_sc(lu); -#ifdef DEBUG_ENC_CLN - printf(" ctxt2=%d\n", ctxt2); -#endif - opj_mqc_setcurctx(mqc, ctxt2); - - v = *datap < 0 ? 1U : 0U; - spb = opj_t1_getspb(lu); -#ifdef DEBUG_ENC_CLN - printf(" spb=%d\n", spb); -#endif - opj_mqc_encode(mqc, v ^ spb); - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 
1 : 0; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); - } - } - *flagsp &= ~(T1_PI_THIS << (3U * ci)); - datap += t1->data_stride; - } +#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \ +{ \ + OPJ_UINT32 v; \ + OPJ_UINT32 ci; \ + opj_flag_t* const flagsp = (flagspIn); \ + const OPJ_INT32* l_datap = (datapIn); \ + const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \ + T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + \ + if ((*flagsp & check) == check) { \ + if (runlen == 0) { \ + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + } else if (runlen == 1) { \ + *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \ + } else if (runlen == 2) { \ + *flagsp &= ~(T1_PI_2 | T1_PI_3); \ + } else if (runlen == 3) { \ + *flagsp &= ~(T1_PI_3); \ + } \ + } \ + else \ + for (ci = runlen; ci < lim; ++ci) { \ + OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \ + if ((agg != 0) && (ci == runlen)) { \ + goto_PARTIAL = OPJ_TRUE; \ + } \ + else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \ + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \ +/* #ifdef DEBUG_ENC_CLN */ \ +/* printf(" ctxt1=%d\n", ctxt1); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt1); \ + v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 
1 : 0; \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ + if (v) { \ + goto_PARTIAL = OPJ_TRUE; \ + } \ + } \ + if( goto_PARTIAL ) { \ + OPJ_UINT32 vsc; \ + OPJ_UINT32 ctxt2, spb; \ + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ + *flagsp, \ + flagsp[-1], flagsp[1], \ + ci); \ + *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \ + (OPJ_UINT32)bpno); \ + ctxt2 = opj_t1_getctxno_sc(lu); \ +/* #ifdef DEBUG_ENC_CLN */ \ +/* printf(" ctxt2=%d\n", ctxt2); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt2); \ + \ + v = opj_smr_sign(*l_datap); \ + spb = opj_t1_getspb(lu); \ +/* #ifdef DEBUG_ENC_CLN */ \ +/* printf(" spb=%d\n", spb); */\ +/* #endif */ \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \ + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \ + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \ + } \ + *flagsp &= ~(T1_PI_THIS << (3U * ci)); \ + l_datap ++; \ + } \ } #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \ @@ -1165,47 +1144,50 @@ static void opj_t1_enc_clnpass( { OPJ_UINT32 i, k; const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); - OPJ_UINT32 agg, runlen; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ + opj_mqc_t* mqc = &(t1->mqc); + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + const OPJ_INT32* datap = t1->data; + opj_flag_t *f = &T1_FLAGS(0, 0); + const OPJ_UINT32 extra = 2U; *nmsedec = 0; #ifdef DEBUG_ENC_CLN printf("enc_clnpass: bpno=%d\n", bpno); #endif - for (k = 0; k < (t1->h & ~3U); k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { #ifdef DEBUG_ENC_CLN printf(" k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, f++) { + OPJ_UINT32 agg, runlen; #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); #endif - agg = !(T1_FLAGS(i, k)); + agg = !*f; #ifdef DEBUG_ENC_CLN printf(" agg=%d\n", agg); #endif if (agg) { - for (runlen = 0; runlen < 4; ++runlen) { - if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) { + for (runlen = 
0; runlen < 4; ++runlen, ++datap) { + if (opj_smr_abs(*datap) & (OPJ_UINT32)one) { break; } } - opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); - opj_mqc_encode(mqc, runlen != 4); + opj_t1_setcurctx(curctx, T1_CTXNO_AGG); + opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4); if (runlen == 4) { continue; } - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); - opj_mqc_encode(mqc, runlen >> 1); - opj_mqc_encode(mqc, runlen & 1); + opj_t1_setcurctx(curctx, T1_CTXNO_UNI); + opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1); + opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1); } else { runlen = 0; } - opj_t1_enc_clnpass_step( - t1, - &T1_FLAGS(i, k), - &t1->data[((k + runlen) * t1->data_stride) + i], + opj_t1_enc_clnpass_step_macro( + mqc, curctx, a, c, ct, + f, + datap, bpno, one, nmsedec, @@ -1213,23 +1195,24 @@ static void opj_t1_enc_clnpass( runlen, 4U, cblksty); + datap += 4 - runlen; } } if (k < t1->h) { - agg = 0; - runlen = 0; + const OPJ_UINT32 agg = 0; + const OPJ_UINT32 runlen = 0; #ifdef DEBUG_ENC_CLN printf(" k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, f++) { #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); printf(" agg=%d\n", agg); #endif - opj_t1_enc_clnpass_step( - t1, - &T1_FLAGS(i, k), - &t1->data[((k + runlen) * t1->data_stride) + i], + opj_t1_enc_clnpass_step_macro( + mqc, curctx, a, c, ct, + f, + datap, bpno, one, nmsedec, @@ -1237,8 +1220,11 @@ static void opj_t1_enc_clnpass( runlen, t1->h - k, cblksty); + datap += t1->h - k; } } + + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); } #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \ @@ -1250,7 +1236,7 @@ static void opj_t1_enc_clnpass( opj_mqc_t* mqc = &(t1->mqc); \ register OPJ_INT32 *data = t1->data; \ register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ register OPJ_UINT32 v; \ one = 1 << bpno; \ half = one >> 1; \ @@ -1319,7 +1305,7 @@ 
static void opj_t1_enc_clnpass( *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ if( k < h ) { \ for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \ for (j = 0; j < h - k; ++j) { \ @@ -1426,7 +1412,11 @@ static OPJ_FLOAT64 opj_t1_getwmsedec( if (qmfbid == 1) { w2 = opj_dwt_getnorm(level, orient); } else { /* if (qmfbid == 0) */ + const OPJ_INT32 log2_gain = (orient == 0) ? 0 : + (orient == 3) ? 2 : 1; w2 = opj_dwt_getnorm_real(level, orient); + /* Not sure this is right. But preserves past behaviour */ + stepsize /= (1 << log2_gain); } wmsedec = w1 * w2 * stepsize * (1 << bpno); @@ -1450,7 +1440,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( assert(w * h <= 4096); /* encoder uses tile buffer, so no need to allocate */ - if (!t1->encoder) { + { OPJ_UINT32 datasize = w * h; if (datasize > t1->datasize) { @@ -1560,8 +1550,7 @@ void opj_t1_destroy(opj_t1_t *p_t1) return; } - /* encoder uses tile buffer, so no need to free */ - if (!p_t1->encoder && p_t1->data) { + if (p_t1->data) { opj_aligned_free(p_t1->data); p_t1->data = 00; } @@ -1658,7 +1647,21 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); if (t1 == NULL) { t1 = opj_t1_create(OPJ_FALSE); - opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); + if (t1 == NULL) { + opj_event_msg(job->p_manager, EVT_ERROR, + "Cannot allocate Tier 1 handle\n"); + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } + if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) { + opj_event_msg(job->p_manager, EVT_ERROR, + "Unable to set t1 handle as TLS\n"); + opj_t1_destroy(t1); + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } } t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer; @@ -1725,10 +1728,11 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) datap[i] /= 2; } } else { /* 
if (tccp->qmfbid == 0) */ + const float stepsize = 0.5f * band->stepsize; i = 0; #ifdef __SSE2__ { - const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize); + const __m128 xmm_stepsize = _mm_set1_ps(stepsize); for (; i < (cblk_size & ~15U); i += 16) { __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( datap + 0))); @@ -1747,7 +1751,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } #endif for (; i < cblk_size; ++i) { - OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; + OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize; memcpy(datap, &tmp, sizeof(tmp)); datap++; } @@ -1773,12 +1777,13 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } else { /* if (tccp->qmfbid == 0) */ + const float stepsize = 0.5f * band->stepsize; OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { - OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize; + OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize; *tiledp2 = tmp; datap++; tiledp2++; @@ -2100,124 +2105,232 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } +typedef struct { + OPJ_UINT32 compno; + OPJ_UINT32 resno; + opj_tcd_cblk_enc_t* cblk; + opj_tcd_tile_t *tile; + opj_tcd_band_t* band; + opj_tcd_tilecomp_t* tilec; + opj_tccp_t* tccp; + const OPJ_FLOAT64 * mct_norms; + OPJ_UINT32 mct_numcomps; + volatile OPJ_BOOL* pret; + opj_mutex_t* mutex; +} opj_t1_cblk_encode_processing_job_t; + +/** Procedure to deal with a asynchronous code-block encoding job. + * + * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure + * @param tls TLS handle. 
+ */ +static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls) +{ + opj_t1_cblk_encode_processing_job_t* job = + (opj_t1_cblk_encode_processing_job_t*)user_data; + opj_tcd_cblk_enc_t* cblk = job->cblk; + const opj_tcd_band_t* band = job->band; + const opj_tcd_tilecomp_t* tilec = job->tilec; + const opj_tccp_t* tccp = job->tccp; + const OPJ_UINT32 resno = job->resno; + opj_t1_t* t1; + const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + + OPJ_INT32* OPJ_RESTRICT tiledp; + OPJ_UINT32 cblk_w; + OPJ_UINT32 cblk_h; + OPJ_UINT32 i, j; + + OPJ_INT32 x = cblk->x0 - band->x0; + OPJ_INT32 y = cblk->y0 - band->y0; + + if (!*(job->pret)) { + opj_free(job); + return; + } + + t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); + if (t1 == NULL) { + t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */ + opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); + } + + if (band->bandno & 1) { + opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; + x += pres->x1 - pres->x0; + } + if (band->bandno & 2) { + opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; + y += pres->y1 - pres->y0; + } + + if (!opj_t1_allocate_buffers( + t1, + (OPJ_UINT32)(cblk->x1 - cblk->x0), + (OPJ_UINT32)(cblk->y1 - cblk->y0))) { + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } + + cblk_w = t1->w; + cblk_h = t1->h; + + tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; + + if (tccp->qmfbid == 1) { + /* Do multiplication on unsigned type, even if the + * underlying type is signed, to avoid potential + * int overflow on large value (the output will be + * incorrect in such situation, but whatever...) 
+ * This assumes complement-to-2 signed integer + * representation + * Fixes https://github.com/uclouvain/openjpeg/issues/1053 + */ + OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; + OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data; + /* Change from "natural" order to "zigzag" order of T1 passes */ + for (j = 0; j < (cblk_h & ~3U); j += 4) { + for (i = 0; i < cblk_w; ++i) { + t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data += 4; + } + } + if (j < cblk_h) { + for (i = 0; i < cblk_w; ++i) { + OPJ_UINT32 k; + for (k = j; k < cblk_h; k++) { + t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data ++; + } + } + } + } else { /* if (tccp->qmfbid == 0) */ + OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp; + OPJ_INT32* OPJ_RESTRICT t1data = t1->data; + /* Change from "natural" order to "zigzag" order of T1 passes */ + for (j = 0; j < (cblk_h & ~3U); j += 4) { + for (i = 0; i < cblk_w; ++i) { + t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data += 4; + } + } + if (j < cblk_h) { + for (i = 0; i < cblk_w; ++i) { + OPJ_UINT32 k; + for (k = j; k < cblk_h; k++) { + t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize) + * (1 << T1_NMSEDEC_FRACBITS)); + t1data ++; + } + } + } + } + + { + OPJ_FLOAT64 cumwmsedec = + opj_t1_encode_cblk( + t1, + cblk, + 
band->bandno, + job->compno, + tilec->numresolutions - 1 - resno, + tccp->qmfbid, + band->stepsize, + tccp->cblksty, + job->tile->numcomps, + job->mct_norms, + job->mct_numcomps); + if (job->mutex) { + opj_mutex_lock(job->mutex); + } + job->tile->distotile += cumwmsedec; + if (job->mutex) { + opj_mutex_unlock(job->mutex); + } + } + + opj_free(job); +} -OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, +OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, opj_tcd_tile_t *tile, opj_tcp_t *tcp, const OPJ_FLOAT64 * mct_norms, OPJ_UINT32 mct_numcomps ) { + volatile OPJ_BOOL ret = OPJ_TRUE; + opj_thread_pool_t* tp = tcd->thread_pool; OPJ_UINT32 compno, resno, bandno, precno, cblkno; + opj_mutex_t* mutex = opj_mutex_create(); tile->distotile = 0; /* fixed_quality */ for (compno = 0; compno < tile->numcomps; ++compno) { opj_tcd_tilecomp_t* tilec = &tile->comps[compno]; opj_tccp_t* tccp = &tcp->tccps[compno]; - OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); for (resno = 0; resno < tilec->numresolutions; ++resno) { opj_tcd_resolution_t *res = &tilec->resolutions[resno]; for (bandno = 0; bandno < res->numbands; ++bandno) { opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; - OPJ_INT32 bandconst; /* Skip empty bands */ if (opj_tcd_is_band_empty(band)) { continue; } - - bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192)); for (precno = 0; precno < res->pw * res->ph; ++precno) { opj_tcd_precinct_t *prc = &band->precincts[precno]; for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno]; - OPJ_INT32* OPJ_RESTRICT tiledp; - OPJ_UINT32 cblk_w; - OPJ_UINT32 cblk_h; - OPJ_UINT32 i, j, tileLineAdvance; - OPJ_SIZE_T tileIndex = 0; - - OPJ_INT32 x = cblk->x0 - band->x0; - OPJ_INT32 y = cblk->y0 - band->y0; - if (band->bandno & 1) { - opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; - x += pres->x1 - pres->x0; - } - if (band->bandno & 2) { - opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; - y += 
pres->y1 - pres->y0; - } - - if (!opj_t1_allocate_buffers( - t1, - (OPJ_UINT32)(cblk->x1 - cblk->x0), - (OPJ_UINT32)(cblk->y1 - cblk->y0))) { - return OPJ_FALSE; - } - cblk_w = t1->w; - cblk_h = t1->h; - tileLineAdvance = tile_w - cblk_w; - - tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; - t1->data = tiledp; - t1->data_stride = tile_w; - if (tccp->qmfbid == 1) { - /* Do multiplication on unsigned type, even if the - * underlying type is signed, to avoid potential - * int overflow on large value (the output will be - * incorrect in such situation, but whatever...) - * This assumes complement-to-2 signed integer - * representation - * Fixes https://github.com/uclouvain/openjpeg/issues/1053 - */ - OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS; - tileIndex++; - } - tileIndex += tileLineAdvance; - } - } else { /* if (tccp->qmfbid == 0) */ - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - OPJ_INT32 tmp = tiledp[tileIndex]; - tiledp[tileIndex] = - opj_int_fix_mul_t1( - tmp, - bandconst); - tileIndex++; - } - tileIndex += tileLineAdvance; - } + opj_t1_cblk_encode_processing_job_t* job = + (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1, + sizeof(opj_t1_cblk_encode_processing_job_t)); + if (!job) { + ret = OPJ_FALSE; + goto end; } - - opj_t1_encode_cblk( - t1, - cblk, - band->bandno, - compno, - tilec->numresolutions - 1 - resno, - tccp->qmfbid, - band->stepsize, - tccp->cblksty, - tile->numcomps, - tile, - mct_norms, - mct_numcomps); + job->compno = compno; + job->tile = tile; + job->resno = resno; + job->cblk = cblk; + job->band = band; + job->tilec = tilec; + job->tccp = tccp; + job->mct_norms = mct_norms; + job->mct_numcomps = mct_numcomps; + job->pret = &ret; + job->mutex = mutex; + opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job); } /* cblkno */ } /* precno */ } /* bandno */ } /* resno */ } 
/* compno */ - return OPJ_TRUE; + +end: + opj_thread_pool_wait_completion(tcd->thread_pool, 0); + if (mutex) { + opj_mutex_destroy(mutex); + } + + return ret; } /* Returns whether the pass (bpno, passtype) is terminated */ @@ -2252,18 +2365,17 @@ static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk, /** mod fixed_quality */ -static void opj_t1_encode_cblk(opj_t1_t *t1, - opj_tcd_cblk_enc_t* cblk, - OPJ_UINT32 orient, - OPJ_UINT32 compno, - OPJ_UINT32 level, - OPJ_UINT32 qmfbid, - OPJ_FLOAT64 stepsize, - OPJ_UINT32 cblksty, - OPJ_UINT32 numcomps, - opj_tcd_tile_t * tile, - const OPJ_FLOAT64 * mct_norms, - OPJ_UINT32 mct_numcomps) +static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1, + opj_tcd_cblk_enc_t* cblk, + OPJ_UINT32 orient, + OPJ_UINT32 compno, + OPJ_UINT32 level, + OPJ_UINT32 qmfbid, + OPJ_FLOAT64 stepsize, + OPJ_UINT32 cblksty, + OPJ_UINT32 numcomps, + const OPJ_FLOAT64 * mct_norms, + OPJ_UINT32 mct_numcomps) { OPJ_FLOAT64 cumwmsedec = 0.0; @@ -2277,6 +2389,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, OPJ_UINT32 i, j; OPJ_BYTE type = T1_TYPE_MQ; OPJ_FLOAT64 tempwmsedec; + OPJ_INT32* datap; #ifdef EXTRA_DEBUG printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n", @@ -2286,10 +2399,19 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); max = 0; - for (i = 0; i < t1->w; ++i) { - for (j = 0; j < t1->h; ++j) { - OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]); - max = opj_int_max(max, tmp); + datap = t1->data; + for (j = 0; j < t1->h; ++j) { + const OPJ_UINT32 w = t1->w; + for (i = 0; i < w; ++i, ++datap) { + OPJ_INT32 tmp = *datap; + if (tmp < 0) { + OPJ_UINT32 tmp_unsigned; + max = opj_int_max(max, -tmp); + tmp_unsigned = opj_to_smr(tmp); + memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32)); + } else { + max = opj_int_max(max, tmp); + } } } @@ -2297,7 +2419,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, T1_NMSEDEC_FRACBITS) : 0; if (cblk->numbps == 0) { cblk->totalpasses 
= 0; - return; + return cumwmsedec; } bpno = (OPJ_INT32)(cblk->numbps - 1); @@ -2343,7 +2465,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid, stepsize, numcomps, mct_norms, mct_numcomps) ; cumwmsedec += tempwmsedec; - tile->distotile += tempwmsedec; pass->distortiondec = cumwmsedec; if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) { @@ -2425,4 +2546,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, } } #endif + + return cumwmsedec; } diff --git a/openjpeg/src/lib/openjp2/t1.h b/openjpeg/src/lib/openjp2/t1.h index 171dfb0a7..81ad0d00f 100644 --- a/openjpeg/src/lib/openjp2/t1.h +++ b/openjpeg/src/lib/openjp2/t1.h @@ -198,7 +198,6 @@ typedef struct opj_t1 { OPJ_UINT32 h; OPJ_UINT32 datasize; OPJ_UINT32 flagssize; - OPJ_UINT32 data_stride; OPJ_BOOL encoder; /* Thre 3 variables below are only used by the decoder */ @@ -216,13 +215,13 @@ typedef struct opj_t1 { /** Encode the code-blocks of a tile -@param t1 T1 handle +@param tcd TCD handle @param tile The tile to encode @param tcp Tile coding parameters @param mct_norms FIXME DOC @param mct_numcomps Number of components used for MCT */ -OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, +OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, opj_tcd_tile_t *tile, opj_tcp_t *tcp, const OPJ_FLOAT64 * mct_norms, diff --git a/openjpeg/src/lib/openjp2/t2.c b/openjpeg/src/lib/openjp2/t2.c index 9825118cf..1481e16f4 100644 --- a/openjpeg/src/lib/openjp2/t2.c +++ b/openjpeg/src/lib/openjp2/t2.c @@ -224,6 +224,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_len, opj_codestream_info_t *cstr_info, + opj_tcd_marker_info_t* p_marker_info, OPJ_UINT32 p_tp_num, OPJ_INT32 p_tp_pos, OPJ_UINT32 p_pino, @@ -244,7 +245,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, l_image->numcomps : 1; OPJ_UINT32 l_nb_pocs = l_tcp->numpocs + 1; - l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode); + l_pi = 
opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode, p_manager); if (!l_pi) { return OPJ_FALSE; } @@ -310,6 +311,20 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, opj_pi_destroy(l_pi, l_nb_pocs); return OPJ_FALSE; } + + if (p_marker_info && p_marker_info->need_PLT) { + /* One time use intended */ + assert(p_marker_info->packet_count == 0); + assert(p_marker_info->p_packet_size == NULL); + + p_marker_info->p_packet_size = (OPJ_UINT32*) opj_malloc( + opj_get_encoding_packet_count(l_image, l_cp, p_tile_no) * sizeof(OPJ_UINT32)); + if (p_marker_info->p_packet_size == NULL) { + opj_pi_destroy(l_pi, l_nb_pocs); + return OPJ_FALSE; + } + } + while (opj_pi_next(l_current_pi)) { if (l_current_pi->layno < p_maxlayers) { l_nb_bytes = 0; @@ -326,6 +341,11 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, * p_data_written += l_nb_bytes; + if (p_marker_info && p_marker_info->need_PLT) { + p_marker_info->p_packet_size[p_marker_info->packet_count] = l_nb_bytes; + p_marker_info->packet_count ++; + } + /* INDEX >> */ if (cstr_info) { if (cstr_info->index_write) { @@ -405,7 +425,7 @@ OPJ_BOOL opj_t2_decode_packets(opj_tcd_t* tcd, #endif /* create a packet iterator */ - l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no); + l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no, p_manager); if (!l_pi) { return OPJ_FALSE; } @@ -673,6 +693,14 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, OPJ_BOOL packet_empty = OPJ_FALSE; #endif +#ifdef DEBUG_VERBOSE + if (p_t2_mode == FINAL_PASS) { + fprintf(stderr, + "encode packet compono=%d, resno=%d, precno=%d, layno=%d\n", + compno, resno, precno, layno); + } +#endif + /* */ if (tcp->csty & J2K_CP_CSTY_SOP) { if (length < 6) { @@ -711,6 +739,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, continue; } + /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1294 */ + /* but likely not a proper fix. 
*/ + if (precno >= res->pw * res->ph) { + opj_event_msg(p_manager, EVT_ERROR, + "opj_t2_encode_packet(): accessing precno=%u >= %u\n", + precno, res->pw * res->ph); + return OPJ_FALSE; + } + prc = &band->precincts[precno]; opj_tgt_reset(prc->incltree); opj_tgt_reset(prc->imsbtree); @@ -778,6 +815,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, continue; } + /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1297 */ + /* but likely not a proper fix. */ + if (precno >= res->pw * res->ph) { + opj_event_msg(p_manager, EVT_ERROR, + "opj_t2_encode_packet(): accessing precno=%u >= %u\n", + precno, res->pw * res->ph); + return OPJ_FALSE; + } + prc = &band->precincts[precno]; l_nb_blocks = prc->cw * prc->ch; cblk = prc->cblks.enc; diff --git a/openjpeg/src/lib/openjp2/t2.h b/openjpeg/src/lib/openjp2/t2.h index 66500b169..becfa91a4 100644 --- a/openjpeg/src/lib/openjp2/t2.h +++ b/openjpeg/src/lib/openjp2/t2.h @@ -73,6 +73,7 @@ Encode the packets of a tile to a destination buffer @param p_data_written FIXME DOC @param len the length of the destination buffer @param cstr_info Codestream information structure +@param p_marker_info Marker information structure @param tpnum Tile part number of the current tile @param tppos The position of the tile part flag in the progression order @param pino FIXME DOC @@ -87,6 +88,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* t2, OPJ_UINT32 * p_data_written, OPJ_UINT32 len, opj_codestream_info_t *cstr_info, + opj_tcd_marker_info_t* p_marker_info, OPJ_UINT32 tpnum, OPJ_INT32 tppos, OPJ_UINT32 pino, diff --git a/openjpeg/src/lib/openjp2/tcd.c b/openjpeg/src/lib/openjp2/tcd.c index 647991c76..6442669d6 100644 --- a/openjpeg/src/lib/openjp2/tcd.c +++ b/openjpeg/src/lib/openjp2/tcd.c @@ -112,7 +112,7 @@ void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t * img) * Initializes tile coding/decoding */ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, - OPJ_BOOL isEncoder, 
OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block, + OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block, opj_event_mgr_t* manager); /** @@ -182,6 +182,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_dest_size, opj_codestream_info_t *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager); static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd, @@ -573,9 +574,10 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, opj_tcd_makelayer(tcd, layno, thresh, 0); if (cp->m_specific_param.m_enc.m_fixed_quality) { /* fixed_quality */ - if (OPJ_IS_CINEMA(cp->rsiz)) { + if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) { if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest, - p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino, + p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos, + tcd->cur_pino, THRESH_CALC, p_manager)) { lo = thresh; @@ -605,7 +607,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, } } else { if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest, - p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino, + p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos, + tcd->cur_pino, THRESH_CALC, p_manager)) { /* TODO: what to do with l ??? seek / tell ??? 
*/ /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */ @@ -718,10 +721,9 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) /* ----------------------------------------------------------------------- */ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, - OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block, + OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block, opj_event_mgr_t* manager) { - OPJ_UINT32(*l_gain_ptr)(OPJ_UINT32) = 00; OPJ_UINT32 compno, resno, bandno, precno, cblkno; opj_tcp_t * l_tcp = 00; opj_cp_t * l_cp = 00; @@ -737,7 +739,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, OPJ_UINT32 p, q; OPJ_UINT32 l_level_no; OPJ_UINT32 l_pdx, l_pdy; - OPJ_UINT32 l_gain; OPJ_INT32 l_x0b, l_y0b; OPJ_UINT32 l_tx0, l_ty0; /* extent of precincts , top left, bottom right**/ @@ -876,11 +877,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_level_no = l_tilec->numresolutions; l_res = l_tilec->resolutions; l_step_size = l_tccp->stepsizes; - if (l_tccp->qmfbid == 0) { - l_gain_ptr = &opj_dwt_getgain_real; - } else { - l_gain_ptr = &opj_dwt_getgain; - } /*fprintf(stderr, "\tlevel_no=%d\n",l_level_no);*/ for (resno = 0; resno < l_tilec->numresolutions; ++resno) { @@ -967,7 +963,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_band = l_res->bands; for (bandno = 0; bandno < l_res->numbands; ++bandno, ++l_band, ++l_step_size) { - OPJ_INT32 numbps; /*fprintf(stderr, "\t\t\tband_no=%d/%d\n", bandno, l_res->numbands );*/ if (resno == 0) { @@ -1003,11 +998,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, } } - /** avoid an if with storing function pointer */ - l_gain = (*l_gain_ptr)(l_band->bandno); - numbps = (OPJ_INT32)(l_image_comp->prec + l_gain); - l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, - 
(OPJ_INT32)(numbps - l_step_size->expn)))) * fraction; + { + /* Table E-1 - Sub-band gains */ + /* BUG_WEIRD_TWO_INVK (look for this identifier in dwt.c): */ + /* the test (!isEncoder && l_tccp->qmfbid == 0) is strongly */ + /* linked to the use of two_invK instead of invK */ + const OPJ_INT32 log2_gain = (!isEncoder && + l_tccp->qmfbid == 0) ? 0 : (l_band->bandno == 0) ? 0 : + (l_band->bandno == 3) ? 2 : 1; + + /* Nominal dynamic range. Equation E-4 */ + const OPJ_INT32 Rb = (OPJ_INT32)l_image_comp->prec + log2_gain; + + /* Delta_b value of Equation E-3 in "E.1 Inverse quantization + * procedure" of the standard */ + l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, + (OPJ_INT32)(Rb - l_step_size->expn)))); + } + /* Mb value of Equation E-2 in "E.1 Inverse quantization * procedure" of the standard */ l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - @@ -1190,14 +1198,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, opj_event_mgr_t* p_manager) { - return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, 1.0F, + return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, sizeof(opj_tcd_cblk_enc_t), p_manager); } OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, opj_event_mgr_t* p_manager) { - return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, 0.5F, + return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, sizeof(opj_tcd_cblk_dec_t), p_manager); } @@ -1235,10 +1243,16 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t * /* +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */ /* and actually +2 required for https://github.com/uclouvain/openjpeg/issues/982 */ + /* and +7 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 3) */ + /* and +26 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 7) */ + /* and +28 for 
https://github.com/uclouvain/openjpeg/issues/1283 (-M 44) */ + /* and +33 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4) */ + /* and +63 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -IMF 2K) */ + /* and +74 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -n 8 -s 7,7 -I) */ /* TODO: is there a theoretical upper-bound for the compressed code */ /* block size ? */ - l_data_size = 2 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) * - (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32)); + l_data_size = 74 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) * + (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32)); if (l_data_size > p_code_block->data_size) { if (p_code_block->data) { @@ -1370,6 +1384,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_length, opj_codestream_info_t *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager) { @@ -1449,7 +1464,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, /* FIXME _ProfStart(PGROUP_T2); */ if (! 
opj_tcd_t2_encode(p_tcd, p_dest, p_data_written, p_max_length, - p_cstr_info, p_manager)) { + p_cstr_info, p_marker_info, p_manager)) { return OPJ_FALSE; } /* FIXME _ProfStop(PGROUP_T2); */ @@ -2033,7 +2048,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcp_t * l_tcp = p_tcd->tcp; opj_tcd_tilecomp_t * l_tile_comp = l_tile->comps; - OPJ_UINT32 l_samples, i; + OPJ_SIZE_T l_samples; + OPJ_UINT32 i; if (l_tcp->mct == 0 || p_tcd->used_component != NULL) { return OPJ_TRUE; @@ -2046,8 +2062,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) /* A bit inefficient: we process more data than needed if */ /* resno_decoded < l_tile_comp->minimum_num_resolutions-1, */ /* but we would need to take into account a stride then */ - l_samples = (OPJ_UINT32)((res_comp0->x1 - res_comp0->x0) * - (res_comp0->y1 - res_comp0->y0)); + l_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) * + (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0); if (l_tile->numcomps >= 3) { if (l_tile_comp->minimum_num_resolutions != l_tile->comps[1].minimum_num_resolutions || @@ -2081,8 +2097,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) opj_tcd_resolution_t* res_comp0 = l_tile->comps[0].resolutions + p_tcd->image->comps[0].resno_decoded; - l_samples = (res_comp0->win_x1 - res_comp0->win_x0) * - (res_comp0->win_y1 - res_comp0->win_y0); + l_samples = (OPJ_SIZE_T)(res_comp0->win_x1 - res_comp0->win_x0) * + (OPJ_SIZE_T)(res_comp0->win_y1 - res_comp0->win_y0); if (l_tile->numcomps >= 3) { opj_tcd_resolution_t* res_comp1 = l_tile->comps[1].resolutions + p_tcd->image->comps[1].resno_decoded; @@ -2348,7 +2364,7 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct) } } -OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) +OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd) { OPJ_UINT32 i; OPJ_SIZE_T 
l_data_size = 0; @@ -2406,7 +2422,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) } } else { for (i = 0; i < l_nb_elem; ++i) { - *l_current_ptr = (*l_current_ptr - l_tccp->m_dc_level_shift) * (1 << 11); + *((OPJ_FLOAT32 *) l_current_ptr) = (OPJ_FLOAT32)(*l_current_ptr - + l_tccp->m_dc_level_shift); ++l_current_ptr; } } @@ -2464,8 +2481,11 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) opj_free(l_data); } else if (l_tcp->tccps->qmfbid == 0) { - opj_mct_encode_real(l_tile->comps[0].data, l_tile->comps[1].data, - l_tile->comps[2].data, samples); + opj_mct_encode_real( + (OPJ_FLOAT32*)l_tile->comps[0].data, + (OPJ_FLOAT32*)l_tile->comps[1].data, + (OPJ_FLOAT32*)l_tile->comps[2].data, + samples); } else { opj_mct_encode(l_tile->comps[0].data, l_tile->comps[1].data, l_tile->comps[2].data, samples); @@ -2483,11 +2503,11 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; ++compno) { if (l_tccp->qmfbid == 1) { - if (! opj_dwt_encode(l_tile_comp)) { + if (! opj_dwt_encode(p_tcd, l_tile_comp)) { return OPJ_FALSE; } } else if (l_tccp->qmfbid == 0) { - if (! opj_dwt_encode_real(l_tile_comp)) { + if (! opj_dwt_encode_real(p_tcd, l_tile_comp)) { return OPJ_FALSE; } } @@ -2501,16 +2521,10 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd) static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd) { - opj_t1_t * l_t1; const OPJ_FLOAT64 * l_mct_norms; OPJ_UINT32 l_mct_numcomps = 0U; opj_tcp_t * l_tcp = p_tcd->tcp; - l_t1 = opj_t1_create(OPJ_TRUE); - if (l_t1 == 00) { - return OPJ_FALSE; - } - if (l_tcp->mct == 1) { l_mct_numcomps = 3U; /* irreversible encoding */ @@ -2524,13 +2538,9 @@ static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd) l_mct_norms = (const OPJ_FLOAT64 *)(l_tcp->mct_norms); } - if (! 
opj_t1_encode_cblks(l_t1, p_tcd->tcd_image->tiles, l_tcp, l_mct_norms, - l_mct_numcomps)) { - opj_t1_destroy(l_t1); - return OPJ_FALSE; - } - - opj_t1_destroy(l_t1); + return opj_t1_encode_cblks(p_tcd, + p_tcd->tcd_image->tiles, l_tcp, l_mct_norms, + l_mct_numcomps); return OPJ_TRUE; } @@ -2540,6 +2550,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_dest_size, opj_codestream_info_t *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager) { opj_t2_t * l_t2; @@ -2558,6 +2569,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, p_data_written, p_max_dest_size, p_cstr_info, + p_marker_info, p_tcd->tp_num, p_tcd->tp_pos, p_tcd->cur_pino, @@ -2616,7 +2628,7 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_UINT32 l_size_comp, l_remaining; OPJ_SIZE_T l_nb_elem; - l_data_size = opj_tcd_get_encoded_tile_size(p_tcd); + l_data_size = opj_tcd_get_encoder_input_buffer_size(p_tcd); if (l_data_size != p_src_length) { return OPJ_FALSE; } @@ -2818,3 +2830,30 @@ static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd, (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 && (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0))); } + +/* ----------------------------------------------------------------------- */ + +opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT) +{ + opj_tcd_marker_info_t *l_tcd_marker_info = + (opj_tcd_marker_info_t*) opj_calloc(1, sizeof(opj_tcd_marker_info_t)); + if (!l_tcd_marker_info) { + return NULL; + } + + l_tcd_marker_info->need_PLT = need_PLT; + + return l_tcd_marker_info; +} + +/* ----------------------------------------------------------------------- */ + +void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info) +{ + if (p_tcd_marker_info) { + opj_free(p_tcd_marker_info->p_packet_size); + opj_free(p_tcd_marker_info); + } +} + +/* ----------------------------------------------------------------------- */ diff --git 
a/openjpeg/src/lib/openjp2/tcd.h b/openjpeg/src/lib/openjp2/tcd.h index e3214c1d9..f1b52b8da 100644 --- a/openjpeg/src/lib/openjp2/tcd.h +++ b/openjpeg/src/lib/openjp2/tcd.h @@ -284,6 +284,22 @@ typedef struct opj_tcd { OPJ_BOOL* used_component; } opj_tcd_t; +/** + * Structure to hold information needed to generate some markers. + * Used by encoder. + */ +typedef struct opj_tcd_marker_info { + /** In: Whether information to generate PLT markers in needed */ + OPJ_BOOL need_PLT; + + /** OUT: Number of elements in p_packet_size[] array */ + OPJ_UINT32 packet_count; + + /** OUT: Array of size packet_count, such that p_packet_size[i] is + * the size in bytes of the ith packet */ + OPJ_UINT32* p_packet_size; +} opj_tcd_marker_info_t; + /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ @@ -306,6 +322,21 @@ Destroy a previously created TCD handle */ void opj_tcd_destroy(opj_tcd_t *tcd); + +/** + * Create a new opj_tcd_marker_info_t* structure + * @param need_PLT Whether information is needed to generate PLT markers. + */ +opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT); + + +/** +Destroy a previously created opj_tcd_marker_info_t* structure +@param p_tcd_marker_info Structure to destroy +*/ +void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info); + + /** * Initialize the tile coder and may reuse some memory. * @param p_tcd TCD handle. @@ -364,6 +395,7 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, * @param p_data_written pointer to an int that is incremented by the number of bytes really written on p_dest * @param p_len Maximum length of the destination buffer * @param p_cstr_info Codestream information structure + * @param p_marker_info Marker information structure * @param p_manager the user event manager * @return true if the coding is successful. 
*/ @@ -373,6 +405,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_len, struct opj_codestream_info *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager); @@ -415,9 +448,11 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, OPJ_UINT32 p_dest_length); /** - * + * Get the size in bytes of the input buffer provided before encoded. + * This must be the size provided to the p_src_length argument of + * opj_tcd_copy_tile_data() */ -OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); +OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd); /** * Initialize the tile coder and may reuse some meory. @@ -433,6 +468,8 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, /** * Copies tile data from the given memory block onto the system. + * + * p_src_length must be equal to opj_tcd_get_encoder_input_buffer_size() */ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_BYTE * p_src, -- cgit v1.2.1