diff --git a/CharLS.sln b/CharLS.sln index 2d96007..5c29761 100644 --- a/CharLS.sln +++ b/CharLS.sln @@ -142,12 +142,15 @@ Global {E09F024E-A125-48AA-8E9D-7D1302BEAC97}.Release|x86.ActiveCfg = Release|Win32 {E09F024E-A125-48AA-8E9D-7D1302BEAC97}.Release|x86.Build.0 = Release|Win32 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|ARM64.ActiveCfg = Checked|ARM64 + {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|ARM64.Build.0 = Checked|ARM64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|x64.ActiveCfg = Checked|x64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|x86.ActiveCfg = Checked|Win32 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|ARM64.Build.0 = Debug|ARM64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x64.ActiveCfg = Debug|x64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x86.ActiveCfg = Debug|Win32 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|ARM64.ActiveCfg = Release|ARM64 + {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|ARM64.Build.0 = Release|ARM64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x64.ActiveCfg = Release|x64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x86.ActiveCfg = Release|Win32 {5637C116-ABF5-4274-A71F-34433713A538}.Checked|ARM64.ActiveCfg = Checked|ARM64 diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 2f76848..e406d71 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -3,7 +3,7 @@ #include -#include "../src/jpegls_preset_coding_parameters.h" +#include "../src/jpegls_preset_coding_parameters.hpp" #include #include @@ -12,7 +12,7 @@ #pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) -int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept +static int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept { constexpr int32_t near_lossless{}; @@ -36,7 +36,7 @@ int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const i return 4; } -std::vector create_quantize_lut_lossless(const int32_t bit_count) +static std::vector create_quantize_lut_lossless(const int32_t bit_count) { const charls::jpegls_pc_parameters preset{charls::compute_default((1 << static_cast(bit_count)) - 1, 0)}; const int32_t range{preset.maximum_sample_value + 1}; @@ -100,7 +100,8 @@ struct lossless_traits final }; -__declspec(noinline) int32_t get_predicted_value_default(const int32_t ra, const int32_t rb, const int32_t rc) noexcept +static __declspec(noinline) int32_t + get_predicted_value_default(const int32_t ra, const int32_t rb, const int32_t rc) noexcept { if (ra < rb) { @@ -126,13 +127,14 @@ __declspec(noinline) int32_t get_predicted_value_default(const int32_t ra, const constexpr size_t int32_t_bit_count = sizeof(int32_t) * 8; -constexpr int32_t bit_wise_sign(const int32_t i) noexcept +static constexpr int32_t bit_wise_sign(const int32_t i) noexcept { return i >> (int32_t_bit_count - 1); } -__declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept +static __declspec(noinline) int32_t + get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept { // sign trick reduces the number of if statements (branches) const int32_t sign{bit_wise_sign(rb - ra)}; @@ -153,7 +155,7 @@ __declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, con #if defined(_M_X64) || defined(_M_ARM64) -inline int countl_zero(const uint64_t value) noexcept +inline static int countl_zero(const uint64_t value) noexcept { if (value == 0) return 64; @@ -211,7 +213,7 @@ static void bm_quantize_gradient_lut(benchmark::State& state) BENCHMARK(bm_quantize_gradient_lut); -int peek_zero_bits(uint64_t val_test) noexcept +static int peek_zero_bits(uint64_t val_test) noexcept { for (int32_t count{}; count < 16; ++count) { @@ -254,7 +256,7 @@ BENCHMARK(bm_peek_zero_bits_intrinsic); #endif -std::vector allocate_buffer(const size_t size) +static std::vector allocate_buffer(const size_t size) { std::vector buffer; buffer.resize(size); @@ -306,7 +308,7 @@ private: }; -overwrite_buffer allocate_overwrite_buffer(const size_t size) +static overwrite_buffer allocate_overwrite_buffer(const size_t size) { overwrite_buffer buffer; buffer.reset(size); @@ -324,7 +326,7 @@ static void bm_resize_overwrite_buffer(benchmark::State& state) BENCHMARK(bm_resize_overwrite_buffer); -int memset_buffer(uint8_t* data, const size_t size) +static int memset_buffer(uint8_t* data, const size_t size) noexcept { memset(data, 0, size); return 0; @@ -342,7 +344,7 @@ static void bm_memset_buffer(benchmark::State& state) BENCHMARK(bm_memset_buffer); -bool has_ff_byte_classic(const unsigned int value) +constexpr static bool has_ff_byte_classic(const unsigned int value) noexcept { // Check if any byte is equal to 0xFF return ((value & 0xFF) == 0xFF) || (((value >> 8) & 0xFF) == 0xFF) || (((value >> 16) & 0xFF) == 0xFF) || @@ -358,7 +360,7 @@ static void bm_has_ff_byte_classic(benchmark::State& state) } BENCHMARK(bm_has_ff_byte_classic); -bool has_ff_byte_loop(const unsigned int value) +static bool has_ff_byte_loop(const unsigned int value) noexcept { // Iterate over each byte and check if it is equal to 0xFF for (int i = 0; i < sizeof(unsigned int); ++i) @@ -380,7 +382,8 @@ static void bm_has_ff_byte_loop(benchmark::State& state) } BENCHMARK(bm_has_ff_byte_loop); -bool has_ff_byte_simd(const unsigned int value) { +#if !defined(_M_ARM64) +static bool has_ff_byte_simd(const unsigned int value) { // Use SSE instructions for parallel comparison const __m128i xmm_value = _mm_set1_epi32(value); const __m128i xmm_ff = _mm_set1_epi32(0xFF); @@ -400,9 +403,9 @@ static void bm_has_ff_byte_simd(benchmark::State& state) } } BENCHMARK(bm_has_ff_byte_simd); +#endif - -const std::byte* find_jpeg_marker_start_byte(const std::byte* position, const std::byte* end_position) noexcept +static const std::byte* find_jpeg_marker_start_byte(const std::byte* position, const std::byte* end_position) noexcept { constexpr std::byte jpeg_marker_start_byte{0xFF}; @@ -484,7 +487,8 @@ T read_big_endian_unaligned(const void* buffer) noexcept #endif } -uint32_t read_all_bytes_with_ff_check(const std::byte* position, const std::byte* end_position) +#if !defined(_M_ARM64) +static uint32_t read_all_bytes_with_ff_check(const std::byte* position, const std::byte* end_position) { uint32_t result{}; @@ -514,9 +518,10 @@ static void bm_read_all_bytes_with_ff_check(benchmark::State& state) } } BENCHMARK(bm_read_all_bytes_with_ff_check); +#endif - -bool has_ff_byte_simd64(const uint64_t value) +#if !defined(_M_ARM64) +static bool has_ff_byte_simd64(const uint64_t value) { // Use SSE instructions for parallel comparison const __m128i xmm_value = _mm_set1_epi64x(value); @@ -529,7 +534,7 @@ bool has_ff_byte_simd64(const uint64_t value) return _mm_testz_si128(comparison, comparison) == 0; } -uint64_t read_all_bytes_with_ff_check64(const std::byte* position, const std::byte* end_position) +static uint64_t read_all_bytes_with_ff_check64(const std::byte* position, const std::byte* end_position) { uint64_t result{}; @@ -557,9 +562,10 @@ static void bm_read_all_bytes_with_ff_check64(benchmark::State& state) } } BENCHMARK(bm_read_all_bytes_with_ff_check64); +#endif -uint32_t read_all_bytes_no_check(const std::byte* position, const std::byte* end_position) +static uint32_t read_all_bytes_no_check(const std::byte* position, const std::byte* end_position) noexcept { uint32_t result{}; @@ -582,7 +588,7 @@ static void bm_read_all_bytes_no_check(benchmark::State& state) } BENCHMARK(bm_read_all_bytes_no_check); -uint64_t read_all_bytes_no_check64(const std::byte* position, const std::byte* end_position) +static uint64_t read_all_bytes_no_check64(const std::byte* position, const std::byte* end_position) noexcept { uint64_t result{}; @@ -605,7 +611,9 @@ static void bm_read_all_bytes_no_check64(benchmark::State& state) } BENCHMARK(bm_read_all_bytes_no_check64); +// Tips to run the benchmark tests: - +// To run a single benchmark: +// benchmark --benchmark_filter = bm_decode BENCHMARK_MAIN(); diff --git a/benchmark/benchmark.vcxproj b/benchmark/benchmark.vcxproj index 0212694..7312584 100644 --- a/benchmark/benchmark.vcxproj +++ b/benchmark/benchmark.vcxproj @@ -177,6 +177,7 @@ + @@ -187,6 +188,9 @@ + + + diff --git a/benchmark/benchmark.vcxproj.filters b/benchmark/benchmark.vcxproj.filters index 8ecb6d6..1c000eb 100644 --- a/benchmark/benchmark.vcxproj.filters +++ b/benchmark/benchmark.vcxproj.filters @@ -20,10 +20,16 @@ Source Files + + Source Files + Header Files + + + \ No newline at end of file diff --git a/benchmark/context_regular_mode.cpp b/benchmark/context_regular_mode.cpp index ac1e579..8db990f 100644 --- a/benchmark/context_regular_mode.cpp +++ b/benchmark/context_regular_mode.cpp @@ -6,10 +6,11 @@ #include "context_regular_mode_v220.h" #pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) +#pragma warning(disable : 4746) // volatile access of 'reset_threshold' is subject to /volatile: setting; (in ARM64 mode) using namespace charls; -context_regular_mode g_context; +regular_mode_context g_context; jls_context_v220 g_context_v220; volatile int32_t error_value; @@ -29,7 +30,7 @@ BENCHMARK(bm_regular_mode_update_variables_220); static void bm_regular_mode_update_variables(benchmark::State& state) { - g_context = context_regular_mode(); + g_context = regular_mode_context(); for (const auto _ : state) { @@ -52,12 +53,12 @@ BENCHMARK(bm_regular_mode_get_golomb_coding_parameter_v220); static void bm_regular_mode_get_golomb_coding_parameter(benchmark::State& state) { - g_context = context_regular_mode(); + g_context = regular_mode_context(); g_context.update_variables_and_bias(error_value, near_lossless, reset_threshold); for (const auto _ : state) { - benchmark::DoNotOptimize(g_context.get_golomb_coding_parameter()); + benchmark::DoNotOptimize(g_context.compute_golomb_coding_parameter()); } } BENCHMARK(bm_regular_mode_get_golomb_coding_parameter); diff --git a/benchmark/context_regular_mode_v220.h b/benchmark/context_regular_mode_v220.h index 0d9790a..cf33004 100644 --- a/benchmark/context_regular_mode_v220.h +++ b/benchmark/context_regular_mode_v220.h @@ -3,7 +3,8 @@ #pragma once -#include "../src/context_regular_mode.h" +#include "../src/regular_mode_context.hpp" + #include #include @@ -43,7 +44,7 @@ struct jls_context_v220 final int n{N}; if (constexpr int limit{65536 * 256}; UNLIKELY(a >= limit || std::abs(b) >= limit)) - impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data); + impl::throw_jpegls_error(jpegls_errc::invalid_data); if (n == reset_threshold) { @@ -92,7 +93,7 @@ struct jls_context_v220 final } if (UNLIKELY(k == max_k_value)) - impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data); + impl::throw_jpegls_error(jpegls_errc::invalid_data); return k; } diff --git a/benchmark/decode.cpp b/benchmark/decode.cpp new file mode 100644 index 0000000..224ac16 --- /dev/null +++ b/benchmark/decode.cpp @@ -0,0 +1,74 @@ +// Copyright (c) Team CharLS. +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include "../include/charls/charls.hpp" + +#include +#include +#include +#include +#include + +#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) + +using namespace charls; +using std::byte; +using std::ifstream; +using std::ios; +using std::vector; + +template +void read(std::istream& input, Container& destination) +{ + input.read(reinterpret_cast(destination.data()), static_cast(destination.size())); +} + +vector read_file(const char* filename, long offset = 0, size_t bytes = 0) +try +{ + ifstream input; + input.exceptions(ios::eofbit | ios::failbit | ios::badbit); + input.open(filename, ios::in | ios::binary); + + input.seekg(0, ios::end); + const auto byte_count_file{static_cast(input.tellg())}; + input.seekg(offset, ios::beg); + + if (offset < 0) + { + offset = static_cast(byte_count_file - bytes); + } + if (bytes == 0) + { + bytes = static_cast(byte_count_file) - offset; + } + + vector buffer(bytes); + read(input, buffer); + + return buffer; +} +catch (const std::ifstream::failure&) +{ + std::cout << "Failed to open/read file: " << std::filesystem::absolute(filename) << "\n"; + throw; +} + + +static void bm_decode(benchmark::State& state) +{ + const auto source{read_file("d:/benchmark-test-image.jls")}; + + // Pre-allocate the destination outside the measurement loop. + // std::vector initializes its elements and this step needs to be excluded from the measurement. + vector destination(jpegls_decoder{source, true}.get_destination_size()); + + for (const auto _ : state) + { + jpegls_decoder decoder(source.data(), source.size()); + decoder.decode(destination); + } +} +BENCHMARK(bm_decode); diff --git a/benchmark/log2.cpp b/benchmark/log2.cpp index 68a5b61..16d8b33 100644 --- a/benchmark/log2.cpp +++ b/benchmark/log2.cpp @@ -3,18 +3,19 @@ #include -#include "../src/util.h" +#include "../src/jpegls_algorithm.hpp" #include #include +#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) -uint32_t log2_floor(const uint32_t n) noexcept +static uint32_t log2_floor(const uint32_t n) noexcept { return 31 - charls::countl_zero(n); } -uint32_t max_value_to_bits_per_sample(const uint32_t max_value) noexcept +static uint32_t max_value_to_bits_per_sample(const uint32_t max_value) noexcept { ASSERT(max_value > 0); return log2_floor(max_value) + 1; @@ -48,9 +49,9 @@ static void bm_log2_ceil_int32(benchmark::State& state) { for (const auto _ : state) { - benchmark::DoNotOptimize(charls::log2_ceil(256)); - benchmark::DoNotOptimize(charls::log2_ceil(1024)); - benchmark::DoNotOptimize(charls::log2_ceil(std::numeric_limits::max())); + benchmark::DoNotOptimize(charls::log2_ceiling(256)); + benchmark::DoNotOptimize(charls::log2_ceiling(1024)); + benchmark::DoNotOptimize(charls::log2_ceiling(std::numeric_limits::max())); } } BENCHMARK(bm_log2_ceil_int32); diff --git a/benchmark/vcpkg.json b/benchmark/vcpkg.json index f8f0a3a..1072f42 100644 --- a/benchmark/vcpkg.json +++ b/benchmark/vcpkg.json @@ -1,8 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json", - "name": "charls-benchmark", - "version": "1.0.0", - "dependencies": [ - "benchmark" - ] - } \ No newline at end of file + "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json", + "dependencies": [ { "name": "benchmark", "version>=":"1.8.5" } ], + "builtin-baseline": "3508985146f1b1d248c67ead13f8f54be5b4f5da" +} diff --git a/fuzzing/libfuzzer/main.cpp b/fuzzing/libfuzzer/main.cpp index 354e68b..b0fec30 100644 --- a/fuzzing/libfuzzer/main.cpp +++ b/fuzzing/libfuzzer/main.cpp @@ -1,9 +1,10 @@ // Copyright (c) Team CharLS. // SPDX-License-Identifier: BSD-3-Clause -#include #include "../include/charls/jpegls_decoder.hpp" +#include + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, const size_t size) { charls::jpegls_decoder decoder(data, size, false); diff --git a/include/charls/charls.ixx b/include/charls/charls.ixx index e91e31e..84c1edf 100644 --- a/include/charls/charls.ixx +++ b/include/charls/charls.ixx @@ -15,6 +15,6 @@ module; export module charls; -#include "charls_jpegls_decoder.h" -#include "charls_jpegls_encoder.h" +#include "jpegls_decoder.hpp" +#include "jpegls_encoder.hpp" #include "version.h" diff --git a/spelling.dic b/spelling.dic index b4463ef..3ed862f 100644 --- a/spelling.dic +++ b/spelling.dic @@ -22,3 +22,5 @@ palletised rect cmove Fuzzer +argv' +argc'