From bb7185254c6bb5697b9ed3c687881816d9119c27 Mon Sep 17 00:00:00 2001 From: Victor Derks Date: Sat, 24 Aug 2024 19:06:41 +0200 Subject: [PATCH] Update benchmark project for code changes + enable ARM64 build (#323) By design the benchmark project is not built, as it relies on Google Benchmark, which is retrieved using vcpkg. Vcpkg is now part of Visual Studio 2022, so building with VS 2022 works. However, one of the build steps of the CI pipeline builds CharLS with VS 2019 to ensure that VS 2019 can still be used. Enabling the benchmark project in the solution file for x86 and x64 would break the VS 2019 build. ARM64 builds are only supported in VS 2022, so enabling that platform doesn't break VS 2019. --- CharLS.sln | 3 ++ benchmark/benchmark.cpp | 54 ++++++++++--------- benchmark/benchmark.vcxproj | 4 ++ benchmark/benchmark.vcxproj.filters | 6 +++ benchmark/context_regular_mode.cpp | 9 ++-- benchmark/context_regular_mode_v220.h | 7 +-- benchmark/decode.cpp | 74 +++++++++++++++++++++++++++ benchmark/log2.cpp | 13 ++--- benchmark/vcpkg.json | 11 ++-- fuzzing/libfuzzer/main.cpp | 3 +- include/charls/charls.ixx | 4 +- spelling.dic | 2 + 12 files changed, 144 insertions(+), 46 deletions(-) create mode 100644 benchmark/decode.cpp diff --git a/CharLS.sln b/CharLS.sln index 2d96007..5c29761 100644 --- a/CharLS.sln +++ b/CharLS.sln @@ -142,12 +142,15 @@ Global {E09F024E-A125-48AA-8E9D-7D1302BEAC97}.Release|x86.ActiveCfg = Release|Win32 {E09F024E-A125-48AA-8E9D-7D1302BEAC97}.Release|x86.Build.0 = Release|Win32 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|ARM64.ActiveCfg = Checked|ARM64 + {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|ARM64.Build.0 = Checked|ARM64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|x64.ActiveCfg = Checked|x64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|x86.ActiveCfg = Checked|Win32 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|ARM64.Build.0 = Debug|ARM64 
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x64.ActiveCfg = Debug|x64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x86.ActiveCfg = Debug|Win32 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|ARM64.ActiveCfg = Release|ARM64 + {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|ARM64.Build.0 = Release|ARM64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x64.ActiveCfg = Release|x64 {F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x86.ActiveCfg = Release|Win32 {5637C116-ABF5-4274-A71F-34433713A538}.Checked|ARM64.ActiveCfg = Checked|ARM64 diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 2f76848..e406d71 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -3,7 +3,7 @@ #include -#include "../src/jpegls_preset_coding_parameters.h" +#include "../src/jpegls_preset_coding_parameters.hpp" #include #include @@ -12,7 +12,7 @@ #pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) -int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept +static int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept { constexpr int32_t near_lossless{}; @@ -36,7 +36,7 @@ int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const i return 4; } -std::vector create_quantize_lut_lossless(const int32_t bit_count) +static std::vector create_quantize_lut_lossless(const int32_t bit_count) { const charls::jpegls_pc_parameters preset{charls::compute_default((1 << static_cast(bit_count)) - 1, 0)}; const int32_t range{preset.maximum_sample_value + 1}; @@ -100,7 +100,8 @@ struct lossless_traits final }; -__declspec(noinline) int32_t get_predicted_value_default(const int32_t ra, const int32_t rb, const int32_t rc) noexcept +static __declspec(noinline) int32_t + get_predicted_value_default(const int32_t ra, const int32_t rb, const int32_t rc) noexcept { if (ra < rb) { @@ -126,13 +127,14 @@ __declspec(noinline) int32_t 
get_predicted_value_default(const int32_t ra, const constexpr size_t int32_t_bit_count = sizeof(int32_t) * 8; -constexpr int32_t bit_wise_sign(const int32_t i) noexcept +static constexpr int32_t bit_wise_sign(const int32_t i) noexcept { return i >> (int32_t_bit_count - 1); } -__declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept +static __declspec(noinline) int32_t + get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept { // sign trick reduces the number of if statements (branches) const int32_t sign{bit_wise_sign(rb - ra)}; @@ -153,7 +155,7 @@ __declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, con #if defined(_M_X64) || defined(_M_ARM64) -inline int countl_zero(const uint64_t value) noexcept +inline static int countl_zero(const uint64_t value) noexcept { if (value == 0) return 64; @@ -211,7 +213,7 @@ static void bm_quantize_gradient_lut(benchmark::State& state) BENCHMARK(bm_quantize_gradient_lut); -int peek_zero_bits(uint64_t val_test) noexcept +static int peek_zero_bits(uint64_t val_test) noexcept { for (int32_t count{}; count < 16; ++count) { @@ -254,7 +256,7 @@ BENCHMARK(bm_peek_zero_bits_intrinsic); #endif -std::vector allocate_buffer(const size_t size) +static std::vector allocate_buffer(const size_t size) { std::vector buffer; buffer.resize(size); @@ -306,7 +308,7 @@ private: }; -overwrite_buffer allocate_overwrite_buffer(const size_t size) +static overwrite_buffer allocate_overwrite_buffer(const size_t size) { overwrite_buffer buffer; buffer.reset(size); @@ -324,7 +326,7 @@ static void bm_resize_overwrite_buffer(benchmark::State& state) BENCHMARK(bm_resize_overwrite_buffer); -int memset_buffer(uint8_t* data, const size_t size) +static int memset_buffer(uint8_t* data, const size_t size) noexcept { memset(data, 0, size); return 0; @@ -342,7 +344,7 @@ static void bm_memset_buffer(benchmark::State& state) 
BENCHMARK(bm_memset_buffer); -bool has_ff_byte_classic(const unsigned int value) +constexpr static bool has_ff_byte_classic(const unsigned int value) noexcept { // Check if any byte is equal to 0xFF return ((value & 0xFF) == 0xFF) || (((value >> 8) & 0xFF) == 0xFF) || (((value >> 16) & 0xFF) == 0xFF) || @@ -358,7 +360,7 @@ static void bm_has_ff_byte_classic(benchmark::State& state) } BENCHMARK(bm_has_ff_byte_classic); -bool has_ff_byte_loop(const unsigned int value) +static bool has_ff_byte_loop(const unsigned int value) noexcept { // Iterate over each byte and check if it is equal to 0xFF for (int i = 0; i < sizeof(unsigned int); ++i) @@ -380,7 +382,8 @@ static void bm_has_ff_byte_loop(benchmark::State& state) } BENCHMARK(bm_has_ff_byte_loop); -bool has_ff_byte_simd(const unsigned int value) { +#if !defined(_M_ARM64) +static bool has_ff_byte_simd(const unsigned int value) { // Use SSE instructions for parallel comparison const __m128i xmm_value = _mm_set1_epi32(value); const __m128i xmm_ff = _mm_set1_epi32(0xFF); @@ -400,9 +403,9 @@ static void bm_has_ff_byte_simd(benchmark::State& state) } } BENCHMARK(bm_has_ff_byte_simd); +#endif - -const std::byte* find_jpeg_marker_start_byte(const std::byte* position, const std::byte* end_position) noexcept +static const std::byte* find_jpeg_marker_start_byte(const std::byte* position, const std::byte* end_position) noexcept { constexpr std::byte jpeg_marker_start_byte{0xFF}; @@ -484,7 +487,8 @@ T read_big_endian_unaligned(const void* buffer) noexcept #endif } -uint32_t read_all_bytes_with_ff_check(const std::byte* position, const std::byte* end_position) +#if !defined(_M_ARM64) +static uint32_t read_all_bytes_with_ff_check(const std::byte* position, const std::byte* end_position) { uint32_t result{}; @@ -514,9 +518,10 @@ static void bm_read_all_bytes_with_ff_check(benchmark::State& state) } } BENCHMARK(bm_read_all_bytes_with_ff_check); +#endif - -bool has_ff_byte_simd64(const uint64_t value) +#if !defined(_M_ARM64) +static 
bool has_ff_byte_simd64(const uint64_t value) { // Use SSE instructions for parallel comparison const __m128i xmm_value = _mm_set1_epi64x(value); @@ -529,7 +534,7 @@ bool has_ff_byte_simd64(const uint64_t value) return _mm_testz_si128(comparison, comparison) == 0; } -uint64_t read_all_bytes_with_ff_check64(const std::byte* position, const std::byte* end_position) +static uint64_t read_all_bytes_with_ff_check64(const std::byte* position, const std::byte* end_position) { uint64_t result{}; @@ -557,9 +562,10 @@ static void bm_read_all_bytes_with_ff_check64(benchmark::State& state) } } BENCHMARK(bm_read_all_bytes_with_ff_check64); +#endif -uint32_t read_all_bytes_no_check(const std::byte* position, const std::byte* end_position) +static uint32_t read_all_bytes_no_check(const std::byte* position, const std::byte* end_position) noexcept { uint32_t result{}; @@ -582,7 +588,7 @@ static void bm_read_all_bytes_no_check(benchmark::State& state) } BENCHMARK(bm_read_all_bytes_no_check); -uint64_t read_all_bytes_no_check64(const std::byte* position, const std::byte* end_position) +static uint64_t read_all_bytes_no_check64(const std::byte* position, const std::byte* end_position) noexcept { uint64_t result{}; @@ -605,7 +611,9 @@ static void bm_read_all_bytes_no_check64(benchmark::State& state) } BENCHMARK(bm_read_all_bytes_no_check64); +// Tips to run the benchmark tests: - +// To run a single benchmark: +// benchmark --benchmark_filter=bm_decode BENCHMARK_MAIN(); diff --git a/benchmark/benchmark.vcxproj b/benchmark/benchmark.vcxproj index 0212694..7312584 100644 --- a/benchmark/benchmark.vcxproj +++ b/benchmark/benchmark.vcxproj @@ -177,6 +177,7 @@ + @@ -187,6 +188,9 @@ + + + diff --git a/benchmark/benchmark.vcxproj.filters b/benchmark/benchmark.vcxproj.filters index 8ecb6d6..1c000eb 100644 --- a/benchmark/benchmark.vcxproj.filters +++ b/benchmark/benchmark.vcxproj.filters @@ -20,10 +20,16 @@ Source Files + + Source Files + Header Files + + + \ No newline at end of file diff 
--git a/benchmark/context_regular_mode.cpp b/benchmark/context_regular_mode.cpp index ac1e579..8db990f 100644 --- a/benchmark/context_regular_mode.cpp +++ b/benchmark/context_regular_mode.cpp @@ -6,10 +6,11 @@ #include "context_regular_mode_v220.h" #pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) +#pragma warning(disable : 4746) // volatile access of 'reset_threshold' is subject to /volatile: setting; (in ARM64 mode) using namespace charls; -context_regular_mode g_context; +regular_mode_context g_context; jls_context_v220 g_context_v220; volatile int32_t error_value; @@ -29,7 +30,7 @@ BENCHMARK(bm_regular_mode_update_variables_220); static void bm_regular_mode_update_variables(benchmark::State& state) { - g_context = context_regular_mode(); + g_context = regular_mode_context(); for (const auto _ : state) { @@ -52,12 +53,12 @@ BENCHMARK(bm_regular_mode_get_golomb_coding_parameter_v220); static void bm_regular_mode_get_golomb_coding_parameter(benchmark::State& state) { - g_context = context_regular_mode(); + g_context = regular_mode_context(); g_context.update_variables_and_bias(error_value, near_lossless, reset_threshold); for (const auto _ : state) { - benchmark::DoNotOptimize(g_context.get_golomb_coding_parameter()); + benchmark::DoNotOptimize(g_context.compute_golomb_coding_parameter()); } } BENCHMARK(bm_regular_mode_get_golomb_coding_parameter); diff --git a/benchmark/context_regular_mode_v220.h b/benchmark/context_regular_mode_v220.h index 0d9790a..cf33004 100644 --- a/benchmark/context_regular_mode_v220.h +++ b/benchmark/context_regular_mode_v220.h @@ -3,7 +3,8 @@ #pragma once -#include "../src/context_regular_mode.h" +#include "../src/regular_mode_context.hpp" + #include #include @@ -43,7 +44,7 @@ struct jls_context_v220 final int n{N}; if (constexpr int limit{65536 * 256}; UNLIKELY(a >= limit || std::abs(b) >= limit)) - impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data); + 
impl::throw_jpegls_error(jpegls_errc::invalid_data); if (n == reset_threshold) { @@ -92,7 +93,7 @@ struct jls_context_v220 final } if (UNLIKELY(k == max_k_value)) - impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data); + impl::throw_jpegls_error(jpegls_errc::invalid_data); return k; } diff --git a/benchmark/decode.cpp b/benchmark/decode.cpp new file mode 100644 index 0000000..224ac16 --- /dev/null +++ b/benchmark/decode.cpp @@ -0,0 +1,74 @@ +// Copyright (c) Team CharLS. +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include "../include/charls/charls.hpp" + +#include +#include +#include +#include +#include + +#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) + +using namespace charls; +using std::byte; +using std::ifstream; +using std::ios; +using std::vector; + +template +void read(std::istream& input, Container& destination) +{ + input.read(reinterpret_cast(destination.data()), static_cast(destination.size())); +} + +vector read_file(const char* filename, long offset = 0, size_t bytes = 0) +try +{ + ifstream input; + input.exceptions(ios::eofbit | ios::failbit | ios::badbit); + input.open(filename, ios::in | ios::binary); + + input.seekg(0, ios::end); + const auto byte_count_file{static_cast(input.tellg())}; + input.seekg(offset, ios::beg); + + if (offset < 0) + { + offset = static_cast(byte_count_file - bytes); + } + if (bytes == 0) + { + bytes = static_cast(byte_count_file) - offset; + } + + vector buffer(bytes); + read(input, buffer); + + return buffer; +} +catch (const std::ifstream::failure&) +{ + std::cout << "Failed to open/read file: " << std::filesystem::absolute(filename) << "\n"; + throw; +} + + +static void bm_decode(benchmark::State& state) +{ + const auto source{read_file("d:/benchmark-test-image.jls")}; + + // Pre-allocate the destination outside the measurement loop. + // std::vector initializes its elements and this step needs to be excluded from the measurement. 
+ vector destination(jpegls_decoder{source, true}.get_destination_size()); + + for (const auto _ : state) + { + jpegls_decoder decoder(source.data(), source.size()); + decoder.decode(destination); + } +} +BENCHMARK(bm_decode); diff --git a/benchmark/log2.cpp b/benchmark/log2.cpp index 68a5b61..16d8b33 100644 --- a/benchmark/log2.cpp +++ b/benchmark/log2.cpp @@ -3,18 +3,19 @@ #include -#include "../src/util.h" +#include "../src/jpegls_algorithm.hpp" #include #include +#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro) -uint32_t log2_floor(const uint32_t n) noexcept +static uint32_t log2_floor(const uint32_t n) noexcept { return 31 - charls::countl_zero(n); } -uint32_t max_value_to_bits_per_sample(const uint32_t max_value) noexcept +static uint32_t max_value_to_bits_per_sample(const uint32_t max_value) noexcept { ASSERT(max_value > 0); return log2_floor(max_value) + 1; @@ -48,9 +49,9 @@ static void bm_log2_ceil_int32(benchmark::State& state) { for (const auto _ : state) { - benchmark::DoNotOptimize(charls::log2_ceil(256)); - benchmark::DoNotOptimize(charls::log2_ceil(1024)); - benchmark::DoNotOptimize(charls::log2_ceil(std::numeric_limits::max())); + benchmark::DoNotOptimize(charls::log2_ceiling(256)); + benchmark::DoNotOptimize(charls::log2_ceiling(1024)); + benchmark::DoNotOptimize(charls::log2_ceiling(std::numeric_limits::max())); } } BENCHMARK(bm_log2_ceil_int32); diff --git a/benchmark/vcpkg.json b/benchmark/vcpkg.json index f8f0a3a..1072f42 100644 --- a/benchmark/vcpkg.json +++ b/benchmark/vcpkg.json @@ -1,8 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json", - "name": "charls-benchmark", - "version": "1.0.0", - "dependencies": [ - "benchmark" - ] - } \ No newline at end of file + "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json", + "dependencies": [ { "name": "benchmark", "version>=":"1.8.5" } ], + 
"builtin-baseline": "3508985146f1b1d248c67ead13f8f54be5b4f5da" +} diff --git a/fuzzing/libfuzzer/main.cpp b/fuzzing/libfuzzer/main.cpp index 354e68b..b0fec30 100644 --- a/fuzzing/libfuzzer/main.cpp +++ b/fuzzing/libfuzzer/main.cpp @@ -1,9 +1,10 @@ // Copyright (c) Team CharLS. // SPDX-License-Identifier: BSD-3-Clause -#include #include "../include/charls/jpegls_decoder.hpp" +#include + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, const size_t size) { charls::jpegls_decoder decoder(data, size, false); diff --git a/include/charls/charls.ixx b/include/charls/charls.ixx index e91e31e..84c1edf 100644 --- a/include/charls/charls.ixx +++ b/include/charls/charls.ixx @@ -15,6 +15,6 @@ module; export module charls; -#include "charls_jpegls_decoder.h" -#include "charls_jpegls_encoder.h" +#include "jpegls_decoder.hpp" +#include "jpegls_encoder.hpp" #include "version.h" diff --git a/spelling.dic b/spelling.dic index b4463ef..3ed862f 100644 --- a/spelling.dic +++ b/spelling.dic @@ -22,3 +22,5 @@ palletised rect cmove Fuzzer +argv' +argc'