Update benchmark project for code changes + enable ARM64 build (#323)

By design, the benchmark project is not built by default, as it relies on Google Benchmark, which is retrieved using vcpkg.
Vcpkg is now part of Visual Studio 2022, so building with VS 2022 works.
However, one of the build steps of the CI pipeline builds CharLS with VS 2019 to ensure that VS 2019 can still be used. Enabling the benchmark project in the solution file for x86 and x64 would break the VS 2019 build.
ARM64 builds are only supported in VS 2022, so enabling the benchmark project for that platform doesn't break VS 2019.
This commit is contained in:
Victor Derks 2024-08-24 19:06:41 +02:00 committed by GitHub
parent b035a731f7
commit bb7185254c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 144 additions and 46 deletions

View File

@ -142,12 +142,15 @@ Global
{E09F024E-A125-48AA-8E9D-7D1302BEAC97}.Release|x86.ActiveCfg = Release|Win32
{E09F024E-A125-48AA-8E9D-7D1302BEAC97}.Release|x86.Build.0 = Release|Win32
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|ARM64.ActiveCfg = Checked|ARM64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|ARM64.Build.0 = Checked|ARM64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|x64.ActiveCfg = Checked|x64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Checked|x86.ActiveCfg = Checked|Win32
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|ARM64.ActiveCfg = Debug|ARM64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|ARM64.Build.0 = Debug|ARM64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x64.ActiveCfg = Debug|x64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x86.ActiveCfg = Debug|Win32
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|ARM64.ActiveCfg = Release|ARM64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|ARM64.Build.0 = Release|ARM64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x64.ActiveCfg = Release|x64
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x86.ActiveCfg = Release|Win32
{5637C116-ABF5-4274-A71F-34433713A538}.Checked|ARM64.ActiveCfg = Checked|ARM64

View File

@ -3,7 +3,7 @@
#include <benchmark/benchmark.h>
#include "../src/jpegls_preset_coding_parameters.h"
#include "../src/jpegls_preset_coding_parameters.hpp"
#include <cstdint>
#include <memory>
@ -12,7 +12,7 @@
#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro)
int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept
static int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept
{
constexpr int32_t near_lossless{};
@ -36,7 +36,7 @@ int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const i
return 4;
}
std::vector<int8_t> create_quantize_lut_lossless(const int32_t bit_count)
static std::vector<int8_t> create_quantize_lut_lossless(const int32_t bit_count)
{
const charls::jpegls_pc_parameters preset{charls::compute_default((1 << static_cast<uint32_t>(bit_count)) - 1, 0)};
const int32_t range{preset.maximum_sample_value + 1};
@ -100,7 +100,8 @@ struct lossless_traits final
};
__declspec(noinline) int32_t get_predicted_value_default(const int32_t ra, const int32_t rb, const int32_t rc) noexcept
static __declspec(noinline) int32_t
get_predicted_value_default(const int32_t ra, const int32_t rb, const int32_t rc) noexcept
{
if (ra < rb)
{
@ -126,13 +127,14 @@ __declspec(noinline) int32_t get_predicted_value_default(const int32_t ra, const
constexpr size_t int32_t_bit_count = sizeof(int32_t) * 8;
constexpr int32_t bit_wise_sign(const int32_t i) noexcept
static constexpr int32_t bit_wise_sign(const int32_t i) noexcept
{
return i >> (int32_t_bit_count - 1);
}
__declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept
static __declspec(noinline) int32_t
get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept
{
// sign trick reduces the number of if statements (branches)
const int32_t sign{bit_wise_sign(rb - ra)};
@ -153,7 +155,7 @@ __declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, con
#if defined(_M_X64) || defined(_M_ARM64)
inline int countl_zero(const uint64_t value) noexcept
inline static int countl_zero(const uint64_t value) noexcept
{
if (value == 0)
return 64;
@ -211,7 +213,7 @@ static void bm_quantize_gradient_lut(benchmark::State& state)
BENCHMARK(bm_quantize_gradient_lut);
int peek_zero_bits(uint64_t val_test) noexcept
static int peek_zero_bits(uint64_t val_test) noexcept
{
for (int32_t count{}; count < 16; ++count)
{
@ -254,7 +256,7 @@ BENCHMARK(bm_peek_zero_bits_intrinsic);
#endif
std::vector<uint8_t> allocate_buffer(const size_t size)
static std::vector<uint8_t> allocate_buffer(const size_t size)
{
std::vector<uint8_t> buffer;
buffer.resize(size);
@ -306,7 +308,7 @@ private:
};
overwrite_buffer allocate_overwrite_buffer(const size_t size)
static overwrite_buffer allocate_overwrite_buffer(const size_t size)
{
overwrite_buffer buffer;
buffer.reset(size);
@ -324,7 +326,7 @@ static void bm_resize_overwrite_buffer(benchmark::State& state)
BENCHMARK(bm_resize_overwrite_buffer);
int memset_buffer(uint8_t* data, const size_t size)
static int memset_buffer(uint8_t* data, const size_t size) noexcept
{
memset(data, 0, size);
return 0;
@ -342,7 +344,7 @@ static void bm_memset_buffer(benchmark::State& state)
BENCHMARK(bm_memset_buffer);
bool has_ff_byte_classic(const unsigned int value)
constexpr static bool has_ff_byte_classic(const unsigned int value) noexcept
{
// Check if any byte is equal to 0xFF
return ((value & 0xFF) == 0xFF) || (((value >> 8) & 0xFF) == 0xFF) || (((value >> 16) & 0xFF) == 0xFF) ||
@ -358,7 +360,7 @@ static void bm_has_ff_byte_classic(benchmark::State& state)
}
BENCHMARK(bm_has_ff_byte_classic);
bool has_ff_byte_loop(const unsigned int value)
static bool has_ff_byte_loop(const unsigned int value) noexcept
{
// Iterate over each byte and check if it is equal to 0xFF
for (int i = 0; i < sizeof(unsigned int); ++i)
@ -380,7 +382,8 @@ static void bm_has_ff_byte_loop(benchmark::State& state)
}
BENCHMARK(bm_has_ff_byte_loop);
bool has_ff_byte_simd(const unsigned int value) {
#if !defined(_M_ARM64)
static bool has_ff_byte_simd(const unsigned int value) {
// Use SSE instructions for parallel comparison
const __m128i xmm_value = _mm_set1_epi32(value);
const __m128i xmm_ff = _mm_set1_epi32(0xFF);
@ -400,9 +403,9 @@ static void bm_has_ff_byte_simd(benchmark::State& state)
}
}
BENCHMARK(bm_has_ff_byte_simd);
#endif
const std::byte* find_jpeg_marker_start_byte(const std::byte* position, const std::byte* end_position) noexcept
static const std::byte* find_jpeg_marker_start_byte(const std::byte* position, const std::byte* end_position) noexcept
{
constexpr std::byte jpeg_marker_start_byte{0xFF};
@ -484,7 +487,8 @@ T read_big_endian_unaligned(const void* buffer) noexcept
#endif
}
uint32_t read_all_bytes_with_ff_check(const std::byte* position, const std::byte* end_position)
#if !defined(_M_ARM64)
static uint32_t read_all_bytes_with_ff_check(const std::byte* position, const std::byte* end_position)
{
uint32_t result{};
@ -514,9 +518,10 @@ static void bm_read_all_bytes_with_ff_check(benchmark::State& state)
}
}
BENCHMARK(bm_read_all_bytes_with_ff_check);
#endif
bool has_ff_byte_simd64(const uint64_t value)
#if !defined(_M_ARM64)
static bool has_ff_byte_simd64(const uint64_t value)
{
// Use SSE instructions for parallel comparison
const __m128i xmm_value = _mm_set1_epi64x(value);
@ -529,7 +534,7 @@ bool has_ff_byte_simd64(const uint64_t value)
return _mm_testz_si128(comparison, comparison) == 0;
}
uint64_t read_all_bytes_with_ff_check64(const std::byte* position, const std::byte* end_position)
static uint64_t read_all_bytes_with_ff_check64(const std::byte* position, const std::byte* end_position)
{
uint64_t result{};
@ -557,9 +562,10 @@ static void bm_read_all_bytes_with_ff_check64(benchmark::State& state)
}
}
BENCHMARK(bm_read_all_bytes_with_ff_check64);
#endif
uint32_t read_all_bytes_no_check(const std::byte* position, const std::byte* end_position)
static uint32_t read_all_bytes_no_check(const std::byte* position, const std::byte* end_position) noexcept
{
uint32_t result{};
@ -582,7 +588,7 @@ static void bm_read_all_bytes_no_check(benchmark::State& state)
}
BENCHMARK(bm_read_all_bytes_no_check);
uint64_t read_all_bytes_no_check64(const std::byte* position, const std::byte* end_position)
static uint64_t read_all_bytes_no_check64(const std::byte* position, const std::byte* end_position) noexcept
{
uint64_t result{};
@ -605,7 +611,9 @@ static void bm_read_all_bytes_no_check64(benchmark::State& state)
}
BENCHMARK(bm_read_all_bytes_no_check64);
// Tips to run the benchmark tests:
// To run a single benchmark:
// benchmark --benchmark_filter=bm_decode
BENCHMARK_MAIN();

View File

@ -177,6 +177,7 @@
<ItemGroup>
<ClCompile Include="benchmark.cpp" />
<ClCompile Include="context_regular_mode.cpp" />
<ClCompile Include="decode.cpp" />
<ClCompile Include="log2.cpp" />
</ItemGroup>
<ItemGroup>
@ -187,6 +188,9 @@
<ItemGroup>
<ClInclude Include="context_regular_mode_v220.h" />
</ItemGroup>
<ItemGroup>
<None Include="vcpkg.json" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>

View File

@ -20,10 +20,16 @@
<ClCompile Include="log2.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="decode.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="context_regular_mode_v220.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="vcpkg.json" />
</ItemGroup>
</Project>

View File

@ -6,10 +6,11 @@
#include "context_regular_mode_v220.h"
#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro)
#pragma warning(disable : 4746) // volatile access of 'reset_threshold' is subject to /volatile:<iso|ms> setting; (in ARM64 mode)
using namespace charls;
context_regular_mode g_context;
regular_mode_context g_context;
jls_context_v220 g_context_v220;
volatile int32_t error_value;
@ -29,7 +30,7 @@ BENCHMARK(bm_regular_mode_update_variables_220);
static void bm_regular_mode_update_variables(benchmark::State& state)
{
g_context = context_regular_mode();
g_context = regular_mode_context();
for (const auto _ : state)
{
@ -52,12 +53,12 @@ BENCHMARK(bm_regular_mode_get_golomb_coding_parameter_v220);
static void bm_regular_mode_get_golomb_coding_parameter(benchmark::State& state)
{
g_context = context_regular_mode();
g_context = regular_mode_context();
g_context.update_variables_and_bias(error_value, near_lossless, reset_threshold);
for (const auto _ : state)
{
benchmark::DoNotOptimize(g_context.get_golomb_coding_parameter());
benchmark::DoNotOptimize(g_context.compute_golomb_coding_parameter());
}
}
BENCHMARK(bm_regular_mode_get_golomb_coding_parameter);

View File

@ -3,7 +3,8 @@
#pragma once
#include "../src/context_regular_mode.h"
#include "../src/regular_mode_context.hpp"
#include <cassert>
#include <cstdint>
@ -43,7 +44,7 @@ struct jls_context_v220 final
int n{N};
if (constexpr int limit{65536 * 256}; UNLIKELY(a >= limit || std::abs(b) >= limit))
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
impl::throw_jpegls_error(jpegls_errc::invalid_data);
if (n == reset_threshold)
{
@ -92,7 +93,7 @@ struct jls_context_v220 final
}
if (UNLIKELY(k == max_k_value))
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
impl::throw_jpegls_error(jpegls_errc::invalid_data);
return k;
}

74
benchmark/decode.cpp Normal file
View File

@ -0,0 +1,74 @@
// Copyright (c) Team CharLS.
// SPDX-License-Identifier: BSD-3-Clause
#include <benchmark/benchmark.h>
#include "../include/charls/charls.hpp"
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <vector>
#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro)
using namespace charls;
using std::byte;
using std::ifstream;
using std::ios;
using std::vector;
// Fills a pre-sized container with raw data taken from a stream.
//
// Exactly destination.size() characters are requested from `input` and stored
// in the memory that starts at destination.data(). The container is never
// resized; any shortfall is reported through the stream's own state/exception
// settings.
// NOTE(review): the count is destination.size(), not size() * sizeof(element),
// so this presumably expects a container of single-byte elements - confirm.
template<typename Container>
void read(std::istream& input, Container& destination)
{
    auto* const first_char{reinterpret_cast<char*>(destination.data())};
    const auto char_count{static_cast<std::streamsize>(destination.size())};

    input.read(first_char, char_count);
}
// Reads (a part of) a binary file into a byte buffer.
//
// filename: path of the file to read.
// offset:   position of the first byte to read, counted from the start of the
//           file. A negative value means "read the last `bytes` bytes".
// bytes:    number of bytes to read; 0 means "everything from the start
//           position up to the end of the file".
//
// Returns the bytes that were read.
// The stream is configured to throw on eof/fail/bad; such a failure is reported
// on std::cout (with the absolute path) and then rethrown to the caller.
vector<byte> read_file(const char* filename, long offset = 0, size_t bytes = 0)
try
{
    ifstream input;
    input.exceptions(ios::eofbit | ios::failbit | ios::badbit);
    input.open(filename, ios::in | ios::binary);

    // Seek to the end to learn the file size. Keep it as a stream offset so
    // that sizes of 2 GiB and more are not truncated.
    input.seekg(0, ios::end);
    const auto byte_count_file{static_cast<std::streamoff>(input.tellg())};

    // Resolve the start position BEFORE seeking: a negative offset is a request
    // for the tail of the file and must never be passed to seekg() as-is (that
    // seek fails and, with exceptions enabled, throws).
    std::streamoff position{offset};
    if (position < 0)
    {
        position = byte_count_file - static_cast<std::streamoff>(bytes);
    }
    if (bytes == 0)
    {
        bytes = static_cast<size_t>(byte_count_file - position);
    }

    input.seekg(position, ios::beg);

    vector<byte> buffer(bytes);
    read(input, buffer);
    return buffer;
}
catch (const std::ifstream::failure&)
{
    std::cout << "Failed to open/read file: " << std::filesystem::absolute(filename) << "\n";
    throw;
}
// Benchmark: decode one JPEG-LS encoded image per iteration.
//
// The encoded image is loaded from a fixed local path once, before the timed
// loop starts. Inside the loop a fresh decoder is created over the encoded
// bytes and asked to decode into a buffer that is reused by every iteration.
static void bm_decode(benchmark::State& state)
{
    const auto encoded{read_file("d:/benchmark-test-image.jls")};

    // Allocate the output buffer up front: std::vector initializes its
    // elements, and that cost must stay out of the measurement.
    const auto decoded_size{jpegls_decoder{encoded, true}.get_destination_size()};
    vector<byte> decoded(decoded_size);

    for (const auto _ : state)
    {
        jpegls_decoder decoder(encoded.data(), encoded.size());
        decoder.decode(decoded);
    }
}
BENCHMARK(bm_decode);

View File

@ -3,18 +3,19 @@
#include <benchmark/benchmark.h>
#include "../src/util.h"
#include "../src/jpegls_algorithm.hpp"
#include <cmath>
#include <limits>
#pragma warning(disable : 26409) // Avoid calling new explicitly (triggered by BENCHMARK macro)
uint32_t log2_floor(const uint32_t n) noexcept
static uint32_t log2_floor(const uint32_t n) noexcept
{
return 31 - charls::countl_zero(n);
}
uint32_t max_value_to_bits_per_sample(const uint32_t max_value) noexcept
static uint32_t max_value_to_bits_per_sample(const uint32_t max_value) noexcept
{
ASSERT(max_value > 0);
return log2_floor(max_value) + 1;
@ -48,9 +49,9 @@ static void bm_log2_ceil_int32(benchmark::State& state)
{
for (const auto _ : state)
{
benchmark::DoNotOptimize(charls::log2_ceil(256));
benchmark::DoNotOptimize(charls::log2_ceil(1024));
benchmark::DoNotOptimize(charls::log2_ceil(std::numeric_limits<uint16_t>::max()));
benchmark::DoNotOptimize(charls::log2_ceiling(256));
benchmark::DoNotOptimize(charls::log2_ceiling(1024));
benchmark::DoNotOptimize(charls::log2_ceiling(std::numeric_limits<uint16_t>::max()));
}
}
BENCHMARK(bm_log2_ceil_int32);

View File

@ -1,8 +1,5 @@
{
"$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json",
"name": "charls-benchmark",
"version": "1.0.0",
"dependencies": [
"benchmark"
]
}
"$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json",
"dependencies": [ { "name": "benchmark", "version>=":"1.8.5" } ],
"builtin-baseline": "3508985146f1b1d248c67ead13f8f54be5b4f5da"
}

View File

@ -1,9 +1,10 @@
// Copyright (c) Team CharLS.
// SPDX-License-Identifier: BSD-3-Clause
#include <vector>
#include "../include/charls/jpegls_decoder.hpp"
#include <vector>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, const size_t size)
{
charls::jpegls_decoder decoder(data, size, false);

View File

@ -15,6 +15,6 @@ module;
export module charls;
#include "charls_jpegls_decoder.h"
#include "charls_jpegls_encoder.h"
#include "jpegls_decoder.hpp"
#include "jpegls_encoder.hpp"
#include "version.h"

View File

@ -22,3 +22,5 @@ palletised
rect
cmove
Fuzzer
argv'
argc'