mirror of
https://github.com/team-charls/charls
synced 2025-03-28 21:03:13 +00:00
Add benchmark project and use memchr (#127)
* Add an initial benchmark project to research optimizations. * Use memchr to search for 0xFF during decoding. * Rename function to make intent more clear. * Use const pointers (decoding only need to read).
This commit is contained in:
parent
7d1b06ee07
commit
26478a8ba3
@ -72,6 +72,7 @@
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=lossless/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=losslesstraits/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=maxval/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=memchr/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=mrfx/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=nightshot/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=NODISCARD/@EntryIndexedValue">True</s:Boolean>
|
||||
|
7
benchmark/README.md
Normal file
7
benchmark/README.md
Normal file
@ -0,0 +1,7 @@
|
||||
# Benchmark
|
||||
|
||||
The Visual Studio project in this folder contains benchmarks to analyze different ways of
|
||||
decoding and encoding functions.
|
||||
|
||||
The project expects that the Google Benchmark framework has been installed with vcpkg.
|
||||
This can be done with: ```vcpkg install benchmark```
|
199
benchmark/benchmark.cpp
Normal file
199
benchmark/benchmark.cpp
Normal file
@ -0,0 +1,199 @@
|
||||
// Copyright (c) Team CharLS.
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#include "../src/jpegls_preset_coding_parameters.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
|
||||
// Branch-based gradient quantization used as the computed input for the lookup table.
// Maps the gradient `di` onto one of the nine values -4..4 by comparing it against the
// three thresholds stored in `preset` (and their negations).
// near_lossless is fixed at 0 in this function, so only di == 0 yields 0.
int8_t quantize_gradient_org(const charls::jpegls_pc_parameters& preset, const int32_t di) noexcept
|
||||
{
|
||||
// Value-initialized to 0: this variant only models the lossless case.
constexpr int32_t near_lossless{};
|
||||
|
||||
// Negative side: checked from the most negative region towards zero.
if (di <= -preset.threshold3)
|
||||
return -4;
|
||||
if (di <= -preset.threshold2)
|
||||
return -3;
|
||||
if (di <= -preset.threshold1)
|
||||
return -2;
|
||||
if (di < -near_lossless)
|
||||
return -1;
|
||||
if (di <= near_lossless)
|
||||
return 0;
|
||||
// Positive side: checked from zero upwards.
if (di < preset.threshold1)
|
||||
return 1;
|
||||
if (di < preset.threshold2)
|
||||
return 2;
|
||||
if (di < preset.threshold3)
|
||||
return 3;
|
||||
|
||||
// di >= threshold3
return 4;
|
||||
}
|
||||
|
||||
std::vector<int8_t> create_quantize_lut_lossless(const int32_t bit_count)
|
||||
{
|
||||
const charls::jpegls_pc_parameters preset{charls::compute_default((1 << static_cast<uint32_t>(bit_count)) - 1, 0)};
|
||||
const int32_t range{preset.maximum_sample_value + 1};
|
||||
|
||||
std::vector<int8_t> lut(static_cast<size_t>(range) * 2);
|
||||
for (size_t i{}; i != lut.size(); ++i)
|
||||
{
|
||||
lut[i] = quantize_gradient_org(preset, static_cast<int32_t>(i) - range);
|
||||
}
|
||||
|
||||
return lut;
|
||||
}
|
||||
|
||||
|
||||
const std::vector<int8_t> quantization_lut_lossless_8{create_quantize_lut_lossless(8)};
|
||||
|
||||
// Minimal stand-in for a scan decoder, holding only what the gradient quantization
// benchmark needs: three thresholds and a traits object that supplies near_lossless.
template<typename Traits>
|
||||
struct scan_decoder
|
||||
{
|
||||
// Thresholds copied from the preset coding parameters in the constructor.
int32_t t1_{};
|
||||
int32_t t2_{};
|
||||
int32_t t3_{};
|
||||
// Traits instance; only its near_lossless member is read by this struct.
Traits traits_;
|
||||
|
||||
// Stores `traits` and initializes the thresholds from
// charls::compute_default((1 << bit_count) - 1, 0).
// NOTE(review): the arguments are presumably (maximum sample value, near-lossless) - confirm
// against the declaration in jpegls_preset_coding_parameters.h.
explicit scan_decoder(Traits traits, const int32_t bit_count) noexcept : traits_{std::move(traits)}
|
||||
{
|
||||
const charls::jpegls_pc_parameters preset{charls::compute_default((1 << static_cast<uint32_t>(bit_count)) - 1, 0)};
|
||||
|
||||
t1_ = preset.threshold1;
|
||||
t2_ = preset.threshold2;
|
||||
t3_ = preset.threshold3;
|
||||
}
|
||||
|
||||
// Branch-based gradient quantization: maps `di` onto one of the nine values -4..4
// by comparing it against t1_/t2_/t3_ (and their negations) and traits_.near_lossless.
int8_t quantize_gradient_org(const int32_t di) const noexcept
|
||||
{
|
||||
// Negative side: checked from the most negative region towards zero.
if (di <= -t3_)
|
||||
return -4;
|
||||
if (di <= -t2_)
|
||||
return -3;
|
||||
if (di <= -t1_)
|
||||
return -2;
|
||||
if (di < -traits_.near_lossless)
|
||||
return -1;
|
||||
// Within [-near_lossless, near_lossless]
if (di <= traits_.near_lossless)
|
||||
return 0;
|
||||
// Positive side: checked from zero upwards.
if (di < t1_)
|
||||
return 1;
|
||||
if (di < t2_)
|
||||
return 2;
|
||||
if (di < t3_)
|
||||
return 3;
|
||||
|
||||
// di >= t3_
return 4;
|
||||
}
|
||||
};
|
||||
|
||||
// Traits type for the lossless case: exposes a compile-time near_lossless of 0.
struct lossless_traits final
|
||||
{
|
||||
// Value-initialized, i.e. 0.
static constexpr int32_t near_lossless{};
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Branch-based predictor.
// Returns the larger of Ra/Rb when Rc is below both, the smaller of Ra/Rb when Rc is
// above both, and Ra + Rb - Rc otherwise.
// __declspec(noinline) is an MSVC-specific attribute; presumably used here so that the
// benchmark measures the function as an actual call.
__declspec(noinline) int32_t get_predicted_value_default(int32_t Ra, int32_t Rb, int32_t Rc) noexcept
|
||||
{
|
||||
// Case Ra < Rb: Ra is the minimum, Rb the maximum.
if (Ra < Rb)
|
||||
{
|
||||
if (Rc < Ra)
|
||||
return Rb;
|
||||
|
||||
if (Rc > Rb)
|
||||
return Ra;
|
||||
}
|
||||
// Case Ra >= Rb: Rb is the minimum, Ra the maximum.
else
|
||||
{
|
||||
if (Rc < Rb)
|
||||
return Ra;
|
||||
|
||||
if (Rc > Ra)
|
||||
return Rb;
|
||||
}
|
||||
|
||||
// Rc lies within [min(Ra, Rb), max(Ra, Rb)].
return Ra + Rb - Rc;
|
||||
}
|
||||
|
||||
|
||||
constexpr size_t int32_t_bit_count = sizeof(int32_t) * 8;
|
||||
|
||||
|
||||
// Shifts `i` right by (int32_t_bit_count - 1) bits.
// With an arithmetic right shift this yields -1 (all bits set) for negative i and 0 otherwise.
// Note: right-shifting a negative signed value is implementation-defined before C++20.
constexpr int32_t bit_wise_sign(const int32_t i) noexcept
|
||||
{
|
||||
return i >> (int32_t_bit_count - 1);
|
||||
}
|
||||
|
||||
|
||||
// Predictor variant that folds the "ra < rb" / "ra >= rb" distinction into a sign mask.
// Returns rb or ra when rc lies outside the interval spanned by ra and rb, and
// ra + rb - rc otherwise.
// __declspec(noinline) is an MSVC-specific attribute; presumably used here so that the
// benchmark measures the function as an actual call.
__declspec(noinline) int32_t get_predicted_value_optimized(const int32_t ra, const int32_t rb, const int32_t rc) noexcept
|
||||
{
|
||||
// sign trick reduces the number of if statements (branches)
// sign is the result of bit_wise_sign(rb - ra): all bits set when rb < ra, 0 otherwise.
const int32_t sign{bit_wise_sign(rb - ra)};
|
||||
|
||||
// is Ra between Rc and Rb?
// XOR with the mask flips the sign of the difference when rb < ra, so one comparison covers both orderings.
if ((sign ^ (rc - ra)) < 0)
|
||||
{
|
||||
return rb;
|
||||
}
|
||||
if ((sign ^ (rb - rc)) < 0)
|
||||
{
|
||||
return ra;
|
||||
}
|
||||
|
||||
// default case, valid if Rc element of [Ra,Rb]
return ra + rb - rc;
|
||||
}
|
||||
|
||||
|
||||
// Benchmark: calls get_predicted_value_default once per argument order
// (Ra < Rb and Ra > Rb) in every iteration.
static void bm_get_predicted_value_default(benchmark::State& state)
|
||||
{
|
||||
for (const auto _ : state)
|
||||
{
|
||||
// DoNotOptimize keeps the compiler from discarding the unused results.
benchmark::DoNotOptimize(get_predicted_value_default(100, 200, 300));
|
||||
benchmark::DoNotOptimize(get_predicted_value_default(200, 100, 300));
|
||||
}
|
||||
}
|
||||
BENCHMARK(bm_get_predicted_value_default);
|
||||
|
||||
// Benchmark: calls get_predicted_value_optimized once per argument order
// (ra < rb and ra > rb) in every iteration, using the same inputs as
// bm_get_predicted_value_default so both results are directly comparable.
static void bm_get_predicted_value_optimized(benchmark::State& state)
{
    for (const auto _ : state)
    {
        // DoNotOptimize keeps the compiler from discarding the unused results.
        benchmark::DoNotOptimize(get_predicted_value_optimized(100, 200, 300));

        // Both calls must target the optimized implementation; calling the default
        // implementation here would mix the two code paths in one measurement.
        benchmark::DoNotOptimize(get_predicted_value_optimized(200, 100, 300));
    }
}
|
||||
BENCHMARK(bm_get_predicted_value_optimized);
|
||||
|
||||
// Benchmark: measures the branch-based scan_decoder::quantize_gradient_org for the
// gradients 0, 127 and 255, using a lossless decoder configured for 8 bits.
static void bm_quantize_gradient_calculated(benchmark::State& state)
|
||||
{
|
||||
// Constructed outside the timed loop.
const scan_decoder<lossless_traits> sd({}, 8);
|
||||
|
||||
for (const auto _ : state)
|
||||
{
|
||||
// DoNotOptimize keeps the compiler from discarding the unused results.
benchmark::DoNotOptimize(sd.quantize_gradient_org(0));
|
||||
benchmark::DoNotOptimize(sd.quantize_gradient_org(127));
|
||||
benchmark::DoNotOptimize(sd.quantize_gradient_org(255));
|
||||
}
|
||||
}
|
||||
BENCHMARK(bm_quantize_gradient_calculated);
|
||||
|
||||
// Benchmark: measures gradient quantization through the precomputed lookup table
// quantization_lut_lossless_8 for the gradients 0, 127 and 255 (the same gradients
// that bm_quantize_gradient_calculated passes to the branch-based implementation).
static void bm_quantize_gradient_lut(benchmark::State& state)
{
    // create_quantize_lut_lossless stores the result for gradient di at index di + range,
    // and the table holds 2 * range entries, so gradient 0 lives in the middle of the table.
    const size_t zero_offset{quantization_lut_lossless_8.size() / 2};

    for (const auto _ : state)
    {
        // DoNotOptimize keeps the compiler from discarding the unused loads.
        benchmark::DoNotOptimize(quantization_lut_lossless_8[zero_offset + 0]);
        benchmark::DoNotOptimize(quantization_lut_lossless_8[zero_offset + 127]);
        benchmark::DoNotOptimize(quantization_lut_lossless_8[zero_offset + 255]);
    }
}
|
||||
BENCHMARK(bm_quantize_gradient_lut);
|
||||
|
||||
|
||||
BENCHMARK_MAIN();
|
31
benchmark/benchmark.sln
Normal file
31
benchmark/benchmark.sln
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.1.31911.260
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "benchmark", "benchmark.vcxproj", "{F961EC29-4ACE-4D5E-B7ED-55681A678A90}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x64.Build.0 = Debug|x64
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Debug|x86.Build.0 = Debug|Win32
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x64.ActiveCfg = Release|x64
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x64.Build.0 = Release|x64
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x86.ActiveCfg = Release|Win32
|
||||
{F961EC29-4ACE-4D5E-B7ED-55681A678A90}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {57D87A68-949C-476E-A240-13953EE8CA8C}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
3
benchmark/benchmark.sln.DotSettings
Normal file
3
benchmark/benchmark.sln.DotSettings
Normal file
@ -0,0 +1,3 @@
|
||||
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
|
||||
<s:String x:Key="/Default/CodeStyle/Naming/CppNaming/Rules/=Class_0020and_0020struct_0020public_0020fields/@EntryIndexedValue"><NamingElement Priority="12"><Descriptor Static="Indeterminate" Constexpr="Indeterminate" Const="Indeterminate" Volatile="Indeterminate" Accessibility="PUBLIC"><type Name="class field" /><type Name="struct field" /></Descriptor><Policy Inspect="True" Prefix="" Suffix="_" Style="aa_bb" /></NamingElement></s:String>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=lossless/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
|
151
benchmark/benchmark.vcxproj
Normal file
151
benchmark/benchmark.vcxproj
Normal file
@ -0,0 +1,151 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>16.0</VCProjectVersion>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<ProjectGuid>{f961ec29-4ace-4d5e-b7ed-55681a678a90}</ProjectGuid>
|
||||
<RootNamespace>benchmark</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Shlwapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Shlwapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Shlwapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>Shlwapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="benchmark.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
22
benchmark/benchmark.vcxproj.filters
Normal file
22
benchmark/benchmark.vcxproj.filters
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Header Files">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Resource Files">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="benchmark.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -170,6 +170,28 @@ One of the missing features of C++ is a standard Package Manager. The following
|
||||
* Library to read Anymap files (for example Netpbm)
|
||||
* Library to parse command line parameters (for example Clara, CLI11)
|
||||
|
||||
### Performance
|
||||
|
||||
#### Decoding the bitstream
|
||||
|
||||
There are 2 main ways to decode the bitstream
|
||||
|
||||
* Basic: Read it byte for byte and store the results in a cache variable
|
||||
|
||||
* Improved: read when possible a register in 1 step. The problem is that 0xFF can exist in the
|
||||
bitstream. If such a 0xFF exists the next bit needs to be ignored. There are a couple of ways to do this:
|
||||
* A) Search for the first position that contains a 0xFF byte and remember that position. 0xFF bytes are rare.
|
||||
* B) Search for the first 0xFF with memchr. memchr can leverage special CPU instructions when possible.
|
||||
* C) Load a register and check if it contains a 0xFF byte.
|
||||
|
||||
Measurements conclusion: option B is the fastest on x64. This is the original algorithm. There is not a large difference between the different options.
|
||||
Examples of decoding performance on an AMD 5950X x64 CPU:
|
||||
|
||||
| Image | Basic | Improved A | Improved B |Improved C |
|
||||
| -------------- | ------- | ---------- | ---------- |---------- |
|
||||
| 16 bit 512 * 512 (CT image) | 3.09 ms | 3.17 ms | 3.06 ms | 3.10 ms |
|
||||
| 8 bit 5412 * 7216 | 517 ms | 509 ms | 507 ms | 512 ms |
|
||||
|
||||
### Supported C++ Compilers
|
||||
|
||||
#### Clang
|
||||
|
@ -17,7 +17,7 @@ namespace charls {
|
||||
class decoder_strategy
|
||||
{
|
||||
public:
|
||||
explicit decoder_strategy(const frame_info& frame, const coding_parameters& parameters) noexcept :
|
||||
decoder_strategy(const frame_info& frame, const coding_parameters& parameters) noexcept :
|
||||
frame_info_{frame}, parameters_{parameters}
|
||||
{
|
||||
}
|
||||
@ -35,14 +35,11 @@ public:
|
||||
|
||||
void initialize(const byte_span source)
|
||||
{
|
||||
valid_bits_ = 0;
|
||||
read_cache_ = 0;
|
||||
|
||||
position_ = source.data;
|
||||
end_position_ = position_ + source.size;
|
||||
|
||||
next_ff_position_ = find_next_ff();
|
||||
make_valid();
|
||||
find_jpeg_marker_start_byte();
|
||||
fill_read_cache();
|
||||
}
|
||||
|
||||
void reset()
|
||||
@ -50,8 +47,8 @@ public:
|
||||
valid_bits_ = 0;
|
||||
read_cache_ = 0;
|
||||
|
||||
next_ff_position_ = find_next_ff();
|
||||
make_valid();
|
||||
find_jpeg_marker_start_byte();
|
||||
fill_read_cache();
|
||||
}
|
||||
|
||||
FORCE_INLINE void skip(const int32_t length) noexcept
|
||||
@ -82,84 +79,10 @@ public:
|
||||
impl::throw_jpegls_error(jpegls_errc::too_much_encoded_data);
|
||||
}
|
||||
|
||||
FORCE_INLINE bool optimized_read() noexcept
|
||||
{
|
||||
// Easy & fast: if there is no 0xFF byte in sight, we can read without bit stuffing
|
||||
if (position_ < next_ff_position_ - (sizeof(bufType) - 1))
|
||||
{
|
||||
read_cache_ |= from_big_endian<sizeof(bufType)>::read(position_) >> valid_bits_;
|
||||
const int bytes_to_read{(bufType_bit_count - valid_bits_) >> 3};
|
||||
position_ += bytes_to_read;
|
||||
valid_bits_ += bytes_to_read * 8;
|
||||
ASSERT(valid_bits_ >= bufType_bit_count - 8);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void make_valid()
|
||||
{
|
||||
ASSERT(valid_bits_ <= bufType_bit_count - 8);
|
||||
|
||||
if (optimized_read())
|
||||
return;
|
||||
|
||||
do
|
||||
{
|
||||
if (position_ >= end_position_)
|
||||
{
|
||||
if (valid_bits_ <= 0)
|
||||
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const bufType value_new{position_[0]};
|
||||
|
||||
if (value_new == jpeg_marker_start_byte)
|
||||
{
|
||||
// JPEG bit stream rule: no FF may be followed by 0x80 or higher
|
||||
if (position_ == end_position_ - 1 || (position_[1] & 0x80) != 0)
|
||||
{
|
||||
if (valid_bits_ <= 0)
|
||||
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
read_cache_ |= value_new << (bufType_bit_count - 8 - valid_bits_);
|
||||
position_ += 1;
|
||||
valid_bits_ += 8;
|
||||
|
||||
if (value_new == jpeg_marker_start_byte)
|
||||
{
|
||||
--valid_bits_;
|
||||
}
|
||||
} while (valid_bits_ < bufType_bit_count - 8);
|
||||
|
||||
next_ff_position_ = find_next_ff();
|
||||
}
|
||||
|
||||
uint8_t* find_next_ff() const noexcept
|
||||
{
|
||||
auto* position_next_ff{position_};
|
||||
|
||||
while (position_next_ff < end_position_)
|
||||
{
|
||||
if (*position_next_ff == jpeg_marker_start_byte)
|
||||
break;
|
||||
|
||||
++position_next_ff;
|
||||
}
|
||||
|
||||
return position_next_ff;
|
||||
}
|
||||
|
||||
uint8_t* get_cur_byte_pos() const noexcept
|
||||
const uint8_t* get_cur_byte_pos() const noexcept
|
||||
{
|
||||
int32_t valid_bits{valid_bits_};
|
||||
uint8_t* compressed_bytes{position_};
|
||||
const uint8_t* compressed_bytes{position_};
|
||||
|
||||
for (;;)
|
||||
{
|
||||
@ -177,14 +100,14 @@ public:
|
||||
{
|
||||
if (valid_bits_ < length)
|
||||
{
|
||||
make_valid();
|
||||
fill_read_cache();
|
||||
if (valid_bits_ < length)
|
||||
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
|
||||
}
|
||||
|
||||
ASSERT(length != 0 && length <= valid_bits_);
|
||||
ASSERT(length < 32);
|
||||
const auto result = static_cast<int32_t>(read_cache_ >> (bufType_bit_count - length));
|
||||
const auto result = static_cast<int32_t>(read_cache_ >> (cache_t_bit_count - length));
|
||||
skip(length);
|
||||
return result;
|
||||
}
|
||||
@ -193,20 +116,20 @@ public:
|
||||
{
|
||||
if (valid_bits_ < 8)
|
||||
{
|
||||
make_valid();
|
||||
fill_read_cache();
|
||||
}
|
||||
|
||||
return static_cast<int32_t>(read_cache_ >> (bufType_bit_count - 8));
|
||||
return static_cast<int32_t>(read_cache_ >> max_readable_cache_bits);
|
||||
}
|
||||
|
||||
FORCE_INLINE bool read_bit()
|
||||
{
|
||||
if (valid_bits_ <= 0)
|
||||
{
|
||||
make_valid();
|
||||
fill_read_cache();
|
||||
}
|
||||
|
||||
const bool set = (read_cache_ & (static_cast<bufType>(1) << (bufType_bit_count - 1))) != 0;
|
||||
const bool set = (read_cache_ & (static_cast<cache_t>(1) << (cache_t_bit_count - 1))) != 0;
|
||||
skip(1);
|
||||
return set;
|
||||
}
|
||||
@ -215,13 +138,13 @@ public:
|
||||
{
|
||||
if (valid_bits_ < 16)
|
||||
{
|
||||
make_valid();
|
||||
fill_read_cache();
|
||||
}
|
||||
bufType val_test = read_cache_;
|
||||
cache_t val_test = read_cache_;
|
||||
|
||||
for (int32_t count{}; count < 16; ++count)
|
||||
{
|
||||
if ((val_test & (static_cast<bufType>(1) << (bufType_bit_count - 1))) != 0)
|
||||
if ((val_test & (static_cast<cache_t>(1) << (cache_t_bit_count - 1))) != 0)
|
||||
return count;
|
||||
|
||||
val_test <<= 1;
|
||||
@ -268,17 +191,97 @@ protected:
|
||||
std::unique_ptr<process_line> process_line_;
|
||||
|
||||
private:
|
||||
using bufType = size_t;
|
||||
static constexpr auto bufType_bit_count = static_cast<int32_t>(sizeof(bufType) * 8);
|
||||
using cache_t = size_t;
|
||||
|
||||
// Refills read_cache_ with bits from the input starting at position_.
// First tries fill_read_cache_optimistic(); when that declines, bytes are added one at
// a time until valid_bits_ reaches max_readable_cache_bits, the end of the input is
// reached, or a 0xFF byte that starts a marker is found.
// Throws jpegls_errc::invalid_encoded_data when the input ends (or a marker starts)
// while the cache holds no valid bits.
void fill_read_cache()
|
||||
{
|
||||
ASSERT(valid_bits_ <= max_readable_cache_bits);
|
||||
|
||||
if (fill_read_cache_optimistic())
|
||||
return;
|
||||
|
||||
do
|
||||
{
|
||||
if (position_ >= end_position_)
|
||||
{
|
||||
if (valid_bits_ == 0)
|
||||
{
|
||||
// Decoding process expects at least some bits to be added to the cache.
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const cache_t new_byte_value{*position_};
|
||||
|
||||
if (new_byte_value == jpeg_marker_start_byte)
|
||||
{
|
||||
// JPEG-LS bit stream rule: if FF is followed by a 1 bit then it is a marker
// A 0xFF as the very last input byte is handled the same way as a marker.
if (position_ == end_position_ - 1 || (position_[1] & 0x80) != 0)
|
||||
{
|
||||
// NOTE(review): this check uses `<= 0` while the end-of-input check above uses `== 0` - confirm whether the difference is intentional.
if (valid_bits_ <= 0)
|
||||
{
|
||||
// Decoding process expects at least some bits to be added to the cache.
impl::throw_jpegls_error(jpegls_errc::invalid_encoded_data);
|
||||
}
|
||||
|
||||
// Marker detected, typical EOI, SOS (next scan) or RSTm.
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Place the new byte directly below the bits that are already valid.
read_cache_ |= new_byte_value << (max_readable_cache_bits - valid_bits_);
|
||||
valid_bits_ += 8;
|
||||
++position_;
|
||||
|
||||
if (new_byte_value == jpeg_marker_start_byte)
|
||||
{
|
||||
// The next bit after an 0xFF needs to be ignored, compensate for the next read (see ISO/IEC 14495-1,A.1)
--valid_bits_;
|
||||
}
|
||||
|
||||
} while (valid_bits_ < max_readable_cache_bits);
|
||||
|
||||
// position_ has advanced: refresh position_ff_ for the next optimistic fill.
find_jpeg_marker_start_byte();
|
||||
}
|
||||
|
||||
// Fast path for refilling read_cache_: when the next sizeof(cache_t) bytes lie before
// position_ff_, a whole cache_t is read at once and merged into the cache.
// Returns true when the cache was filled this way, false when the caller has to fall
// back to byte-wise filling.
FORCE_INLINE bool fill_read_cache_optimistic() noexcept
|
||||
{
|
||||
// Easy & fast: if there is no 0xFF byte in sight, we can read without bit stuffing
// NOTE(review): position_ff_ - (sizeof(cache_t) - 1) may point before the start of the buffer when position_ff_ is near its beginning - confirm this is acceptable.
if (position_ < position_ff_ - (sizeof(cache_t) - 1))
|
||||
{
|
||||
// Byte-swap the unaligned read and shift it below the bits that are already valid.
read_cache_ |= byte_swap(read_unaligned<cache_t>(position_)) >> valid_bits_;
|
||||
// Only whole bytes that fit into the free part of the cache are consumed.
const int bytes_to_read{(cache_t_bit_count - valid_bits_) / 8};
|
||||
position_ += bytes_to_read;
|
||||
valid_bits_ += bytes_to_read * 8;
|
||||
ASSERT(valid_bits_ >= max_readable_cache_bits);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void find_jpeg_marker_start_byte() noexcept
|
||||
{
|
||||
// Use memchr to find next start byte (0xFF). memchr is optimized on some platforms to search faster.
|
||||
position_ff_ = static_cast<const uint8_t*>(memchr(position_, jpeg_marker_start_byte, position_ - end_position_));
|
||||
if (!position_ff_)
|
||||
{
|
||||
position_ff_ = end_position_;
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr auto cache_t_bit_count = static_cast<int32_t>(sizeof(cache_t) * 8);
|
||||
static constexpr int32_t max_readable_cache_bits{cache_t_bit_count - 8};
|
||||
|
||||
std::vector<uint8_t> buffer_;
|
||||
|
||||
// decoding
|
||||
bufType read_cache_{};
|
||||
cache_t read_cache_{};
|
||||
int32_t valid_bits_{};
|
||||
uint8_t* position_{};
|
||||
uint8_t* next_ff_position_{};
|
||||
uint8_t* end_position_{};
|
||||
const uint8_t* position_{};
|
||||
const uint8_t* end_position_{};
|
||||
const uint8_t* position_ff_{};
|
||||
};
|
||||
|
||||
} // namespace charls
|
||||
|
@ -543,8 +543,12 @@ void jpeg_stream_reader::skip_byte()
|
||||
|
||||
uint16_t jpeg_stream_reader::read_uint16()
|
||||
{
|
||||
const uint32_t value{read_byte() * 256U};
|
||||
return static_cast<uint16_t>(value + read_byte());
|
||||
if (source_.size < 2)
|
||||
throw_jpegls_error(jpegls_errc::source_buffer_too_small);
|
||||
|
||||
const auto value{read_unaligned<uint16_t>(source_.data)};
|
||||
skip_bytes(source_, 2);
|
||||
return byte_swap(value);
|
||||
}
|
||||
|
||||
|
||||
@ -557,11 +561,12 @@ uint32_t jpeg_stream_reader::read_uint24()
|
||||
|
||||
uint32_t jpeg_stream_reader::read_uint32()
|
||||
{
|
||||
uint32_t value{read_uint16()};
|
||||
value = value << 16U;
|
||||
value += read_uint16();
|
||||
if (source_.size < 4)
|
||||
throw_jpegls_error(jpegls_errc::source_buffer_too_small);
|
||||
|
||||
return value;
|
||||
const auto value{read_unaligned<uint32_t>(source_.data)};
|
||||
skip_bytes(source_, 4);
|
||||
return byte_swap(value);
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,6 +30,7 @@ void jpeg_stream_writer::write_end_of_image(const bool even_destination_size)
|
||||
{
|
||||
if (even_destination_size && bytes_written() % 2 != 0)
|
||||
{
|
||||
// Write an additional 0xFF byte to ensure that the encoded bit stream has an even size.
|
||||
write_uint8(jpeg_marker_start_byte);
|
||||
}
|
||||
|
||||
@ -49,22 +50,21 @@ void jpeg_stream_writer::write_spiff_header_segment(const spiff_header& header)
|
||||
write_bytes(spiff_magic_id.data(), spiff_magic_id.size());
|
||||
write_uint8(spiff_major_revision_number);
|
||||
write_uint8(spiff_minor_revision_number);
|
||||
write_uint8(static_cast<uint8_t>(header.profile_id));
|
||||
write_uint8(static_cast<uint8_t>(header.component_count));
|
||||
write_uint8(to_underlying_type(header.profile_id));
|
||||
write_uint8(header.component_count);
|
||||
write_uint32(header.height);
|
||||
write_uint32(header.width);
|
||||
write_uint8(static_cast<uint8_t>(header.color_space));
|
||||
write_uint8(static_cast<uint8_t>(header.bits_per_sample));
|
||||
write_uint8(static_cast<uint8_t>(header.compression_type));
|
||||
write_uint8(static_cast<uint8_t>(header.resolution_units));
|
||||
write_uint8(to_underlying_type(header.color_space));
|
||||
write_uint8(header.bits_per_sample);
|
||||
write_uint8(to_underlying_type(header.compression_type));
|
||||
write_uint8(to_underlying_type(header.resolution_units));
|
||||
write_uint32(header.vertical_resolution);
|
||||
write_uint32(header.horizontal_resolution);
|
||||
}
|
||||
|
||||
|
||||
USE_DECL_ANNOTATIONS void jpeg_stream_writer::write_spiff_directory_entry(const uint32_t entry_tag,
|
||||
const void* entry_data,
|
||||
const size_t entry_data_size_bytes)
|
||||
USE_DECL_ANNOTATIONS void jpeg_stream_writer::write_spiff_directory_entry(const uint32_t entry_tag, const void* entry_data,
|
||||
const size_t entry_data_size_bytes)
|
||||
{
|
||||
write_segment_header(jpeg_marker_code::application_data8, sizeof(uint32_t) + entry_data_size_bytes);
|
||||
write_uint32(entry_tag);
|
||||
@ -78,7 +78,7 @@ void jpeg_stream_writer::write_spiff_end_of_directory_entry()
|
||||
// but only 6 data bytes. This approach allows existing bit streams\encoders to be wrapped with a SPIFF header.
|
||||
// In this implementation the SOI marker is added as data bytes to simplify the design.
|
||||
static constexpr array<uint8_t, 6> spiff_end_of_directory{
|
||||
0, 0, 0, spiff_end_of_directory_entry_type, 0xFF, static_cast<uint8_t>(charls::jpeg_marker_code::start_of_image)};
|
||||
0, 0, 0, spiff_end_of_directory_entry_type, 0xFF, to_underlying_type(charls::jpeg_marker_code::start_of_image)};
|
||||
|
||||
write_segment_header(jpeg_marker_code::application_data8, spiff_end_of_directory.size());
|
||||
write_bytes(spiff_end_of_directory.data(), spiff_end_of_directory.size());
|
||||
@ -95,20 +95,20 @@ void jpeg_stream_writer::write_start_of_frame_segment(const frame_info& frame)
|
||||
// Create a Frame Header as defined in ISO/IEC 14495-1, C.2.2 and T.81, B.2.2
|
||||
const size_t data_size{6 + (static_cast<size_t>(frame.component_count) * 3)};
|
||||
write_segment_header(jpeg_marker_code::start_of_frame_jpegls, data_size);
|
||||
write_uint8(static_cast<uint8_t>(frame.bits_per_sample)); // P = Sample precision
|
||||
write_uint16(static_cast<uint16_t>(frame.height)); // Y = Number of lines
|
||||
write_uint16(static_cast<uint16_t>(frame.width)); // X = Number of samples per line
|
||||
write_uint8(frame.bits_per_sample); // P = Sample precision
|
||||
write_uint16(frame.height); // Y = Number of lines
|
||||
write_uint16(frame.width); // X = Number of samples per line
|
||||
|
||||
// Components
|
||||
write_uint8(static_cast<uint8_t>(frame.component_count)); // Nf = Number of image components in frame
|
||||
write_uint8(frame.component_count); // Nf = Number of image components in frame
|
||||
|
||||
// Use by default 1 as the start component identifier to remain compatible with the
|
||||
// code sample of ISO/IEC 14495-1, H.4 and the JPEG-LS ISO conformance sample files.
|
||||
for (auto component_id{1}; component_id <= frame.component_count; ++component_id)
|
||||
{
|
||||
// Component Specification parameters
|
||||
write_uint8(static_cast<uint8_t>(component_id)); // Ci = Component identifier
|
||||
write_uint8(0x11); // Hi + Vi = Horizontal sampling factor + Vertical sampling factor
|
||||
write_uint8(component_id); // Ci = Component identifier
|
||||
write_uint8(0x11); // Hi + Vi = Horizontal sampling factor + Vertical sampling factor
|
||||
write_uint8(0); // Tqi = Quantization table destination selector (reserved for JPEG-LS, should be set to 0)
|
||||
}
|
||||
}
|
||||
@ -152,7 +152,7 @@ void jpeg_stream_writer::write_start_of_scan_segment(const int32_t component_cou
|
||||
|
||||
// Create a Scan Header as defined in T.87, C.2.3 and T.81, B.2.3
|
||||
write_segment_header(jpeg_marker_code::start_of_scan, 1 + (static_cast<size_t>(component_count) * 2) + 3);
|
||||
write_uint8(static_cast<uint8_t>(component_count));
|
||||
write_uint8(component_count);
|
||||
|
||||
for (int32_t i{}; i != component_count; ++i)
|
||||
{
|
||||
@ -161,9 +161,9 @@ void jpeg_stream_writer::write_start_of_scan_segment(const int32_t component_cou
|
||||
++component_id_;
|
||||
}
|
||||
|
||||
write_uint8(static_cast<uint8_t>(near_lossless)); // NEAR parameter
|
||||
write_uint8(static_cast<uint8_t>(interleave_mode)); // ILV parameter
|
||||
write_uint8(0); // transformation
|
||||
write_uint8(near_lossless); // NEAR parameter
|
||||
write_uint8(to_underlying_type(interleave_mode)); // ILV parameter
|
||||
write_uint8(0); // transformation
|
||||
}
|
||||
|
||||
|
||||
@ -179,7 +179,7 @@ void jpeg_stream_writer::write_segment_header(const jpeg_marker_code marker_code
|
||||
impl::throw_jpegls_error(jpegls_errc::destination_buffer_too_small);
|
||||
|
||||
write_marker(marker_code);
|
||||
write_uint16(static_cast<uint16_t>(data_size + segment_length_size));
|
||||
write_uint16(static_cast<uint16_t>(segment_length_size + data_size));
|
||||
}
|
||||
|
||||
} // namespace charls
|
||||
|
@ -115,6 +115,12 @@ private:
|
||||
destination_.data[byte_offset_++] = value;
|
||||
}
|
||||
|
||||
void write_uint8(const int32_t value) noexcept
|
||||
{
|
||||
ASSERT(value >= 0 && value <= std::numeric_limits<uint8_t>::max());
|
||||
write_uint8(static_cast<uint8_t>(value));
|
||||
}
|
||||
|
||||
void write_uint16(const uint16_t value) noexcept
|
||||
{
|
||||
write_uint<uint16_t>(value);
|
||||
@ -126,6 +132,12 @@ private:
|
||||
write_uint16(static_cast<uint16_t>(value));
|
||||
}
|
||||
|
||||
void write_uint16(const uint32_t value) noexcept
|
||||
{
|
||||
ASSERT(value <= std::numeric_limits<uint16_t>::max());
|
||||
write_uint16(static_cast<uint16_t>(value));
|
||||
}
|
||||
|
||||
void write_uint32(const uint32_t value) noexcept
|
||||
{
|
||||
write_uint<uint32_t>(value);
|
||||
@ -138,7 +150,7 @@ private:
|
||||
|
||||
// Use write_bytes to write to the unaligned byte array.
|
||||
// The compiler will perform the correct optimization when the target platform supports unaligned writes.
|
||||
const UnsignedIntType big_endian_value{endian_swap(value)};
|
||||
const UnsignedIntType big_endian_value{byte_swap(value)};
|
||||
write_bytes(&big_endian_value, sizeof big_endian_value);
|
||||
}
|
||||
|
||||
@ -164,16 +176,6 @@ private:
|
||||
write_uint8(static_cast<uint8_t>(marker_code));
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the 32-bit value with the order of its four bytes reversed.
/// </summary>
static constexpr uint32_t endian_swap(const uint32_t value) noexcept
{
    // Each term moves one byte to its mirrored position; shift first, then mask.
    return (value << 24) | ((value << 8) & 0x00FF0000) | ((value >> 8) & 0x0000FF00) | (value >> 24);
}
|
||||
|
||||
/// <summary>
/// Returns the 16-bit value with its two bytes exchanged.
/// </summary>
static constexpr uint16_t endian_swap(const uint16_t value) noexcept
{
    // The operands are promoted to int; the cast truncates the result back to 16 bits.
    return static_cast<uint16_t>(((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8));
}
|
||||
|
||||
byte_span destination_{};
|
||||
size_t byte_offset_{};
|
||||
uint8_t component_id_{1};
|
||||
|
33
src/scan.h
33
src/scan.h
@ -41,34 +41,6 @@ constexpr int32_t apply_sign(const int32_t i, const int32_t sign) noexcept
|
||||
}
|
||||
|
||||
|
||||
// Two alternatives for GetPredictedValue() (second is slightly faster due to reduced branching)
|
||||
|
||||
#if 0
|
||||
|
||||
// Computes the predicted value from the three neighbouring values Ra, Rb and Rc:
//  - when Rc is smaller than both Ra and Rb, the larger of Ra and Rb is returned,
//  - when Rc is larger than both Ra and Rb, the smaller of Ra and Rb is returned,
//  - otherwise Ra + Rb - Rc is returned.
inline int32_t get_predicted_value(int32_t Ra, int32_t Rb, int32_t Rc)
{
    const int32_t lower{Ra < Rb ? Ra : Rb};
    const int32_t upper{Ra < Rb ? Rb : Ra};

    if (Rc < lower)
        return upper;

    if (Rc > upper)
        return lower;

    return Ra + Rb - Rc;
}
|
||||
|
||||
#else
|
||||
|
||||
inline int32_t get_predicted_value(const int32_t ra, const int32_t rb, const int32_t rc) noexcept
|
||||
{
|
||||
// sign trick reduces the number of if statements (branches)
|
||||
@ -88,7 +60,6 @@ inline int32_t get_predicted_value(const int32_t ra, const int32_t rb, const int
|
||||
return ra + rb - rc;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/// <summary>
|
||||
/// This is the optimized inverse algorithm of ISO/IEC 14495-1, A.5.2, Code Segment A.11 (second else branch)
|
||||
@ -101,6 +72,7 @@ CONSTEXPR int32_t unmap_error_value(const int32_t mapped_error) noexcept
|
||||
return sign ^ (mapped_error >> 1);
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// This is the algorithm of ISO/IEC 14495-1, A.5.2, Code Segment A.11 (second else branch)
|
||||
/// It will map signed values to unsigned values. It has been optimized to prevent branching.
|
||||
@ -113,6 +85,7 @@ CONSTEXPR int32_t map_error_value(const int32_t error_value) noexcept
|
||||
return mapped_error;
|
||||
}
|
||||
|
||||
|
||||
constexpr int32_t compute_context_id(const int32_t q1, const int32_t q2, const int32_t q3) noexcept
|
||||
{
|
||||
return (q1 * 9 + q2) * 9 + q3;
|
||||
@ -379,7 +352,7 @@ private:
|
||||
}
|
||||
|
||||
/// <summary>Encodes/Decodes a scan line of samples</summary>
|
||||
void do_line(sample_type* /*template_selector*/)
|
||||
FORCE_INLINE void do_line(sample_type* /*template_selector*/)
|
||||
{
|
||||
int32_t index{};
|
||||
int32_t rb{previous_line_[index - 1]};
|
||||
|
74
src/util.h
74
src/util.h
@ -9,6 +9,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
@ -127,7 +128,8 @@ inline void string_copy(CHARLS_IN_Z const char* source, CHARLS_OUT_WRITES_Z(size
|
||||
#endif
|
||||
}
|
||||
|
||||
inline jpegls_errc set_error_message(const jpegls_errc error, CHARLS_OUT_WRITES_Z(ErrorMessageSize) char* error_message) noexcept
|
||||
inline jpegls_errc set_error_message(const jpegls_errc error,
|
||||
CHARLS_OUT_WRITES_Z(ErrorMessageSize) char* error_message) noexcept
|
||||
{
|
||||
if (error_message)
|
||||
{
|
||||
@ -264,34 +266,59 @@ struct quad final : triplet<SampleType>
|
||||
};
|
||||
|
||||
|
||||
template<int Size>
|
||||
struct from_big_endian final
|
||||
// C++23 comes with std::byteswap. Use our own byte_swap implementation for now.
|
||||
template<typename T>
|
||||
CHARLS_CHECK_RETURN T byte_swap(T /*value*/) noexcept
|
||||
{
|
||||
};
|
||||
|
||||
ASSERT(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<>
|
||||
struct from_big_endian<4> final
|
||||
inline CHARLS_CHECK_RETURN uint16_t byte_swap<uint16_t>(const uint16_t value) noexcept
|
||||
{
|
||||
FORCE_INLINE static unsigned int read(const uint8_t* buffer) noexcept
|
||||
{
|
||||
return (static_cast<uint32_t>(buffer[0]) << 24U) + (static_cast<uint32_t>(buffer[1]) << 16U) +
|
||||
(static_cast<uint32_t>(buffer[2]) << 8U) + (static_cast<uint32_t>(buffer[3]) << 0U);
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _byteswap_ushort(value);
|
||||
#else
|
||||
// Note: GCC and Clang will optimize this pattern to a built-in intrinsic.
|
||||
return static_cast<uint16_t>(value << 8 | value >> 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
struct from_big_endian<8> final
|
||||
inline CHARLS_CHECK_RETURN uint32_t byte_swap<uint32_t>(const uint32_t value) noexcept
|
||||
{
|
||||
FORCE_INLINE static uint64_t read(const uint8_t* buffer) noexcept
|
||||
{
|
||||
return (static_cast<uint64_t>(buffer[0]) << 56U) + (static_cast<uint64_t>(buffer[1]) << 48U) +
|
||||
(static_cast<uint64_t>(buffer[2]) << 40U) + (static_cast<uint64_t>(buffer[3]) << 32U) +
|
||||
(static_cast<uint64_t>(buffer[4]) << 24U) + (static_cast<uint64_t>(buffer[5]) << 16U) +
|
||||
(static_cast<uint64_t>(buffer[6]) << 8U) + (static_cast<uint64_t>(buffer[7]) << 0U);
|
||||
}
|
||||
};
|
||||
#ifdef _MSC_VER
|
||||
return _byteswap_ulong(value);
|
||||
#else
|
||||
// Note: GCC and Clang will optimize this pattern to a built-in intrinsic.
|
||||
return value >> 24 | (value & 0x00FF0000) >> 8 | (value & 0x0000FF00) << 8 | value << 24;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
inline CHARLS_CHECK_RETURN uint64_t byte_swap(const uint64_t value) noexcept
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return _byteswap_uint64(value);
|
||||
#else
|
||||
// Note: GCC and Clang will optimize this pattern to a built-in intrinsic.
|
||||
return (value << 56) | ((value << 40) & 0x00FF'0000'0000'0000) | ((value << 24) & 0x0000'FF00'0000'0000) |
|
||||
((value << 8) & 0x0000'00FF'0000'0000) | ((value >> 8) & 0x0000'0000'FF00'0000) |
|
||||
((value >> 24) & 0x0000'0000'00FF'0000) | ((value >> 40) & 0x0000'0000'0000'FF00) | (value >> 56);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Reads a value of type T from a buffer that does not need to be aligned for T.
/// The bytes are copied as-is: no byte order conversion is performed.
/// </summary>
/// <param name="buffer">Pointer to at least sizeof(T) readable bytes.</param>
/// <returns>The value formed by the first sizeof(T) bytes of the buffer.</returns>
template<typename T>
T read_unaligned(const void* buffer) noexcept
{
    // Copying raw bytes into an object is only well-defined for trivially copyable types.
    static_assert(std::is_trivially_copyable<T>::value, "read_unaligned requires a trivially copyable type");

    // Note: MSVC, GCC and clang will replace this with a direct register read if the architecture allows it
    // (x86, x64 and ARM64 allow it).
    T value;
    std::memcpy(&value, buffer, sizeof(T));
    return value;
}
|
||||
|
||||
|
||||
inline void skip_bytes(byte_span& stream_info, const size_t count) noexcept
|
||||
@ -362,6 +389,9 @@ constexpr uint32_t bit_to_byte_count(const int32_t bit_count) noexcept
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Converts an enumeration to its underlying type. Equivalent to C++23 std::to_underlying
|
||||
/// </summary>
|
||||
template<typename Enum>
|
||||
constexpr auto to_underlying_type(Enum e) noexcept
|
||||
{
|
||||
|
@ -115,14 +115,23 @@ void test_large_image_performance_rgb8(const int loop_count)
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_destination_size(const vector<uint8_t>& source)
|
||||
{
|
||||
const jpegls_decoder decoder{source, true};
|
||||
return decoder.destination_size();
|
||||
}
|
||||
|
||||
void decode_performance_tests(const int loop_count)
|
||||
{
|
||||
cout << "Test decode performance with loop count " << loop_count << "\n";
|
||||
|
||||
const vector<uint8_t> encoded_source{read_file("decodetest.jls")};
|
||||
|
||||
try
|
||||
{
|
||||
// This test expects the file decodetest.jls to exist.
|
||||
// It can be any valid JPEG-LS file.
|
||||
// Changing the content of this file allows different performance measurements.
|
||||
const vector<uint8_t> encoded_source{read_file("decodetest.jls")};
|
||||
|
||||
// Pre-allocate the destination outside the measurement loop.
|
||||
// std::vector initializes its elements and this step needs to be excluded from the measurement.
|
||||
vector<uint8_t> destination(jpegls_decoder{encoded_source, true}.destination_size());
|
||||
@ -144,6 +153,10 @@ void decode_performance_tests(const int loop_count)
|
||||
{
|
||||
cout << "Decode failure: " << e.what() << "\n";
|
||||
}
|
||||
catch (const std::ios_base::failure& e)
|
||||
{
|
||||
cout << "IO failure (missing decodetest.jls?): " << e.what() << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void encode_performance_tests(const int loop_count)
|
||||
|
Loading…
x
Reference in New Issue
Block a user