Merge branch 'main' into dev

This commit is contained in:
DRC 2024-03-04 18:10:16 -05:00
commit 7e45654c1b
94 changed files with 870 additions and 666 deletions

View File

@ -149,7 +149,7 @@ jobs:
mkdir build
pushd build
cmake -G"Unix Makefiles" -DWITH_JPEG8=1 \
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings' \
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings -fcf-protection' \
..
export NUMCPUS=`grep -c '^processor' /proc/cpuinfo`
make -j$NUMCPUS --load-average=$NUMCPUS

View File

@ -14,6 +14,8 @@ Build Requirements
(if building x86 or x86-64 SIMD extensions)
* If using NASM, 2.13 or later is required.
* If using Yasm, 1.2.0 or later is required.
* NASM 2.15 or later is required if building libjpeg-turbo with Intel
Control-flow Enforcement Technology (CET) support.
* If building on macOS, NASM or Yasm can be obtained from
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in

View File

@ -20,6 +20,14 @@ libjpeg-turbo components to depend on the Visual C++ run-time DLL when built
with Visual C++ and CMake 3.15 or later, regardless of the value of the
`WITH_CRT_DLL` CMake variable.
2. The x86-64 SIMD extensions now include support for Intel Control-flow
Enforcement Technology (CET), which is enabled automatically if CET is enabled
in the C compiler.
3. Fixed a regression introduced by 3.0 beta2[6] that made it impossible for
calling applications to supply custom Huffman tables when generating
12-bit-per-component lossy JPEG images using the libjpeg API.
3.0.2
=====

View File

@ -96,6 +96,18 @@ if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
endif()
# Propagate Intel Control-flow Enforcement Technology (CET) status from the C
# compiler to the NASM command line.  This is attempted only when CPU_TYPE is
# "x86_64" and the NASM object format name begins with "elf".
if(CPU_TYPE STREQUAL "x86_64" AND CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "^elf")
# Probe the C compiler: the test program below compiles only if __CET__ is
# predefined with at least one of its two low bits set.  Otherwise the #error
# directive aborts the compile and HAVE_CET ends up false.
# NOTE(review): presumably __CET__ is predefined when the compiler is invoked
# with -fcf-protection (passed via CMAKE_C_FLAGS) -- confirm against the
# compiler documentation.
check_c_source_compiles("
#if (__CET__ & 3) == 0
#error \"CET not enabled\"
#endif
int main(void) { return 0; }" HAVE_CET)
# When the probe succeeds, define __CET__ for NASM as well, so that the
# assembly sources are built with the same macro visible.
if(HAVE_CET)
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__CET__")
endif()
endif()
# Compute the NASM flags that are in effect for the current build type (the
# common flags plus the per-build-type flags) and report them at configure
# time.
#
# CMAKE_BUILD_TYPE is quoted on purpose: an unquoted empty expansion would
# remove the argument entirely, leaving string(TOUPPER) with the wrong number
# of arguments and aborting the configure step.  With the quotes, an empty
# build type simply yields an empty CMAKE_BUILD_TYPE_UC, and the per-build-type
# flags variable below expands to nothing.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UC)
set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} ${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}")

View File

@ -2,7 +2,7 @@
; jccolext.asm - colorspace conversion (AVX2)
;
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,15 +49,15 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@ -80,9 +80,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edx
push ebx
push edi
@ -93,11 +93,11 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_YMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@ -154,7 +154,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@ -278,7 +278,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@ -552,7 +552,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
pop edi
pop ebx
pop edx
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jccolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,15 +49,15 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx, ecx
@ -80,9 +80,9 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edx
push ebx
push edi
@ -93,11 +93,11 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@ -143,7 +143,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@ -211,7 +211,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@ -449,7 +449,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
pop edi
pop ebx
pop edx
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@ -1,7 +1,7 @@
;
; jccolext.asm - colorspace conversion (SSE2)
;
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -48,15 +48,15 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@ -79,9 +79,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edx
push ebx
push edi
@ -92,11 +92,11 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@ -147,7 +147,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -232,7 +232,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -478,7 +478,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
pop edi
pop ebx
pop edx
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (AVX2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
EXTN(jconst_rgb_ycc_convert_avx2):
@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jccolor.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_mmx)
EXTN(jconst_rgb_ycc_convert_mmx):
@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -32,7 +32,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
@ -45,7 +45,7 @@ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
EXTN(jconst_rgb_gray_convert_avx2):
@ -38,7 +38,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
PW_F0114_F0250 times 8 dw F_0_114, F_0_250
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jcgray.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_mmx)
EXTN(jconst_rgb_gray_convert_mmx):
@ -38,7 +38,7 @@ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
PW_F0114_F0250 times 2 dw F_0_114, F_0_250
PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -28,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
EXTN(jconst_rgb_gray_convert_sse2):
@ -37,7 +37,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -49,15 +49,15 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@ -76,20 +76,20 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_YMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@ -146,7 +146,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@ -270,7 +270,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@ -433,7 +433,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
pop ecx ; col
pop esi
pop edi
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jcgryext.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,15 +49,15 @@ EXTN(jsimd_rgb_gray_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx, ecx
@ -76,20 +76,20 @@ EXTN(jsimd_rgb_gray_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@ -135,7 +135,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@ -203,7 +203,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@ -330,7 +330,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
pop ecx ; col
pop esi
pop edi
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -48,15 +48,15 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@ -75,20 +75,20 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@ -139,7 +139,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -224,7 +224,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -359,7 +359,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
pop ecx ; col
pop esi
pop edi
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@ -42,7 +42,7 @@ endstruc
EXTN(jconst_huff_encode_one_block):
alignz 32
ALIGNZ 32
jpeg_mask_bits dq 0x0000, 0x0001, 0x0003, 0x0007
dq 0x000f, 0x001f, 0x003f, 0x007f
@ -84,7 +84,7 @@ times 1 << 12 db 13
times 1 << 13 db 14
times 1 << 14 db 15
alignz 32
ALIGNZ 32
%ifdef PIC
%define NBITS(x) nbits_base + x
@ -236,7 +236,7 @@ times 1 << 14 db 15
; If PIC is defined, load the address of a symbol defined in this file into a
; register. Equivalent to
; get_GOT %1
; GET_GOT %1
; lea %1, [GOTOFF(%1, %2)]
; without using the GOT.
;

View File

@ -3,7 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -70,7 +70,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@ -106,7 +106,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@ -117,7 +117,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
cmp ecx, byte SIZEOF_YMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r24:
; ecx can possibly be 8, 16, 24
@ -141,7 +141,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
vpxor ymm1, ymm1, ymm1
mov ecx, SIZEOF_YMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
@ -243,7 +243,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@ -279,7 +279,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@ -291,7 +291,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
cmp ecx, byte SIZEOF_YMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r24:
cmp ecx, 24
@ -320,7 +320,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
vpxor ymm3, ymm3, ymm3
mov ecx, SIZEOF_YMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]

View File

@ -2,7 +2,7 @@
; jcsample.asm - downsampling (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -69,7 +69,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@ -112,7 +112,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@ -212,7 +212,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@ -247,7 +247,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@ -256,7 +256,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
mov edi, JSAMPROW [edi] ; outptr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [edx+0*SIZEOF_MMWORD]

View File

@ -2,7 +2,7 @@
; jcsample.asm - downsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -69,7 +69,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@ -115,14 +115,14 @@ EXTN(jsimd_h2v1_downsample_sse2):
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r8:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
pxor xmm1, xmm1
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -225,7 +225,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@ -260,7 +260,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@ -272,7 +272,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r8:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
@ -281,7 +281,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
pxor xmm3, xmm3
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]

View File

@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -50,15 +50,15 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@ -81,7 +81,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax
push edi
@ -94,8 +94,8 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16, 7
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
ALIGNX 16, 7
.columnloop:
vmovdqu ymm5, YMMWORD [ebx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@ -295,7 +295,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@ -436,7 +436,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
cmp ecx, byte SIZEOF_YMMWORD/2
@ -479,7 +479,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop ecx

View File

@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,15 +49,15 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@ -80,7 +80,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax
push edi
@ -93,8 +93,8 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16, 7
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
ALIGNX 16, 7
.columnloop:
movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
@ -255,7 +255,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@ -344,7 +344,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2
@ -369,7 +369,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop ecx

View File

@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,15 +49,15 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@ -80,7 +80,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax
push edi
@ -93,8 +93,8 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16, 7
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
ALIGNX 16, 7
.columnloop:
movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
@ -275,7 +275,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@ -387,7 +387,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
cmp ecx, byte SIZEOF_XMMWORD/2
@ -423,7 +423,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop ecx

View File

@ -3,7 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
EXTN(jconst_ycc_rgb_convert_avx2):
@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_mmx)
EXTN(jconst_ycc_rgb_convert_mmx):
@ -42,7 +42,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_avx2)
EXTN(jconst_merged_upsample_avx2):
@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_mmx)
EXTN(jconst_merged_upsample_mmx):
@ -42,7 +42,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_sse2)
EXTN(jconst_merged_upsample_sse2):
@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -50,15 +50,15 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@ -79,9 +79,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
pop ecx ; col
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
vmovdqu ymm6, YMMWORD [ebx] ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
vmovdqu ymm7, YMMWORD [edx] ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@ -168,13 +168,13 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov al, 2 ; Yctr
jmp short .Yloop_1st
alignx 16, 7
ALIGNX 16, 7
.Yloop_2nd:
vmovdqa ymm0, YMMWORD [wk(1)] ; ymm0=(R-Y)H
vmovdqa ymm2, YMMWORD [wk(2)] ; ymm2=(G-Y)H
vmovdqa ymm4, YMMWORD [wk(0)] ; ymm4=(B-Y)H
alignx 16, 7
ALIGNX 16, 7
.Yloop_1st:
vmovdqu ymm7, YMMWORD [esi] ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@ -301,7 +301,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@ -445,7 +445,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
cmp ecx, byte SIZEOF_YMMWORD/2

View File

@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -47,15 +47,15 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@ -76,9 +76,9 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
pop ecx ; col
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
movq mm6, MMWORD [ebx] ; mm6=Cb(01234567)
movq mm7, MMWORD [edx] ; mm7=Cr(01234567)
@ -171,13 +171,13 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
mov al, 2 ; Yctr
jmp short .Yloop_1st
alignx 16, 7
ALIGNX 16, 7
.Yloop_2nd:
movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H
movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H
movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H
alignx 16, 7
ALIGNX 16, 7
.Yloop_1st:
movq mm7, MMWORD [esi] ; mm7=Y(01234567)
@ -258,7 +258,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@ -350,7 +350,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2

View File

@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,15 +49,15 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@ -78,9 +78,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
pop ecx ; col
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF)
movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF)
@ -173,13 +173,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov al, 2 ; Yctr
jmp short .Yloop_1st
alignx 16, 7
ALIGNX 16, 7
.Yloop_2nd:
movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H
movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H
movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H
alignx 16, 7
ALIGNX 16, 7
.Yloop_1st:
movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF)
@ -280,7 +280,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@ -395,7 +395,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
cmp ecx, byte SIZEOF_XMMWORD/2

View File

@ -3,7 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -20,7 +20,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_avx2)
EXTN(jconst_fancy_upsample_avx2):
@ -31,7 +31,7 @@ PW_THREE times 16 dw 3
PW_SEVEN times 16 dw 7
PW_EIGHT times 16 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -62,13 +62,13 @@ PW_EIGHT times 16 dw 8
EXTN(jsimd_h2v1_fancy_upsample_avx2):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@ -81,7 +81,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
and eax, byte -SIZEOF_YMMWORD
cmp eax, byte SIZEOF_YMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
vpcmpeqb xmm6, xmm6, xmm6
@ -112,7 +112,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... ---- ---- ff) MSB is ff
vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
@ -196,7 +196,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
pop ebp
ret
@ -234,15 +234,15 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@ -256,7 +256,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@ -286,8 +286,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0]
vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
@ -328,19 +328,19 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqa YMMWORD [wk(0)], ymm1
vmovdqa YMMWORD [wk(1)], ymm2
poppic ebx
POPPIC ebx
add eax, byte SIZEOF_YMMWORD-1
and eax, byte -SIZEOF_YMMWORD
cmp eax, byte SIZEOF_YMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpcmpeqb xmm1, xmm1, xmm1
vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2)
@ -353,7 +353,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
jmp near .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
; -- process the next column block
@ -362,8 +362,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1]
vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
@ -516,7 +516,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
poppic ebx
POPPIC ebx
sub eax, byte SIZEOF_YMMWORD
add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above)
@ -590,7 +590,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@ -598,7 +598,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
cmp eax, byte SIZEOF_YMMWORD
@ -629,7 +629,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
add esi, byte SIZEOF_YMMWORD ; inptr
add edi, byte 2*SIZEOF_YMMWORD ; outptr
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi
@ -689,7 +689,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@ -698,7 +698,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
cmp eax, byte SIZEOF_YMMWORD
@ -734,7 +734,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
add ebx, 2*SIZEOF_YMMWORD ; outptr0
add edi, 2*SIZEOF_YMMWORD ; outptr1
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi

View File

@ -2,7 +2,7 @@
; jdsample.asm - upsampling (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -19,7 +19,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_mmx)
EXTN(jconst_fancy_upsample_mmx):
@ -30,7 +30,7 @@ PW_THREE times 4 dw 3
PW_SEVEN times 4 dw 7
PW_EIGHT times 4 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -61,13 +61,13 @@ PW_EIGHT times 4 dw 8
EXTN(jsimd_h2v1_fancy_upsample_mmx):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@ -80,7 +80,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@ -103,14 +103,14 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
and eax, byte -SIZEOF_MMWORD
cmp eax, byte SIZEOF_MMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
pcmpeqb mm6, mm6
psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
pand mm6, MMWORD [esi+0*SIZEOF_MMWORD]
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm6, MMWORD [esi+1*SIZEOF_MMWORD]
@ -187,7 +187,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
pop ebp
ret
@ -224,15 +224,15 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@ -246,7 +246,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@ -276,8 +276,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0]
movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor mm3, mm3 ; mm3=(all 0's)
movq mm4, mm0
@ -312,19 +312,19 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [wk(0)], mm1
movq MMWORD [wk(1)], mm2
poppic ebx
POPPIC ebx
add eax, byte SIZEOF_MMWORD-1
and eax, byte -SIZEOF_MMWORD
cmp eax, byte SIZEOF_MMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pcmpeqb mm1, mm1
psllq mm1, (SIZEOF_MMWORD-2)*BYTE_BIT
@ -337,7 +337,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [wk(3)], mm2
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
; -- process the next column block
@ -346,8 +346,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1]
movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor mm3, mm3 ; mm3=(all 0's)
movq mm4, mm0
@ -486,7 +486,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [edi+0*SIZEOF_MMWORD], mm1
movq MMWORD [edi+1*SIZEOF_MMWORD], mm0
poppic ebx
POPPIC ebx
sub eax, byte SIZEOF_MMWORD
add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above)
@ -561,7 +561,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@ -569,7 +569,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@ -599,7 +599,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
add esi, byte 2*SIZEOF_MMWORD ; inptr
add edi, byte 4*SIZEOF_MMWORD ; outptr
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi
@ -660,7 +660,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@ -669,7 +669,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@ -704,7 +704,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
add ebx, byte 4*SIZEOF_MMWORD ; outptr0
add edi, byte 4*SIZEOF_MMWORD ; outptr1
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi

View File

@ -2,7 +2,7 @@
; jdsample.asm - upsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -19,7 +19,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_sse2)
EXTN(jconst_fancy_upsample_sse2):
@ -30,7 +30,7 @@ PW_THREE times 8 dw 3
PW_SEVEN times 8 dw 7
PW_EIGHT times 8 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -61,13 +61,13 @@ PW_EIGHT times 8 dw 8
EXTN(jsimd_h2v1_fancy_upsample_sse2):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@ -80,7 +80,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@ -103,14 +103,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
and eax, byte -SIZEOF_XMMWORD
cmp eax, byte SIZEOF_XMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
pcmpeqb xmm6, xmm6
pslldq xmm6, (SIZEOF_XMMWORD-1)
pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
@ -185,7 +185,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
pop ebp
ret
@ -223,15 +223,15 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@ -245,7 +245,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@ -275,8 +275,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0]
movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor xmm3, xmm3 ; xmm3=(all 0's)
movdqa xmm4, xmm0
@ -311,19 +311,19 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(0)], xmm1
movdqa XMMWORD [wk(1)], xmm2
poppic ebx
POPPIC ebx
add eax, byte SIZEOF_XMMWORD-1
and eax, byte -SIZEOF_XMMWORD
cmp eax, byte SIZEOF_XMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pcmpeqb xmm1, xmm1
pslldq xmm1, (SIZEOF_XMMWORD-2)
@ -336,7 +336,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15)
jmp near .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
; -- process the next column block
@ -345,8 +345,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1]
movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor xmm3, xmm3 ; xmm3=(all 0's)
movdqa xmm4, xmm0
@ -485,7 +485,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
poppic ebx
POPPIC ebx
sub eax, byte SIZEOF_XMMWORD
add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above)
@ -558,7 +558,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@ -566,7 +566,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -596,7 +596,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
add esi, byte 2*SIZEOF_XMMWORD ; inptr
add edi, byte 4*SIZEOF_XMMWORD ; outptr
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi
@ -655,7 +655,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@ -664,7 +664,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@ -699,7 +699,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
add ebx, byte 4*SIZEOF_XMMWORD ; outptr0
add edi, byte 4*SIZEOF_XMMWORD ; outptr1
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi

View File

@ -2,7 +2,7 @@
; jfdctflt.asm - floating-point FDCT (3DNow!)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -24,7 +24,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_3dnow)
EXTN(jconst_fdct_float_3dnow):
@ -34,7 +34,7 @@ PD_0_707 times 2 dd 0.707106781186547524400844
PD_0_541 times 2 dd 0.541196100146196984399723
PD_1_306 times 2 dd 1.306562964876376527856643
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -63,19 +63,19 @@ EXTN(jsimd_fdct_float_3dnow):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@ -190,7 +190,7 @@ EXTN(jsimd_fdct_float_3dnow):
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@ -307,7 +307,7 @@ EXTN(jsimd_fdct_float_3dnow):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jfdctflt.asm - floating-point FDCT (SSE)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -34,7 +34,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_sse)
EXTN(jconst_fdct_float_sse):
@ -44,7 +44,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
PD_0_541 times 4 dd 0.541196100146196984399723
PD_1_306 times 4 dd 1.306562964876376527856643
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -74,19 +74,19 @@ EXTN(jsimd_fdct_float_sse):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@ -222,7 +222,7 @@ EXTN(jsimd_fdct_float_sse):
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@ -358,7 +358,7 @@ EXTN(jsimd_fdct_float_sse):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jfdctfst.asm - fast integer FDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,7 +49,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_mmx)
EXTN(jconst_fdct_ifast_mmx):
@ -59,7 +59,7 @@ PW_F0382 times 4 dw F_0_382 << CONST_SHIFT
PW_F0541 times 4 dw F_0_541 << CONST_SHIFT
PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -88,19 +88,19 @@ EXTN(jsimd_fdct_ifast_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@ -241,7 +241,7 @@ EXTN(jsimd_fdct_ifast_mmx):
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@ -384,7 +384,7 @@ EXTN(jsimd_fdct_ifast_mmx):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jfdctfst.asm - fast integer FDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -49,7 +49,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_sse2)
EXTN(jconst_fdct_ifast_sse2):
@ -59,7 +59,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -89,13 +89,13 @@ EXTN(jsimd_fdct_ifast_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@ -392,7 +392,7 @@ EXTN(jsimd_fdct_ifast_sse2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
@ -108,7 +108,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%8: Temp registers
; %9: Pass (1 or 2)
%macro dodct 9
%macro DODCT 9
vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
@ -223,7 +223,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_avx2)
EXTN(jconst_fdct_islow_avx2):
@ -242,7 +242,7 @@ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -262,13 +262,13 @@ PW_1_NEG1 times 8 dw 1
EXTN(jsimd_fdct_islow_avx2):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@ -292,9 +292,9 @@ EXTN(jsimd_fdct_islow_avx2):
; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
; ---- Pass 2: process columns.
@ -302,9 +302,9 @@ EXTN(jsimd_fdct_islow_avx2):
vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
@ -322,7 +322,7 @@ EXTN(jsimd_fdct_islow_avx2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
pop ebp
ret

View File

@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_mmx)
EXTN(jconst_fdct_islow_mmx):
@ -80,7 +80,7 @@ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -109,19 +109,19 @@ EXTN(jsimd_fdct_islow_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@ -363,7 +363,7 @@ EXTN(jsimd_fdct_islow_mmx):
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@ -609,7 +609,7 @@ EXTN(jsimd_fdct_islow_mmx):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_sse2)
EXTN(jconst_fdct_islow_sse2):
@ -80,7 +80,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -110,13 +110,13 @@ EXTN(jsimd_fdct_islow_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@ -622,7 +622,7 @@ EXTN(jsimd_fdct_islow_sse2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -24,7 +24,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_3dnow)
EXTN(jconst_idct_float_3dnow):
@ -36,7 +36,7 @@ PD_2_613 times 2 dd 2.613125929752753055713286
PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -78,7 +78,7 @@ EXTN(jsimd_idct_float_3dnow):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@ -87,21 +87,21 @@ EXTN(jsimd_idct_float_3dnow):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/2 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
or eax, ebx
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
jnz short .columnDCT
; -- AC terms all zero
@ -127,7 +127,7 @@ EXTN(jsimd_idct_float_3dnow):
movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -293,7 +293,7 @@ EXTN(jsimd_idct_float_3dnow):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/2 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@ -420,14 +420,14 @@ EXTN(jsimd_idct_float_3dnow):
punpckldq mm6, mm4 ; mm6=(00 01 02 03 04 05 06 07)
punpckhdq mm7, mm4 ; mm7=(10 11 12 13 14 15 16 17)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 2*SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -23,18 +23,18 @@
; --------------------------------------------------------------------------
%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse)
EXTN(jconst_idct_float_sse):
@ -46,7 +46,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_0_125 times 4 dd 0.125 ; 1/8
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -88,7 +88,7 @@ EXTN(jsimd_idct_float_sse):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@ -97,7 +97,7 @@ EXTN(jsimd_idct_float_sse):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@ -149,7 +149,7 @@ EXTN(jsimd_idct_float_sse):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -325,11 +325,11 @@ EXTN(jsimd_idct_float_sse):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@ -340,11 +340,11 @@ EXTN(jsimd_idct_float_sse):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@ -372,7 +372,7 @@ EXTN(jsimd_idct_float_sse):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@ -536,7 +536,7 @@ EXTN(jsimd_idct_float_sse):
punpckldq mm5, mm6 ; mm5=(20 21 22 23 24 25 26 27)
punpckhdq mm4, mm6 ; mm4=(30 31 32 33 34 35 36 37)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@ -547,7 +547,7 @@ EXTN(jsimd_idct_float_sse):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -23,18 +23,18 @@
; --------------------------------------------------------------------------
%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse2)
EXTN(jconst_idct_float_sse2):
@ -46,7 +46,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -88,7 +88,7 @@ EXTN(jsimd_idct_float_sse2):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@ -97,7 +97,7 @@ EXTN(jsimd_idct_float_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@ -150,7 +150,7 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -287,11 +287,11 @@ EXTN(jsimd_idct_float_sse2):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@ -302,11 +302,11 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@ -334,7 +334,7 @@ EXTN(jsimd_idct_float_sse2):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@ -464,7 +464,7 @@ EXTN(jsimd_idct_float_sse2):
pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
@ -475,7 +475,7 @@ EXTN(jsimd_idct_float_sse2):
movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jidctfst.asm - fast integer IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -56,7 +56,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_mmx)
EXTN(jconst_idct_ifast_mmx):
@ -67,7 +67,7 @@ PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 4 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -109,7 +109,7 @@ EXTN(jsimd_idct_ifast_mmx):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@ -118,7 +118,7 @@ EXTN(jsimd_idct_ifast_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@ -163,7 +163,7 @@ EXTN(jsimd_idct_ifast_mmx):
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -326,7 +326,7 @@ EXTN(jsimd_idct_ifast_mmx):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@ -464,7 +464,7 @@ EXTN(jsimd_idct_ifast_mmx):
punpckldq mm5, mm4 ; mm5=(20 21 22 23 24 25 26 27)
punpckhdq mm1, mm4 ; mm1=(30 31 32 33 34 35 36 37)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@ -475,7 +475,7 @@ EXTN(jsimd_idct_ifast_mmx):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_JCOEF ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jidctfst.asm - fast integer IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -56,7 +56,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_sse2)
EXTN(jconst_idct_ifast_sse2):
@ -67,7 +67,7 @@ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -101,13 +101,13 @@ EXTN(jsimd_idct_ifast_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@ -155,7 +155,7 @@ EXTN(jsimd_idct_ifast_sse2):
movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1
movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -490,7 +490,7 @@ EXTN(jsimd_idct_ifast_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
@ -118,7 +118,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%12: Temp registers
; %13: Pass (1 or 2)
%macro dodct 13
%macro DODCT 13
; -- Even part
; (Original)
@ -250,7 +250,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_avx2)
EXTN(jconst_idct_islow_avx2):
@ -269,7 +269,7 @@ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -303,13 +303,13 @@ EXTN(jsimd_idct_islow_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns.
@ -353,7 +353,7 @@ EXTN(jsimd_idct_islow_avx2):
vpshufd ymm3, ymm4, 0xFF ; ymm3=col3_7=(03 03 03 03 03 03 03 03 07 07 07 07 07 07 07 07)
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -371,10 +371,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
.column_end:
@ -395,10 +395,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm4=in7_5
vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
@ -442,7 +442,7 @@ EXTN(jsimd_idct_islow_avx2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_mmx)
EXTN(jconst_idct_islow_mmx):
@ -80,7 +80,7 @@ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -122,7 +122,7 @@ EXTN(jsimd_idct_islow_mmx):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@ -131,7 +131,7 @@ EXTN(jsimd_idct_islow_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@ -178,7 +178,7 @@ EXTN(jsimd_idct_islow_mmx):
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -513,7 +513,7 @@ EXTN(jsimd_idct_islow_mmx):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@ -816,7 +816,7 @@ EXTN(jsimd_idct_islow_mmx):
punpckldq mm7, mm5 ; mm7=(20 21 22 23 24 25 26 27)
punpckhdq mm4, mm5 ; mm4=(30 31 32 33 34 35 36 37)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@ -827,7 +827,7 @@ EXTN(jsimd_idct_islow_mmx):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_JCOEF ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_sse2)
EXTN(jconst_idct_islow_sse2):
@ -80,7 +80,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -114,13 +114,13 @@ EXTN(jsimd_idct_islow_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@ -172,7 +172,7 @@ EXTN(jsimd_idct_islow_sse2):
movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5
movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -847,7 +847,7 @@ EXTN(jsimd_idct_islow_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@ -2,7 +2,7 @@
; jidctred.asm - reduced-size IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -69,7 +69,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_mmx)
EXTN(jconst_idct_red_mmx):
@ -87,7 +87,7 @@ PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -124,13 +124,13 @@ EXTN(jsimd_idct_4x4_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [workspace]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@ -139,7 +139,7 @@ EXTN(jsimd_idct_4x4_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@ -181,7 +181,7 @@ EXTN(jsimd_idct_4x4_mmx):
movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -479,7 +479,7 @@ EXTN(jsimd_idct_4x4_mmx):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
@ -512,7 +512,7 @@ EXTN(jsimd_idct_2x2_mmx):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.

View File

@ -2,7 +2,7 @@
; jidctred.asm - reduced-size IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -69,7 +69,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_sse2)
EXTN(jconst_idct_red_sse2):
@ -87,7 +87,7 @@ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -122,13 +122,13 @@ EXTN(jsimd_idct_4x4_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@ -171,7 +171,7 @@ EXTN(jsimd_idct_4x4_sse2):
pshufd xmm3, xmm3, 0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@ -400,7 +400,7 @@ EXTN(jsimd_idct_4x4_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
@ -433,7 +433,7 @@ EXTN(jsimd_idct_2x2_sse2):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.

View File

@ -2,7 +2,7 @@
; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_3dnow):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@ -154,7 +154,7 @@ EXTN(jsimd_quantize_float_3dnow):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]

View File

@ -2,7 +2,7 @@
; jquant.asm - sample data conversion and quantization (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_mmx):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@ -157,10 +157,10 @@ EXTN(jsimd_quantize_mmx):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov ah, 2
alignx 16, 7
ALIGNX 16, 7
.quantloop1:
mov al, DCTSIZE2/8/2
alignx 16, 7
ALIGNX 16, 7
.quantloop2:
movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]

View File

@ -2,7 +2,7 @@
; jquant.asm - sample data conversion and quantization (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_sse):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@ -150,7 +150,7 @@ EXTN(jsimd_quantize_float_sse):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]

View File

@ -2,7 +2,7 @@
; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_sse2):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@ -127,7 +127,7 @@ EXTN(jsimd_quantize_float_sse2):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]

View File

@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_sse2):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@ -133,7 +133,7 @@ EXTN(jsimd_quantize_sse2):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/32
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]

View File

@ -2,7 +2,7 @@
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, 2016, 2018-2019, D. R. Commander.
; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@ -76,6 +76,14 @@
; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits
; When CET is enabled (__CET__ defined) and the target is x86-64, emit a
; .note.gnu.property ELF note consisting of the eight dwords below.
; NOTE(review): the field names in the comments below follow the GNU property
; note layout from the Linux x86-64 psABI -- confirm against that document.
%ifdef __CET__
%ifdef __x86_64__
section .note.gnu.property note alloc noexec align=8
; Note header: namesz = 4, descsz = 16, type = 5 (NT_GNU_PROPERTY_TYPE_0),
; name = 0x00554e47 (the bytes 'G', 'N', 'U', 0 in little-endian order)
dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
; Property: type = 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND), datasz = 4,
; data = 3 (presumably IBT | SHSTK), then one zero dword of padding
dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
%endif
%endif
; -- segment definition --
;
%ifdef __x86_64__
@ -272,7 +280,7 @@ const_base:
%define GOTOFF(got, sym) (got) + (sym) - const_base
%imacro get_GOT 1
%imacro GET_GOT 1
; NOTE: this macro destroys the ecx register.
call %%geteip
add ecx, byte (%%ref - $)
@ -304,7 +312,7 @@ const_base:
%define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff
%imacro get_GOT 1
%imacro GET_GOT 1
extern GOT_SYMBOL
call %%geteip
add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
@ -317,13 +325,13 @@ const_base:
%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
%imacro pushpic 1.nolist
%imacro PUSHPIC 1.nolist
push %1
%endmacro
%imacro poppic 1.nolist
%imacro POPPIC 1.nolist
pop %1
%endmacro
%imacro movpic 2.nolist
%imacro MOVPIC 2.nolist
mov %1, %2
%endmacro
@ -331,13 +339,13 @@ const_base:
%define GOTOFF(got, sym) (sym)
%imacro get_GOT 1.nolist
%imacro GET_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%imacro PUSHPIC 1.nolist
%endmacro
%imacro poppic 1.nolist
%imacro POPPIC 1.nolist
%endmacro
%imacro movpic 2.nolist
%imacro MOVPIC 2.nolist
%endmacro
%endif ; PIC -----------------------------------------
@ -349,7 +357,7 @@ const_base:
%define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b, n) (($$-(b)) & ((n)-1))
%imacro alignx 1-2.nolist 0xFFFF
%imacro ALIGNX 1-2.nolist 0xFFFF
%%bs: \
times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
db 0x90 ; nop
@ -371,7 +379,7 @@ const_base:
; Align the next data on {2,4,8,16,..}-byte boundary.
;
%imacro alignz 1.nolist
%imacro ALIGNZ 1.nolist
align %1, db 0 ; filling zeros
%endmacro
@ -379,7 +387,7 @@ const_base:
%ifdef WIN64
%imacro collect_args 1
%imacro COLLECT_ARGS 1
sub rsp, SIZEOF_XMMWORD
movaps XMMWORD [rsp], xmm6
sub rsp, SIZEOF_XMMWORD
@ -408,7 +416,7 @@ const_base:
push rdi
%endmacro
%imacro uncollect_args 1
%imacro UNCOLLECT_ARGS 1
pop rdi
pop rsi
%if %1 > 5
@ -429,7 +437,7 @@ const_base:
add rsp, SIZEOF_XMMWORD
%endmacro
%imacro push_xmm 1
%imacro PUSH_XMM 1
sub rsp, %1 * SIZEOF_XMMWORD
movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
%if %1 > 1
@ -443,7 +451,7 @@ const_base:
%endif
%endmacro
%imacro pop_xmm 1
%imacro POP_XMM 1
movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
%if %1 > 1
movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
@ -459,7 +467,7 @@ const_base:
%else
%imacro collect_args 1
%imacro COLLECT_ARGS 1
push r10
mov r10, rdi
%if %1 > 1
@ -484,7 +492,7 @@ const_base:
%endif
%endmacro
%imacro uncollect_args 1
%imacro UNCOLLECT_ARGS 1
%if %1 > 5
pop r15
%endif
@ -503,16 +511,29 @@ const_base:
pop r10
%endmacro
%imacro push_xmm 1
%imacro PUSH_XMM 1
%endmacro
%imacro pop_xmm 1
%imacro POP_XMM 1
%endmacro
%endif
%endif
; ENDBR64 -- zero-argument macro placed at the top of the 64-bit SIMD entry
; points.  When __CET__ is defined, it emits the dword 0xfa1e0ff3, i.e. the
; byte sequence F3 0F 1E FA in memory order, which is the encoding of the
; x86-64 endbr64 instruction.  When __CET__ is not defined, the macro expands
; to nothing, so call sites need no conditional logic of their own.
; NOTE(review): the instruction is presumably emitted as raw data so that
; assemblers lacking the endbr64 mnemonic can still build this -- confirm.
%ifdef __CET__
%imacro ENDBR64 0
dd 0xfa1e0ff3
%endmacro
%else
%imacro ENDBR64 0
%endmacro
%endif
; --------------------------------------------------------------------------
; Defines picked up from the C headers
;

View File

@ -1,7 +1,7 @@
;
; jccolext.asm - colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
EXTN(jsimd_rgb_ycc_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -48,7 +49,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_YMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@ -549,7 +550,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -1,7 +1,7 @@
;
; jccolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -40,6 +40,7 @@
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
EXTN(jsimd_rgb_ycc_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -47,7 +48,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_XMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@ -474,7 +475,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
.return:
pop rbx
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
EXTN(jconst_rgb_ycc_convert_avx2):
@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -32,7 +32,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
@ -45,7 +45,7 @@ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
EXTN(jconst_rgb_gray_convert_avx2):
@ -38,7 +38,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
PW_F0114_F0250 times 8 dw F_0_114, F_0_250
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -28,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
EXTN(jconst_rgb_gray_convert_sse2):
@ -37,7 +37,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
EXTN(jsimd_rgb_gray_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -48,7 +49,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_YMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@ -428,7 +429,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -40,6 +40,7 @@
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
EXTN(jsimd_rgb_gray_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -47,7 +48,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@ -353,7 +354,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
.return:
pop rbx
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -39,7 +39,7 @@ endstruc
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_huff_encode_one_block)
EXTN(jconst_huff_encode_one_block):
@ -49,7 +49,7 @@ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007
dd 0x00ff, 0x01ff, 0x03ff, 0x07ff
dd 0x0fff, 0x1fff, 0x3fff, 0x7fff
alignz 32
ALIGNZ 32
times 1 << 14 db 15
times 1 << 13 db 14
@ -87,7 +87,7 @@ times 1 << 13 db 14
times 1 << 14 db 15
times 1 << 15 db 16
alignz 32
ALIGNZ 32
%define NBITS(x) nbits_base + x
%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
@ -261,6 +261,7 @@ times 1 << 15 db 16
GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
EXTN(jsimd_huff_encode_one_block_sse2):
ENDBR64
push rbp
mov rbp, rsp

View File

@ -4,6 +4,7 @@
;
; Copyright (C) 2016, 2018, Matthieu Darbois
; Copyright (C) 2023, Aliaksiej Kandracienka.
; Copyright (C) 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -282,12 +283,13 @@
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
ENDBR64
push rbp
mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
sub rsp, SIZEOF_XMMWORD
movdqa XMMWORD [rsp], ZERO
collect_args 6
COLLECT_ARGS 6
movd AL, r13d
pxor ZERO, ZERO
@ -381,7 +383,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
REDUCE0
uncollect_args 6
UNCOLLECT_ARGS 6
movdqa ZERO, XMMWORD [rsp]
mov rsp, rbp
pop rbp
@ -445,12 +447,13 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
ENDBR64
push rbp
mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
sub rsp, SIZEOF_XMMWORD
movdqa XMMWORD [rsp], ZERO
collect_args 6
COLLECT_ARGS 6
xor SIGN, SIGN
xor EOB, EOB
@ -598,7 +601,7 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
REDUCE0
mov eax, EOB
uncollect_args 6
UNCOLLECT_ARGS 6
movdqa ZERO, XMMWORD [rsp]
mov rsp, rbp
pop rbp

View File

@ -2,7 +2,7 @@
; jcsample.asm - downsampling (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
@ -44,9 +44,10 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
EXTN(jsimd_h2v1_downsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@ -177,7 +178,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
.return:
vzeroupper
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret
@ -205,9 +206,10 @@ EXTN(jsimd_h2v1_downsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
EXTN(jsimd_h2v2_downsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@ -356,7 +358,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
.return:
vzeroupper
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jcsample.asm - downsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -43,9 +43,10 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
EXTN(jsimd_h2v1_downsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@ -159,7 +160,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
jg near .rowloop
.return:
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret
@ -187,9 +188,10 @@ EXTN(jsimd_h2v1_downsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
EXTN(jsimd_h2v2_downsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@ -319,7 +321,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
jg near .rowloop
.return:
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (64-bit AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@ -42,6 +42,7 @@
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
EXTN(jsimd_ycc_rgb_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -49,7 +50,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (WK_NUM * SIZEOF_YMMWORD)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d ; num_cols
@ -486,7 +487,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
EXTN(jsimd_ycc_rgb_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -48,7 +49,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d ; num_cols
@ -429,7 +430,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
.return:
pop rbx
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
EXTN(jconst_ycc_rgb_convert_avx2):
@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_avx2)
EXTN(jconst_merged_upsample_avx2):
@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_sse2)
EXTN(jconst_merged_upsample_sse2):
@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@ -42,6 +42,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
EXTN(jsimd_h2v1_merged_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -49,7 +50,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, SIZEOF_YMMWORD * WK_NUM
collect_args 4
COLLECT_ARGS 4
push rbx
mov ecx, r10d ; col
@ -480,7 +481,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@ -506,9 +507,10 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
EXTN(jsimd_h2v2_merged_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov eax, r10d
@ -587,7 +589,7 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
EXTN(jsimd_h2v1_merged_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -48,7 +49,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
push rbx
mov ecx, r10d ; col
@ -422,7 +423,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
.return:
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@ -448,9 +449,10 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
EXTN(jsimd_h2v2_merged_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov eax, r10d
@ -529,7 +531,7 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jdsample.asm - upsampling (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@ -22,7 +22,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_avx2)
EXTN(jconst_fancy_upsample_avx2):
@ -33,7 +33,7 @@ PW_THREE times 16 dw 3
PW_SEVEN times 16 dw 7
PW_EIGHT times 16 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -62,10 +62,11 @@ PW_EIGHT times 16 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
EXTN(jsimd_h2v1_fancy_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push_xmm 3
collect_args 4
PUSH_XMM 3
COLLECT_ARGS 4
mov eax, r11d ; colctr
test rax, rax
@ -186,8 +187,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
.return:
vzeroupper
uncollect_args 4
pop_xmm 3
UNCOLLECT_ARGS 4
POP_XMM 3
pop rbp
ret
@ -215,6 +216,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
EXTN(jsimd_h2v2_fancy_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -222,8 +224,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_YMMWORD * WK_NUM)
push_xmm 3
collect_args 4
PUSH_XMM 3
COLLECT_ARGS 4
push rbx
mov eax, r11d ; colctr
@ -498,8 +500,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 4
pop_xmm 3
UNCOLLECT_ARGS 4
POP_XMM 3
lea rsp, [rbp-8]
pop r15
pop rbp
@ -524,9 +526,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
EXTN(jsimd_h2v1_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
mov edx, r11d
add rdx, byte (SIZEOF_YMMWORD-1)
@ -589,7 +592,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
.return:
vzeroupper
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret
@ -612,9 +615,10 @@ EXTN(jsimd_h2v1_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
EXTN(jsimd_h2v2_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov edx, r11d
@ -685,7 +689,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jdsample.asm - upsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -21,7 +21,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_sse2)
EXTN(jconst_fancy_upsample_sse2):
@ -32,7 +32,7 @@ PW_THREE times 8 dw 3
PW_SEVEN times 8 dw 7
PW_EIGHT times 8 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -61,9 +61,10 @@ PW_EIGHT times 8 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
EXTN(jsimd_h2v1_fancy_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
mov eax, r11d ; colctr
test rax, rax
@ -174,7 +175,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
jg near .rowloop
.return:
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret
@ -202,6 +203,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
EXTN(jsimd_h2v2_fancy_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -209,7 +211,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
push rbx
mov eax, r11d ; colctr
@ -472,7 +474,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
.return:
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@ -497,9 +499,10 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
EXTN(jsimd_h2v1_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
mov edx, r11d
add rdx, byte (2*SIZEOF_XMMWORD)-1
@ -560,7 +563,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
jg short .rowloop
.return:
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret
@ -583,9 +586,10 @@ EXTN(jsimd_h2v1_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
EXTN(jsimd_h2v2_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov edx, r11d
@ -654,7 +658,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
.return:
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jfdctflt.asm - floating-point FDCT (64-bit SSE)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -35,7 +35,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_sse)
EXTN(jconst_fdct_float_sse):
@ -45,7 +45,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
PD_0_541 times 4 dd 0.541196100146196984399723
PD_1_306 times 4 dd 1.306562964876376527856643
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -66,6 +66,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
GLOBAL_FUNCTION(jsimd_fdct_float_sse)
EXTN(jsimd_fdct_float_sse):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -73,7 +74,7 @@ EXTN(jsimd_fdct_float_sse):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@ -345,7 +346,7 @@ EXTN(jsimd_fdct_float_sse):
dec rcx
jnz near .columnloop
uncollect_args 1
UNCOLLECT_ARGS 1
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jfdctfst.asm - fast integer FDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -50,7 +50,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_sse2)
EXTN(jconst_fdct_ifast_sse2):
@ -60,7 +60,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -81,6 +81,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
EXTN(jsimd_fdct_ifast_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -88,7 +89,7 @@ EXTN(jsimd_fdct_ifast_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@ -379,7 +380,7 @@ EXTN(jsimd_fdct_ifast_sse2):
movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
uncollect_args 1
UNCOLLECT_ARGS 1
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
@ -108,7 +108,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%8: Temp registers
; %9: Pass (1 or 2)
%macro dodct 9
%macro DODCT 9
vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
@ -223,7 +223,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_avx2)
EXTN(jconst_fdct_islow_avx2):
@ -242,7 +242,7 @@ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -260,9 +260,10 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
EXTN(jsimd_fdct_islow_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@ -284,9 +285,9 @@ EXTN(jsimd_fdct_islow_avx2):
; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
; ---- Pass 2: process columns.
@ -294,9 +295,9 @@ EXTN(jsimd_fdct_islow_avx2):
vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
@ -310,7 +311,7 @@ EXTN(jsimd_fdct_islow_avx2):
vmovdqu YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7
vzeroupper
uncollect_args 1
UNCOLLECT_ARGS 1
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -64,7 +64,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_sse2)
EXTN(jconst_fdct_islow_sse2):
@ -81,7 +81,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -102,6 +102,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
EXTN(jsimd_fdct_islow_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -109,7 +110,7 @@ EXTN(jsimd_fdct_islow_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@ -609,7 +610,7 @@ EXTN(jsimd_fdct_islow_sse2):
movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
uncollect_args 1
UNCOLLECT_ARGS 1
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -25,18 +25,18 @@
; --------------------------------------------------------------------------
%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse2)
EXTN(jconst_idct_float_sse2):
@ -48,7 +48,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -76,6 +76,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_float_sse2)
EXTN(jsimd_idct_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -83,7 +84,7 @@ EXTN(jsimd_idct_float_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
lea rsp, [workspace]
collect_args 4
COLLECT_ARGS 4
push rbx
; ---- Pass 1: process columns from input, store into work array.
@ -280,11 +281,11 @@ EXTN(jsimd_idct_float_sse2):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@ -295,11 +296,11 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
@ -470,7 +471,7 @@ EXTN(jsimd_idct_float_sse2):
jnz near .rowloop
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jidctfst.asm - fast integer IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -58,7 +58,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_sse2)
EXTN(jconst_idct_ifast_sse2):
@ -69,7 +69,7 @@ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -95,6 +95,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
EXTN(jsimd_idct_ifast_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -102,7 +103,7 @@ EXTN(jsimd_idct_ifast_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@ -478,7 +479,7 @@ EXTN(jsimd_idct_ifast_sse2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -66,7 +66,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
@ -119,7 +119,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%12: Temp registers
; %13: Pass (1 or 2)
%macro dodct 13
%macro DODCT 13
; -- Even part
; (Original)
@ -241,7 +241,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_avx2)
EXTN(jconst_idct_islow_avx2):
@ -260,7 +260,7 @@ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -282,10 +282,11 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
EXTN(jsimd_idct_islow_avx2):
ENDBR64
push rbp
mov rbp, rsp ; rbp = aligned rbp
push_xmm 4
collect_args 4
PUSH_XMM 4
COLLECT_ARGS 4
; ---- Pass 1: process columns.
@ -342,10 +343,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
.column_end:
@ -362,10 +363,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm4=in7_5
vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
@ -407,8 +408,8 @@ EXTN(jsimd_idct_islow_avx2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
uncollect_args 4
pop_xmm 4
UNCOLLECT_ARGS 4
POP_XMM 4
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_sse2)
EXTN(jconst_idct_islow_sse2):
@ -82,7 +82,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -108,6 +108,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
EXTN(jsimd_idct_islow_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -115,7 +116,7 @@ EXTN(jsimd_idct_islow_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@ -835,7 +836,7 @@ EXTN(jsimd_idct_islow_sse2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@ -2,7 +2,7 @@
; jidctred.asm - reduced-size IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@ -71,7 +71,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_sse2)
EXTN(jconst_idct_red_sse2):
@ -89,7 +89,7 @@ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@ -116,6 +116,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
EXTN(jsimd_idct_4x4_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@ -123,7 +124,7 @@ EXTN(jsimd_idct_4x4_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@ -388,7 +389,7 @@ EXTN(jsimd_idct_4x4_sse2):
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@ -413,9 +414,10 @@ EXTN(jsimd_idct_4x4_sse2):
GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
EXTN(jsimd_idct_2x2_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
; ---- Pass 1: process columns from input.
@ -563,7 +565,7 @@ EXTN(jsimd_idct_2x2_sse2):
mov word [rsi+rax*SIZEOF_JSAMPLE], cx
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -37,9 +37,10 @@
GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
EXTN(jsimd_convsamp_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
push rbx
pcmpeqw xmm7, xmm7
@ -88,7 +89,7 @@ EXTN(jsimd_convsamp_float_sse2):
jnz short .convloop
pop rbx
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret
@ -109,9 +110,10 @@ EXTN(jsimd_convsamp_float_sse2):
GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
EXTN(jsimd_quantize_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
mov rsi, r12
mov rdx, r11
@ -144,7 +146,7 @@ EXTN(jsimd_quantize_float_sse2):
dec rax
jnz short .quantloop
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2024, D. R. Commander.
; Copyright (C) 2016, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
@ -38,9 +38,10 @@
GLOBAL_FUNCTION(jsimd_convsamp_avx2)
EXTN(jsimd_convsamp_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
mov eax, r11d
@ -83,7 +84,7 @@ EXTN(jsimd_convsamp_avx2):
vmovdqu YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
vzeroupper
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret
@ -115,9 +116,10 @@ EXTN(jsimd_convsamp_avx2):
GLOBAL_FUNCTION(jsimd_quantize_avx2)
EXTN(jsimd_quantize_avx2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
@ -152,7 +154,7 @@ EXTN(jsimd_quantize_avx2):
vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
vzeroupper
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret

View File

@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@ -37,9 +37,10 @@
GLOBAL_FUNCTION(jsimd_convsamp_sse2)
EXTN(jsimd_convsamp_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
push rbx
pxor xmm6, xmm6 ; xmm6=(all 0's)
@ -83,7 +84,7 @@ EXTN(jsimd_convsamp_sse2):
jnz short .convloop
pop rbx
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret
@ -115,9 +116,10 @@ EXTN(jsimd_convsamp_sse2):
GLOBAL_FUNCTION(jsimd_quantize_sse2)
EXTN(jsimd_quantize_sse2):
ENDBR64
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
mov rsi, r12
mov rdx, r11
@ -177,7 +179,7 @@ EXTN(jsimd_quantize_sse2):
dec rax
jnz near .quantloop
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret

View File

@ -7,7 +7,7 @@
* Lossless JPEG Modifications:
* Copyright (C) 1999, Ken Murchison.
* libjpeg-turbo Modifications:
* Copyright (C) 2010, 2016, 2018, 2022-2023, D. R. Commander.
* Copyright (C) 2010, 2016, 2018, 2022-2024, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -50,6 +50,113 @@ jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo)
#endif
/*
 * Report whether the compressor is configured with exactly the reference
 * Huffman tables listed below and nothing else.
 *
 * TRUE is returned only when all of the following hold:
 *   - DC and AC table slots 0 and 1 are both populated,
 *   - every DC and AC table slot from 2 up to NUM_HUFF_TBLS - 1 is empty,
 *   - the bits[] and huffval[] contents of slots 0 and 1 are byte-for-byte
 *     equal to the reference arrays (slot 0 against the "luma" arrays, slot 1
 *     against the "chroma" arrays).
 * Any other configuration yields FALSE.
 *
 * NOTE(review): the reference arrays look like the typical tables from
 * ITU-T T.81 Annex K.3 -- confirm against the library's own copy of the
 * standard tables.
 */
LOCAL(boolean)
using_std_huff_tables(j_compress_ptr cinfo)
{
  static const UINT8 std_dc_luma_bits[17] = {
    /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
  };
  static const UINT8 std_dc_luma_vals[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
  };

  static const UINT8 std_dc_chroma_bits[17] = {
    /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
  };
  static const UINT8 std_dc_chroma_vals[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
  };

  static const UINT8 std_ac_luma_bits[17] = {
    /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d
  };
  static const UINT8 std_ac_luma_vals[] = {
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
  };

  static const UINT8 std_ac_chroma_bits[17] = {
    /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77
  };
  static const UINT8 std_ac_chroma_vals[] = {
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
  };

  int slot;

  /* Slots 0 and 1 must each carry both a DC and an AC table. */
  for (slot = 0; slot < 2; slot++) {
    if (cinfo->dc_huff_tbl_ptrs[slot] == NULL ||
        cinfo->ac_huff_tbl_ptrs[slot] == NULL)
      return FALSE;
  }

  /* Any table in a higher slot means the configuration is not the plain
   * two-slot reference set.
   */
  for (slot = 2; slot < NUM_HUFF_TBLS; slot++) {
    if (cinfo->dc_huff_tbl_ptrs[slot] != NULL)
      return FALSE;
    if (cinfo->ac_huff_tbl_ptrs[slot] != NULL)
      return FALSE;
  }

  /* Slot 0, DC: code-length counts, then symbol values. */
  if (memcmp(cinfo->dc_huff_tbl_ptrs[0]->bits, std_dc_luma_bits,
             sizeof(std_dc_luma_bits)) != 0 ||
      memcmp(cinfo->dc_huff_tbl_ptrs[0]->huffval, std_dc_luma_vals,
             sizeof(std_dc_luma_vals)) != 0)
    return FALSE;

  /* Slot 0, AC. */
  if (memcmp(cinfo->ac_huff_tbl_ptrs[0]->bits, std_ac_luma_bits,
             sizeof(std_ac_luma_bits)) != 0 ||
      memcmp(cinfo->ac_huff_tbl_ptrs[0]->huffval, std_ac_luma_vals,
             sizeof(std_ac_luma_vals)) != 0)
    return FALSE;

  /* Slot 1, DC. */
  if (memcmp(cinfo->dc_huff_tbl_ptrs[1]->bits, std_dc_chroma_bits,
             sizeof(std_dc_chroma_bits)) != 0 ||
      memcmp(cinfo->dc_huff_tbl_ptrs[1]->huffval, std_dc_chroma_vals,
             sizeof(std_dc_chroma_vals)) != 0)
    return FALSE;

  /* Slot 1, AC. */
  if (memcmp(cinfo->ac_huff_tbl_ptrs[1]->bits, std_ac_chroma_bits,
             sizeof(std_ac_chroma_bits)) != 0 ||
      memcmp(cinfo->ac_huff_tbl_ptrs[1]->huffval, std_ac_chroma_vals,
             sizeof(std_ac_chroma_vals)) != 0)
    return FALSE;

  return TRUE;
}
LOCAL(void)
initial_setup(j_compress_ptr cinfo, boolean transcode_only)
/* Do computations that are needed before master selection phase */
@ -605,6 +712,8 @@ GLOBAL(void)
jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only)
{
my_master_ptr master = (my_master_ptr)cinfo->master;
boolean empty_huff_tables = TRUE;
int i;
master->pub.prepare_for_pass = prepare_for_pass;
master->pub.pass_startup = pass_startup;
@ -646,7 +755,16 @@ jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only)
(cinfo->progressive_mode && !cinfo->arith_code))
cinfo->optimize_coding = TRUE; /* assume default tables no good for
progressive mode or lossless mode */
if (cinfo->data_precision == 12 && !cinfo->arith_code)
for (i = 0; i < NUM_HUFF_TBLS; i++) {
if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
cinfo->ac_huff_tbl_ptrs[i] != NULL) {
empty_huff_tables = FALSE;
break;
}
}
if (cinfo->data_precision == 12 && !cinfo->arith_code &&
!cinfo->optimize_coding &&
(empty_huff_tables || using_std_huff_tables(cinfo)))
cinfo->optimize_coding = TRUE; /* assume default tables no good for 12-bit
data precision */

View File

@ -1,5 +1,5 @@
/*
* Copyright (C)2009-2023 D. R. Commander. All Rights Reserved.
* Copyright (C)2009-2024 D. R. Commander. All Rights Reserved.
* Copyright (C)2021 Alex Richardson. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -370,7 +370,8 @@ static void setCompDefaults(tjinstance *this, int pixelFormat)
jpeg_set_colorspace(&this->cinfo, JCS_YCbCr);
}
this->cinfo.optimize_coding = this->optimize;
if (this->cinfo.data_precision == 8)
this->cinfo.optimize_coding = this->optimize;
#ifdef C_PROGRESSIVE_SUPPORTED
if (this->progressive) jpeg_simple_progression(&this->cinfo);
#endif