0,0 → 1,2378 |
;***************************************************************************** |
; JPEG to RAW convert plugin - for zSea image viewer |
; Copyright (c) 2008, 2009, Evgeny Grechnikov aka Diamond |
; All rights reserved. |
; |
; Redistribution and use in source and binary forms, with or without |
; modification, are permitted provided that the following conditions are met: |
; * Redistributions of source code must retain the above copyright |
; notice, this list of conditions and the following disclaimer. |
; * Redistributions in binary form must reproduce the above copyright |
; notice, this list of conditions and the following disclaimer in the |
; documentation and/or other materials provided with the distribution. |
; * Neither the name of the <organization> nor the |
; names of its contributors may be used to endorse or promote products |
; derived from this software without specific prior written permission. |
; |
; THIS SOFTWARE IS PROVIDED BY Evgeny Grechnikov ''AS IS'' AND ANY |
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
; DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY |
; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
;***************************************************************************** |
; Some small changes (c) 2011 Marat Zakiyanov aka Mario79, aka Mario |
;***************************************************************************** |
|
format MS COFF |
|
public EXPORTS |
|
section '.flat' code readable align 16 |
|
START: |
; Plugin entry point: decode a JPEG file held in memory into a RAW image. |
; In:  [esp+4] -> parameter block (one dword stack argument, removed by ret 4): |
;        +0  pointer to JPEG data |
;        +12 size of JPEG data in bytes |
; Out: [param+4] = pointer to RAW image, 0 if no image was allocated |
;      [param+8] = result: 0 = image present, 1 = bad image (no image), |
;                  2 = stored by the out-of-memory path of the SOFn handler |
; All general-purpose registers are preserved (pushad/popad). |
; Register roles while parsing: esi -> next input byte, ebp = input bytes |
; remaining, ebx -> RAW image (0 while not allocated). |
pushad |
finit |
; 32 bytes of pushad frame + 4 bytes of return address precede the argument |
mov eax,dword [esp+36] |
mov esi, [eax] ; esi -> JPEG data |
mov ebp, [eax+12] ; ebp = file size |
; remember the stack level of the pushad frame |
mov [_esp], esp |
; initialize constant tables, if not yet |
cmp [color_table_1+4], 0 |
jnz @f |
call initialize_color_table |
@@: |
xor ebx, ebx ; ebx -> RAW data, not allocated yet |
mov [dct_buffer], ebx |
mov [_ebx], ebx |
; check for SOI [Start-Of-Image] marker |
call get_marker |
jc .end |
cmp al, 0xD8 ; SOI? |
jz .soi_ok |
.end: |
; general exit from the function |
; for progressive mode: convert loaded DCT coefficients to image |
call handle_progressive |
; convert full-color images to RGB |
call convert_to_rgb |
xor eax, eax |
test ebx, ebx |
jnz @f |
inc eax ; ebx=0 => bad image |
@@: |
.ret: |
; in: eax = result code, ebx = RAW data pointer (or 0); |
; esp must be back at the level of the pushad frame. |
; Fetch the parameter block from the stack argument, exactly as at entry. |
; (The slot at [esp+28] is the caller's EAX saved by pushad and holds the |
; parameter pointer only if the caller happened to leave it in EAX.) |
mov ecx, [esp+36] |
mov [ecx+4], ebx ; save RAW data ptr |
mov [ecx+8], eax ; save result |
popad |
ret 4 |
.soi_ok: |
; SOI was found: reset per-image state (ebx is still 0 on this path) |
mov [restart_interval], ebx |
mov [adobe_ycck], 0 |
; loop until start of frame (real data), parse markers |
.markers_loop: |
; get_marker returns the marker code in al, or CF=1 if none could be read |
call get_marker |
jc .end |
; markers RSTn do not have parameters |
; N.B. They can not exist in this part of JPEG, but let's be liberal :) |
cmp al, 0xD0 |
jb @f |
cmp al, 0xD8 |
jb .markers_loop |
@@: |
cmp al, 0xD9 ; EOI? [invalid here] |
jz .end |
; ok, this is marker segment |
; first word is length of the segment |
; (stored big-endian, hence the two byte loads into dh:dl) |
cmp ebp, 2 |
jb .end |
xor edx, edx |
mov dl, [esi+1] |
mov dh, [esi] ; edx = marker length, al = marker value |
; the whole segment is charged against the remaining-bytes counter here, |
; so handlers advance esi without touching ebp again |
sub ebp, edx |
jb .end |
; dispatch on the marker code; handlers receive esi -> length field, |
; edx = segment length |
cmp al, 0xDB ; DQT? |
jz .dqt |
cmp al, 0xC4 ; DHT? |
jz .dht |
cmp al, 0xCC ; DAC? [ignored - no arithmetic coding] |
jz .next_marker |
cmp al, 0xDD ; DRI? |
jz .dri |
cmp al, 0xDA ; SOS? |
jz .sos |
; 0xC0..0xCF (except DHT and DAC, already handled above) are SOFn markers |
cmp al, 0xC0 |
jb @f |
cmp al, 0xD0 |
jb .sofn |
@@: |
cmp al, 0xEE ; APP14? |
jz .app14 |
; unrecognized marker; let's skip it and hope for the best |
.next_marker: |
; skip the whole segment (length field included) and look for the next marker |
add esi, edx |
jmp .markers_loop |
.app14: |
; APP14 segment: detect the Adobe variant and record whether its colour |
; transform byte equals 2 (flag later consulted for 4-component frames). |
; in: esi -> length field, edx = segment length (upper 16 bits are zero) |
; Anything too short or without the "Adobe" signature is simply skipped. |
cmp edx, 14 |
jb .next_marker |
; signature occupies bytes 2..6 of the segment |
cmp dword [esi+2], 'Adob' |
jne .next_marker |
cmp byte [esi+6], 'e' |
jne .next_marker |
; byte 13 is the transform code |
cmp byte [esi+13], 2 |
sete [adobe_ycck] |
jmp .next_marker |
.dqt: |
; DQT marker found |
; in: esi -> length field, edx = segment length |
; Each table is stored as 64 single-precision floats (256 bytes) at |
; quant_tables + id*256 and flagged in quant_tables_defined[id]. |
; length: 2 bytes for length field + 65 bytes per table |
sub edx, 2 |
jc .end |
; skip the length field |
lodsw |
.dqt_loop: |
; edx = bytes of table data still to parse; 0 means the segment is done |
test edx, edx |
jz .markers_loop |
sub edx, 1+64 |
jc .end |
; al = precision (high nibble) and table id (low nibble) |
lodsb |
; 8-bit DCT-based process shall not use a 16-bit precision quantization table. |
test al, 0xF0 |
jnz .end |
and eax, 3 |
mov [eax+quant_tables_defined], 1 |
shl eax, 8 |
lea edi, [eax+quant_tables] |
xor ecx, ecx |
@@: |
; load the next 8-bit quantizer value onto the FPU stack via a temporary dword |
xor eax, eax |
lodsb |
push eax |
fild dword [esp] |
pop eax |
; eax = 2 * zigzag[ecx]; it is used below both as the byte offset of the |
; destination entry and, through two 3-bit fields, as the selector of two |
; factors from idct_pre_table |
; NOTE(review): zigzag and idct_pre_table are defined outside this view; |
; presumably the two factors are the row/column pre-scale of the IDCT - confirm |
movzx eax, byte [zigzag+ecx] |
add eax, eax |
push eax |
and eax, 7*4 |
fmul dword [idct_pre_table+eax] |
pop eax |
push eax |
shr eax, 3 |
and eax, 7*4 |
fmul dword [idct_pre_table+eax] |
pop eax |
fstp dword [edi+eax] |
inc ecx |
cmp ecx, 64 |
jb @b |
jmp .dqt_loop |
.dri: |
; DRI (Define Restart Interval) segment |
; in: esi -> length field, edx = segment length |
; The segment is valid only when it is exactly 4 bytes long. |
cmp edx, 4 |
jne .end2 |
; assemble the big-endian 16-bit interval byte by byte |
movzx eax, byte [esi+2] |
shl eax, 8 |
mov al, [esi+3] |
mov [restart_interval], eax |
jmp .next_marker |
.dht: |
; DHT marker found |
; in: esi -> length field, edx = segment length |
; A segment may hold several tables. Each one is: 1 byte class/id, |
; 16 bytes with the number of codes of length 1..16, then the symbol bytes. |
; For every table a lookup structure made of (code length, value) byte pairs |
; is built at dc_huffman + index*max_hufftable_size: |
;   - a first level of 256 pairs indexed by the next 8 input bits, |
;   - 16-pair sub-tables (32 bytes each) starting at offset 0x200 for longer codes. |
; Unused entries are filled with 0xFF bytes at the end. |
sub edx, 2 |
jc .end2 |
; skip the length field |
lodsw |
.dht_loop: |
; edx = bytes of table data still to parse; 0 means the segment is done |
test edx, edx |
jz .markers_loop |
sub edx, 17 |
jc .end2 |
; next Huffman table; find place for it |
; index = (id & 3) | 4*(class bit 0x10), so class-1 tables get index 4..7 |
; NOTE(review): this presumes ac_huffman_defined/ac_huffman directly follow the |
; four dc_* entries - their definitions are outside this view, confirm |
lodsb |
mov edi, eax |
and eax, 0x10 |
and edi, 3 |
shr eax, 2 |
or edi, eax |
mov [dc_huffman_defined+edi], 1 |
; shl edi, 11 |
imul edi, max_hufftable_size |
add edi, dc_huffman ; edi -> destination table |
; get table size |
; (sum of the 16 count bytes = number of symbol bytes that follow) |
xor eax, eax |
push 16 |
pop ecx |
@@: |
add al, [esi] |
adc ah, 0 |
inc esi |
loop @b |
cmp ax, 0x100 |
ja .end2 |
sub edx, eax |
jc .end2 |
; construct Huffman tree |
; ebx (image pointer) and edx (remaining segment bytes) are saved around the build |
push ebx edx |
; lea eax, [edi+256*8] |
; push eax |
; push 16 |
; mov edx, esi |
; @@: |
; cmp byte [edx-1], 0 |
; jnz @f |
; dec edx |
; dec dword [esp] |
; jmp @b |
; @@: |
; sub edx, [esp] |
; lea eax, [edi+8] |
; push 2 |
; pop ecx |
; .lenloop: |
; mov bl, byte [edx] |
; test bl, bl |
; jz .len1done |
; push eax |
; xor eax, eax |
; .len1loop: |
; dec ecx |
; js .dhterr |
; cmp edi, [esp+8] |
; jae .dhterr |
; lodsb |
; stosd |
; dec bl |
; jnz .len1loop |
; pop eax |
; .len1done: |
; jecxz .len2done |
; push ecx |
; .len2loop: |
; cmp eax, [esp+8] |
; jb @f |
; or eax, -1 |
; @@: |
; cmp edi, [esp+8] |
; jae .dhterr |
; stosd |
; add eax, 8 |
; jnb @f |
; or eax, -1 |
; @@: |
; loop .len2loop |
; pop ecx |
; .len2done: |
; add ecx, ecx |
; inc edx |
; dec dword [esp] |
; jnz .lenloop |
; pop eax |
; pop eax |
; sub eax, edi |
; shr eax, 2 |
; cmp eax, ecx |
; ja @f |
; mov ecx, eax |
; @@: |
; or eax, -1 |
; rep stosd |
; pop edx ebx |
; jmp .dht_loop |
; .dhterr: |
; ;pop eax eax eax edx ebx |
; add esp, 5*4 |
; --- first level: codes of length 1..8 --- |
; [esp] = end of the first level (edi + 256 pairs) |
; edx -> count bytes, esi -> symbol bytes, ah = current code length, |
; ecx = number of first-level entries covered by one code of this length |
lea eax, [edi+256*2] |
push eax |
lea edx, [esi-16] |
mov ah, 1 |
mov ecx, 128 |
.dht_l1: |
; ebx = number of codes of the current length |
movzx ebx, byte [edx] |
inc edx |
test ebx, ebx |
jz .dht_l3 |
.dht_l2: |
; more codes than the first level can hold => malformed table |
cmp edi, [esp] |
jae .dhterr1 |
; al = symbol; after the swap the stored word is (length, symbol), |
; replicated ecx times |
lodsb |
xchg al, ah |
push ecx |
rep stosw |
pop ecx |
xchg al, ah |
dec ebx |
jnz .dht_l2 |
.dht_l3: |
; next code length: each code covers half as many entries |
inc ah |
shr ecx, 1 |
jnz .dht_l1 |
; --- second level: codes of length 9..12, stored in 16-pair sub-tables --- |
; stack from top: end of current sub-table, first-level write pointer, |
; end of first level |
push edi |
mov edi, [esp+4] |
push edi |
mov eax, 0x00090100 |
mov cl, 8 |
.dht_l4: |
movzx ebx, byte [edx] |
inc edx |
test ebx, ebx |
jz .dht_l6 |
.dht_l5: |
; current sub-table full? then emit a link entry into the first level |
; and open the next 32-byte sub-table |
cmp edi, [esp] |
jb @f |
mov edi, [esp+4] |
rol eax, 16 |
cmp edi, [esp+8] |
jae .dhterr2 |
stosw |
inc ah |
mov [esp+4], edi |
pop edi |
push edi |
rol eax, 16 |
add dword [esp], 16*2 |
@@: |
lodsb |
xchg al, ah |
push ecx |
rep stosw |
pop ecx |
xchg al, ah |
dec ebx |
jnz .dht_l5 |
.dht_l6: |
inc ah |
shr ecx, 1 |
jnz .dht_l4 |
; --- third level: codes of length 13..16 --- |
; ebx = number of third-level sub-tables' worth of second-level slots, |
; computed from the four count bytes for lengths 13..16 |
push edi |
movzx ebx, byte [edx] |
add ebx, ebx |
add bl, [edx+1] |
adc bh, 0 |
add ebx, ebx |
add bl, [edx+2] |
adc bh, 0 |
add ebx, ebx |
add bl, [edx+3] |
adc bh, 0 |
add ebx, 15 |
shr ebx, 4 |
mov cl, 8 |
; round the resulting position up to a whole number of 32-byte sub-tables |
; counted from the end of the first level ([esp+12]) |
lea ebx, [edi+ebx*2] |
sub ebx, [esp+12] |
add ebx, 31 |
shr ebx, 5 |
mov edi, ebx |
shl edi, 5 |
add edi, [esp+12] |
xor ebx, 9 |
shl ebx, 16 |
xor eax, ebx |
push edi |
.dht_l7: |
movzx ebx, byte [edx] |
inc edx |
test ebx, ebx |
jz .dht_l10 |
.dht_l8: |
; current third-level sub-table full? link a new one from the second level, |
; and if the second-level sub-table is full too, link a new one from the |
; first level before that |
cmp edi, [esp] |
jb .dht_l9 |
mov edi, [esp+4] |
cmp edi, [esp+8] |
jb @f |
mov edi, [esp+12] |
cmp edi, [esp+16] |
jae .dhterr3 |
mov al, 9 |
stosb |
rol eax, 8 |
stosb |
inc eax |
ror eax, 8 |
mov [esp+12], edi |
mov edi, [esp+8] |
add dword [esp+8], 16*2 |
@@: |
mov al, 9 |
stosb |
rol eax, 16 |
stosb |
inc eax |
ror eax, 16 |
mov [esp+4], edi |
pop edi |
push edi |
add dword [esp], 16*2 |
.dht_l9: |
lodsb |
xchg al, ah |
push ecx |
rep stosw |
pop ecx |
xchg al, ah |
dec ebx |
jnz .dht_l8 |
.dht_l10: |
inc ah |
shr ecx, 1 |
jnz .dht_l7 |
; fill the unused tail of each of the three areas with 0xFF bytes, |
; popping the saved end/current pointers as we go |
push -1 |
pop eax |
pop ecx |
sub ecx, edi |
rep stosb |
pop edi |
pop ecx |
sub ecx, edi |
rep stosb |
pop edi |
pop ecx |
sub ecx, edi |
rep stosb |
; restore remaining segment bytes and the image pointer, parse the next table |
pop edx ebx |
jmp .dht_loop |
; error exits: each label discards the temporaries that were on the stack |
; at the corresponding stage, then restores edx/ebx |
.dhterr3: |
pop eax eax |
.dhterr2: |
pop eax eax |
.dhterr1: |
pop eax |
pop edx ebx |
.end2: |
; relay to the common exit |
jmp .end |
.sofn: |
; SOFn marker found |
; in: al = marker code, esi -> length field, edx = segment length, |
;     ebx = 0 unless a frame has already been opened |
; Validates the frame header, allocates the RAW image (header of 44 bytes, |
; optional 256-entry grayscale palette, pixel data), fills the RAW header, |
; records per-component sampling data in 'components' and, for progressive |
; frames, allocates the DCT coefficient buffer. |
; out: ebx -> RAW image; control returns to .markers_loop on success. |
test ebx, ebx |
jnz .end2 ; only one frame is allowed |
; only SOF0 [baseline sequential], SOF1 [extended sequential], SOF2 [progressive] |
; nobody supports other compression methods |
cmp al, 0xC2 |
ja .end2 |
setz [progressive] |
; Length must be at least 8 |
sub edx, 8 |
jb .end2 |
; Sample precision in JFIF must be 8 bits |
cmp byte [esi+2], 8 |
jnz .end2 |
; Color space in JFIF is either YCbCr (color images, 3 components) |
; or Y (grey images, 1 component) |
movzx eax, byte [esi+7] |
cmp al, 1 |
jz @f |
cmp al, 3 |
jz @f |
; Adobe products sometimes use YCCK color space with 4 components |
cmp al, 4 |
jnz .end2 |
cmp [adobe_ycck], 0 |
jz .end2 |
@@: |
mov edi, eax ; edi = number of components |
; the rest of the segment must be exactly 3 bytes per component |
lea eax, [eax*3] |
sub edx, eax |
jnz .end2 |
; get width and height |
; width must be nonzero |
; height must be nonzero - nobody supports DNL markers |
; (eax <= 12 here, so its upper bytes are zero before ah/al are loaded) |
mov ah, [esi+3] |
mov al, [esi+4] ; eax = height |
test eax, eax |
jz .end2 |
xor ecx, ecx |
mov ch, [esi+5] |
mov cl, [esi+6] ; ecx = width |
jecxz .end3 |
; keep height and width on the stack; reject images of 4000000h pixels or more |
push eax ecx |
imul ecx, eax |
cmp ecx, 4000000h |
jb @f |
pop ecx eax |
.end3: |
jmp .end2 |
@@: |
; ecx = size of the pixel area = width * height * components (kept on the stack) |
imul ecx, edi |
push ecx |
add ecx, 44+1 |
; for grayscale images, allocate additional memory for palette |
cmp edi, 1 |
jnz @f |
add ecx, 256*4-1 |
@@: |
; system function 68, subfunction 12: allocate ecx bytes, pointer returned in eax |
push 68 |
pop eax |
push 12 |
pop ebx |
int 0x40 |
mov ebx, eax |
test eax, eax |
jnz @f |
; Out of memory. Three dwords are still on the stack at this point |
; (pixel-area size, width, height); all of them must be discarded, because |
; .ret addresses the pushad frame relative to esp and then executes |
; popad / ret 4. |
add esp, 3*4 |
mov al, 2 |
jmp .ret |
@@: |
; OS zeroes all allocated memory |
; push edx edi |
; mov edi, eax |
; mov edx, ecx |
; shr ecx, 2 |
; xor eax, eax |
; rep stosd |
; mov ecx, edx |
; and ecx, 3 |
; rep stosb |
; mov ecx, edx |
; pop edi edx |
mov [_ebx], ebx |
pop dword [ebx+32] ; size of pixels area |
push 44 |
pop eax |
mov dword [ebx+28], eax ; pointer to RGB data |
; create grayscale palette if needed |
cmp edi, 1 |
jnz .no_create_palette |
; palette: offset 44, size 256*4; pixel data then starts after the palette |
mov dword [ebx+20], eax |
mov ecx, 256*4 |
mov dword [ebx+24], ecx |
add dword [ebx+28], ecx |
push edi |
shr ecx, 2 |
xor eax, eax |
lea edi, [ebx+44] |
@@: |
; entry i = 0x00iiiiii (equal blue, green and red) |
stosd |
add eax, 0x010101 |
loop @b |
pop edi |
.no_create_palette: |
; other image characteristics |
pop ecx eax |
mov dword [ebx], 'RAW ' ; signature |
mov dword [ebx+4], ecx ; width |
mov dword [ebx+8], eax ; height |
mov eax, edi |
shl eax, 3 |
mov dword [ebx+12], eax ; total pixel size |
mov byte [ebx+16], 8 ; 8 bits per component |
mov word [ebx+18], di ; number of components |
; delta_x = bytes per 8 pixels, delta_y = bytes per 8 rows, |
; line_size = bytes per row, pixel_size = bytes per pixel |
mov [delta_x], eax |
mov [pixel_size], edi |
;mov eax, edi |
imul eax, ecx |
mov [delta_y], eax |
shr eax, 3 |
mov [line_size], eax |
; and dword [ebx+36], 0 ; transparency data pointer = NULL |
; and dword [ebx+40], 0 ; transparency data size |
; parse the per-component part of the header into 6-byte records: |
; db id, db V, db H, db VFactor, db HFactor, db quantization table id |
; dl/dh accumulate the maximal V/H sampling factors |
add esi, 8 |
mov ecx, edi |
mov edi, components |
xor eax, eax |
xor edx, edx |
.sof_parse_comp: |
movsb ; db ComponentIdentifier |
; sampling byte: low nibble = V, high nibble = H; both must be nonzero |
lodsb |
mov ah, al |
and al, 0xF |
jz .end3 |
shr ah, 4 |
jz .end3 |
stosd ; db V, db H, db ?, db ? (will be filled later) |
cmp dl, al |
ja @f |
mov dl, al |
@@: |
cmp dh, ah |
ja @f |
mov dh, ah |
@@: |
movsb ; db QuantizationTableID |
loop .sof_parse_comp |
mov word [max_v], dx |
; eax = HMax, edx = VMax; an MCU covers (8*HMax) x (8*VMax) pixels |
movzx eax, dh |
movzx edx, dl |
push eax edx |
shl eax, 3 |
shl edx, 3 |
mov [block_width], eax |
mov [block_height], edx |
pop edx eax |
push eax edx |
imul eax, [delta_x] |
mov [block_delta_x], eax |
imul edx, [delta_y] |
mov [block_delta_y], edx |
; number of MCUs per row / per column, rounded up |
mov eax, [ebx+4] |
add eax, [block_width] |
dec eax |
xor edx, edx |
div [block_width] |
mov [x_num_blocks], eax |
mov eax, [ebx+8] |
add eax, [block_height] |
dec eax |
xor edx, edx |
div [block_height] |
mov [y_num_blocks], eax |
; ecx is 0 after the loop above, so this makes ecx = number of components |
mov cl, [ebx+18] |
; edx = VMax; HMax stays on the stack and is re-read on every iteration |
pop edx |
mov edi, components |
@@: |
mov eax, edx |
div byte [edi+1] ; VMax / V_i = VFactor_i |
mov byte [edi+3], al ; db VFactor |
pop eax |
push eax |
div byte [edi+2] ; HMax / H_i = HFactor_i |
mov byte [edi+4], al ; db HFactor |
add edi, 6 |
loop @b |
pop eax |
cmp [progressive], 0 |
jz .sof_noprogressive |
; progressive mode: coefficients of all scans are collected first. |
; dct_buffer_size = bytes per component (2 bytes per padded sample); |
; the buffer holds that much for every component |
mov eax, [x_num_blocks] |
mul [block_width] |
mul [y_num_blocks] |
mul [block_height] |
add eax, eax |
mov [dct_buffer_size], eax |
mul [pixel_size] |
xchg eax, ecx |
push ebx |
push 68 |
pop eax |
push 12 |
pop ebx |
int 0x40 |
pop ebx |
test eax, eax |
jnz @f |
; no memory for coefficients: release the image (system function 68, |
; subfunction 13) and leave through the common exit with ebx = 0 |
mov ecx, ebx |
push 68 |
pop eax |
push 13 |
pop ebx |
int 0x40 |
xor ebx, ebx |
jmp .end |
@@: |
mov [dct_buffer], eax |
.sof_noprogressive: |
jmp .markers_loop |
.sos: |
; SOS marker found |
; in: esi -> length field, edx = segment length, ebx -> RAW image |
; Builds one 56-byte record per scan component in cur_components, then decodes |
; the entropy-coded data that follows: directly into the image for sequential |
; frames, or into the DCT coefficient buffer for progressive frames. |
; Record layout, as produced by the stores below: |
;   +0 db V, +1 db H, +2 db VFactor, +3 db HFactor |
;   +4 HIncrement, +8 VIncrement, +12 QuantizationTable |
;   +16 DCTable, +20 ACTable |
;   +24,+28 width/HFactor (two copies), +32 HFactor+1-(width % HFactor) |
;   +36,+40 height/VFactor (two copies), +44 VFactor+1-(height % VFactor) |
;   +48 DCPrediction, +52 ComponentOffset |
; frame must be already opened |
test ebx, ebx |
jz .end3 |
cmp edx, 6 |
jb .end3 |
; parse marker |
movzx eax, byte [esi+2] ; number of components in this scan |
test eax, eax |
jz .end3 ; must be nonzero |
cmp al, [ebx+18] |
ja .end3 ; must be <= total number of components |
; mov [ns], eax |
cmp al, 1 |
setz [not_interleaved] |
; segment length must be exactly 6 + 2 * number of scan components |
lea ecx, [6+eax+eax] |
cmp edx, ecx |
jnz .end3 |
mov ecx, eax |
mov edi, cur_components |
add esi, 3 |
.sos_find_comp: |
lodsb ; got ComponentID, look for component info |
push ecx esi |
mov cl, [ebx+18] |
mov esi, components |
; clear DCPrediction; ComponentOffset counts the frame records skipped, |
; i.e. it becomes the index of the matching frame component |
and dword [edi+48], 0 |
and dword [edi+52], 0 |
@@: |
cmp [esi], al |
jz @f |
inc dword [edi+52] |
add esi, 6 |
loop @b |
@@: |
; mov/pop/loop leave the flags alone, so ZF below is still the result |
; of the last ID comparison |
mov eax, [esi+1] |
mov dl, [esi+5] |
pop esi ecx |
jnz .end3 ; bad ComponentID |
; a scan with a single component is decoded one data unit at a time |
cmp [not_interleaved], 0 |
jz @f |
mov ax, 0x0101 |
@@: |
stosd ; db V, db H, db VFactor, db HFactor |
xor eax, eax |
mov al, byte [edi-1] ; get HFactor |
mul byte [ebx+18] ; number of components |
stosd ; HIncrement_i = HFactor_i * sizeof(pixel) |
mov al, byte [edi-4-2] ; get VFactor |
mul byte [ebx+18] ; number of components |
imul eax, [ebx+4] ; image width |
stosd ; VIncrement_i = VFactor_i * sizeof(row) |
; dl = quantization table id saved above |
xchg eax, edx |
and eax, 3 |
cmp [quant_tables_defined+eax], 0 |
jz .end3 |
shl eax, 8 |
add eax, quant_tables |
stosd ; dd QuantizationTable |
; table selector byte: high nibble = DC table, low nibble = AC table |
lodsb |
movzx eax, al |
mov edx, eax |
shr eax, 4 |
and edx, 3 |
and eax, 3 |
; an undefined table is fatal for sequential frames; for progressive frames |
; a null pointer is stored and checked later, once the scan type is known |
cmp [dc_huffman_defined+eax], 0 |
jnz .dc_table_ok |
cmp [progressive], 0 |
jz .end3 |
xor eax, eax |
jmp .dc_table_done |
.dc_table_ok: |
; shl eax, 11 |
imul eax, max_hufftable_size |
add eax, dc_huffman |
.dc_table_done: |
cmp [ac_huffman_defined+edx], 0 |
jnz .ac_table_ok |
cmp [progressive], 0 |
jz .end3 |
xor edx, edx |
jmp .ac_table_done |
.ac_table_ok: |
; shl edx, 11 |
imul edx, max_hufftable_size |
add edx, ac_huffman |
.ac_table_done: |
stosd ; dd DCTable |
xchg eax, edx |
stosd ; dd ACTable |
push ecx |
mov eax, [ebx+4] |
movzx ecx, byte [edi-21] ; get HFactor |
cdq ; edx:eax = width (width<0x10000, so as dword it is unsigned) |
div ecx |
stosd ; dd width / HFactor_i |
stosd |
xchg eax, ecx |
inc eax |
sub eax, edx |
stosd ; dd HFactor_i+1 - (width % HFactor_i) |
mov eax, [ebx+8] |
movzx ecx, byte [edi-34] ; get VFactor |
cdq |
div ecx |
stosd ; dd height / VFactor_i |
stosd |
xchg eax, ecx |
inc eax |
sub eax, edx |
stosd ; dd VFactor_i+1 - (height % VFactor_i) |
pop ecx |
; scasd is used only to advance edi by 4; its comparison result is ignored |
scasd ; dd DCPrediction |
; set the top byte of DCPrediction to 0x80 when the low byte of |
; ComponentOffset has odd parity (offsets 1 and 2), else to 0 |
cmp dword [edi], 0 |
setnp al |
ror al, 1 |
mov byte [edi-1], al |
scasd ; dd ComponentOffset |
dec ecx |
jnz .sos_find_comp |
mov [cur_components_end], edi |
; copy the scan parameters: ScanStart, ScanEnd (must be <= 63), |
; then the low and high nibbles of the approximation byte |
mov edi, ScanStart |
movsb |
cmp byte [esi], 63 |
ja .end3 |
movsb |
lodsb |
push eax |
and al, 0xF |
stosb |
pop eax |
shr al, 4 |
stosb |
; now unpack data |
call init_limits |
and [decoded_MCUs], 0 |
mov [cur_rst_marker], 7 |
and [huffman_bits], 0 |
cmp [progressive], 0 |
jz .sos_noprogressive |
; progressive mode - only decode DCT coefficients |
; initialize pointers to coefficients data |
; zero number of EOBs for AC coefficients |
; redefine HIncrement and VIncrement |
mov edi, cur_components |
.coeff_init: |
; [edi+12] = start of this component's plane inside dct_buffer |
mov eax, [dct_buffer_size] |
mul dword [edi+52] |
add eax, [dct_buffer] |
mov [edi+12], eax |
and dword [edi+52], 0 |
; the table this scan needs (DC for ScanStart = 0, AC otherwise) must exist |
cmp [ScanStart], 0 |
jz .scan_dc |
cmp dword [edi+20], 0 |
jz .end3 |
jmp @f |
.scan_dc: |
cmp dword [edi+16], 0 |
jz .end3 |
@@: |
; HIncrement = H * 128 bytes (one 8x8 block of words per data unit) |
movzx eax, byte [edi+1] |
shl eax, 7 |
mov [edi+4], eax |
; VIncrement = number of data units per row * 128 bytes |
mov eax, [edi+28] |
mov cl, [edi+3] |
cmp cl, [edi+32] |
sbb eax, -7-1 |
shr eax, 3 |
shl eax, 7 |
mov [edi+8], eax |
add edi, 56 |
cmp edi, [cur_components_end] |
jb .coeff_init |
; unpack coefficients |
; N.B. Speed optimization has sense here. |
push ebx |
.coeff_decode_loop: |
; one MCU: for every scan component, V rows of H data units |
mov edx, cur_components |
.coeff_components_loop: |
mov edi, [edx+12] |
movzx ecx, byte [edx] |
push dword [edx+40] |
push edi |
.coeff_y_loop: |
push ecx |
movzx eax, byte [edx+1] |
push dword [edx+28] |
push edi |
.coeff_x_loop: |
; [edx+28] / [edx+40] count down the remaining columns / rows; a negative |
; value marks a data unit lying outside the image |
cmp dword [edx+40], 0 |
jl @f |
cmp dword [edx+28], 0 |
jge .realdata |
@@: |
; outside the image: in interleaved scans the unit is still present in the |
; stream, so it is decoded into the scratch block dct_coeff and dropped |
cmp [not_interleaved], 0 |
jnz .norealdata |
push eax edi |
mov edi, dct_coeff |
call decode_progressive_coeff |
pop edi eax |
jmp .norealdata |
.realdata: |
push eax |
call decode_progressive_coeff |
add edi, 64*2 |
pop eax |
.norealdata: |
sub dword [edx+28], 8 |
sub eax, 1 |
jnz .coeff_x_loop |
pop edi |
pop dword [edx+28] |
add edi, [edx+8] |
pop ecx |
sub dword [edx+40], 8 |
sub ecx, 1 |
jnz .coeff_y_loop |
; advance this component to the next MCU in the row |
movzx eax, byte [edx+1] |
shl eax, 3 |
pop edi |
add edi, [edx+4] |
pop dword [edx+40] |
sub [edx+28], eax |
mov [edx+12], edi |
add edx, 56 |
cmp edx, [cur_components_end] |
jnz .coeff_components_loop |
; next_MCU returns CF=1 when an expected restart marker is missing |
call next_MCU |
jc .norst |
sub [cur_x], 1 |
jnz .coeff_decode_loop |
call next_line |
; move every coefficient pointer from the end of this MCU row |
; to the start of the next one |
mov edx, cur_components |
@@: |
mov eax, [max_x] |
imul eax, [edx+4] |
sub [edx+12], eax |
movzx eax, byte [edx] |
imul eax, [edx+8] |
add [edx+12], eax |
add edx, 56 |
cmp edx, [cur_components_end] |
jnz @b |
sub [cur_y], 1 |
jnz .coeff_decode_loop |
pop ebx |
jmp .markers_loop |
.norst: |
pop ebx |
jmp .end4 |
.sos_noprogressive: |
; normal mode - unpack JPEG image |
; cur_out_ptr = image base + offset of pixel data |
mov edi, [ebx+28] |
add edi, ebx |
mov [cur_out_ptr], edi |
; N.B. Speed optimization has sense here. |
.decode_loop: |
call decode_MCU |
call next_MCU |
jc .end4 |
sub [cur_x], 1 |
jnz .decode_loop |
call next_line |
sub [cur_y], 1 |
jnz .decode_loop |
jmp .markers_loop |
.end4: |
; relay to the common exit |
jmp .end3 |
;--------------------------------------------------------------------- |
get_marker: |
; Read the next marker from the input stream. |
; in: esi -> data |
;     ebp = number of input bytes remaining |
; out: CF=0, al=marker value - ok |
; CF=1 - no marker |
; esi and ebp are advanced past every byte consumed; only al is modified |
; besides them. |
sub ebp, 1 |
jc .ret |
lodsb |
if 1 |
cmp al, 0xFF |
jae @f |
; Some encoders write a segment length that is two less than the true |
; value (in JPEG the length of a marker segment INCLUDES the length |
; field itself). To open such files, allow 2 stray bytes before the |
; next marker: the byte just read and one more, followed by 0xFF. |
cmp ebp, 2 |
jb .ret |
; the two bytes fetched below must be charged against the remaining-bytes |
; counter as well; otherwise ebp stays 2 too high and later bounds checks |
; would permit reads past the end of the buffer |
sub ebp, 2 |
lodsb |
lodsb |
end if |
; not 0xFF => no marker here (the comparison also leaves CF=1 for the caller) |
cmp al, 0xFF |
jb .ret |
@@: |
; skip any 0xFF fill bytes; the first other byte is the marker code |
sub ebp, 1 |
jc .ret |
lodsb |
cmp al, 0xFF |
jz @b |
clc |
.ret: |
ret |
;--------------------------------------------------------------------- |
align 16 |
decode_MCU: |
; Decode one MCU and store its samples into the output image. |
; For every record in cur_components .. cur_components_end, V rows of H data |
; units are decoded with decode_data_unit; each resulting 8x8 block in |
; decoded_data is copied to the image, every sample being replicated |
; VFactor times vertically and HFactor times horizontally. |
; in: esi/ebp = input stream state (passed through to decode_data_unit), |
;     [cur_out_ptr] -> top-left output byte of this MCU |
; out: [cur_out_ptr] advanced by [cur_block_dx] |
; NOTE(review): decode_data_unit is defined outside this view; it is assumed |
; to preserve edx and to leave the block in decoded_data - confirm |
mov edx, cur_components |
.components_loop: |
; decode each component |
push [cur_out_ptr] |
movzx ecx, byte [edx] |
push dword [edx+40] |
; we have H_i * V_i blocks of packed data, decode them |
.y_loop_1: |
push [cur_out_ptr] |
push ecx |
movzx eax, byte [edx+1] |
push dword [edx+28] |
.x_loop_1: |
push eax |
call decode_data_unit |
; [edx+28] / [edx+40] = columns / rows still inside the image; a data unit |
; that lies completely outside is decoded but not copied |
cmp dword [edx+40], 0 |
jl .nocopyloop |
cmp dword [edx+28], 0 |
jl .nocopyloop |
; now we have decoded block 8*8 in decoded_data |
; H_i * V_i packed blocks 8*8 make up one block (8*HMax) * (8*VMax) |
; so each pixel in packed block corresponds to HFact * VFact pixels |
movzx ecx, byte [edx+2] |
push esi ebp |
mov edi, [cur_out_ptr] |
add edi, [edx+52] |
.y_loop_2: |
; ecx = vertical replica counter; compute the number of block rows to copy |
; (8, or fewer when the block crosses the bottom edge; 0 skips the pass) |
push ecx edi |
cmp ecx, [edx+44] |
mov ecx, [edx+40] |
sbb ecx, 8-1 |
sbb eax, eax |
and ecx, eax |
add ecx, 8 |
jz .skip_x_loop_2 |
movzx eax, byte [edx+3] |
.x_loop_2: |
; eax = horizontal replica counter; compute how many samples of each row |
; fall outside the right edge (as a non-positive number in eax) |
push eax ecx edi |
cmp eax, [edx+32] |
mov eax, [edx+28] |
sbb eax, 8-1 |
sbb ebp, ebp |
and eax, ebp |
; ebp = entry point into the unrolled row copy below: every skipped sample |
; moves the entry 3 bytes forward, which is the size of one |
; "add edi, eax / movsb" pair |
mov ebp, .copyiter_all |
mov esi, decoded_data |
sub ebp, eax |
sub ebp, eax |
sub ebp, eax |
; eax = HIncrement - 1: movsb itself advances edi by one |
mov eax, [edx+4] |
sub eax, 1 |
.copyloop: |
push esi edi |
jmp ebp |
.copyiter_all: |
movsb |
repeat 7 |
add edi, eax |
movsb |
end repeat |
nop |
nop |
; next block row: source advances by 8 samples, destination by VIncrement |
pop edi esi |
add edi, [edx+8] |
add esi, 8 |
sub ecx, 1 |
jnz .copyloop |
pop edi ecx eax |
add edi, [pixel_size] |
sub eax, 1 |
jnz .x_loop_2 |
.skip_x_loop_2: |
pop edi ecx |
add edi, [line_size] |
sub ecx, 1 |
jnz .y_loop_2 |
pop ebp esi |
.nocopyloop: |
; step to the next data unit in this row |
mov eax, [delta_x] |
add [cur_out_ptr], eax |
pop eax |
sub dword [edx+28], 8 |
sub eax, 1 |
jnz .x_loop_1 |
; next row of data units: restore the column counter and move the output |
; pointer down by delta_y from the start of the previous row |
pop dword [edx+28] |
pop ecx |
pop eax |
sub dword [edx+40], 8 |
add eax, [delta_y] |
mov [cur_out_ptr], eax |
sub ecx, 1 |
jnz .y_loop_1 |
; component done: restore the row counter and output pointer, and charge |
; the H*8 columns of this MCU against the column counter |
movzx eax, byte [edx+1] |
pop dword [edx+40] |
shl eax, 3 |
pop [cur_out_ptr] |
sub dword [edx+28], eax |
add edx, 56 |
cmp edx, [cur_components_end] |
jb .components_loop |
mov eax, [cur_block_dx] |
add [cur_out_ptr], eax |
ret |
|
align 16 |
next_MCU: |
; Bookkeeping after each decoded MCU: count it and, when a restart interval |
; is active and has elapsed, consume the expected RSTn marker and reset the |
; bit buffer and the DC predictions. |
; in: esi -> input, ebp = input bytes remaining |
; out: CF=0 - ok; CF=1 - the expected restart marker is missing |
; Modifies eax, and edx when a restart marker is handled. |
add [decoded_MCUs], 1 |
mov eax, [restart_interval] |
test eax, eax |
jz .no_restart |
cmp [decoded_MCUs], eax |
jb .no_restart |
and [decoded_MCUs], 0 |
and [huffman_bits], 0 |
; no marker is expected after the very last MCU of the scan |
cmp [cur_x], 1 |
jnz @f |
cmp [cur_y], 1 |
jz .no_restart |
@@: |
; restart marker must be present |
sub ebp, 2 |
js .error |
cmp byte [esi], 0xFF |
jnz .error |
; markers must cycle RST0..RST7 (cur_rst_marker starts at 7, so the first |
; one expected is RST0 = 0xD0) |
mov al, [cur_rst_marker] |
inc eax |
and al, 7 |
mov [cur_rst_marker], al |
add al, 0xD0 |
cmp [esi+1], al |
jnz .error |
add esi, 2 |
; handle restart marker - zero all DC predictions |
; (only the low word is cleared; the upper half of the field is kept) |
mov edx, cur_components |
@@: |
and word [edx+48], 0 |
add edx, 56 |
cmp edx, [cur_components_end] |
jb @b |
.no_restart: |
clc |
ret |
.error: |
stc |
ret |
|
next_line: |
; Start the next row of MCUs. |
; Reloads cur_x from max_x, moves cur_out_ptr back by max_x*cur_block_dx and |
; forward by cur_block_dy, and for each record in |
; cur_components .. cur_components_end restores the column counter (+28) |
; from its saved copy (+24) and lowers the row counter (+40) by 8*V. |
; Modifies eax, edx and the flags. |
mov eax, [max_x] |
mov [cur_x], eax |
; only the low 32 bits of the product are needed |
imul eax, [cur_block_dx] |
sub eax, [cur_block_dy] |
sub [cur_out_ptr], eax |
mov edx, cur_components |
.per_component: |
mov eax, [edx+24] |
mov [edx+28], eax |
movzx eax, byte [edx] |
lea eax, [eax*8] |
sub [edx+40], eax |
add edx, 56 |
cmp edx, [cur_components_end] |
jb .per_component |
ret |
|
init_limits: |
; Set up the MCU counters and output strides for a scan. |
; Interleaved scan: max_x/max_y and cur_block_dx/cur_block_dy are copied |
; from the frame-wide values; no register is modified on this path. |
; Non-interleaved scan ([not_interleaved] != 0): they are recomputed from the |
; first record of cur_components, one data unit per MCU; this path |
; modifies eax, ecx and edx. |
; Finally cur_x/cur_y are loaded from max_x/max_y. |
push [x_num_blocks] |
pop [max_x] |
push [y_num_blocks] |
pop [max_y] |
push [block_delta_x] |
pop [cur_block_dx] |
push [block_delta_y] |
pop [cur_block_dy] |
cmp [not_interleaved], 0 |
jz @f |
; max_x = (columns + 7 + (HFactor >= [rec+32])) / 8: |
; "sbb eax, -8" adds 8 and subtracts the borrow of the comparison |
mov eax, dword [cur_components+28] |
movzx ecx, byte [cur_components+3] |
cmp cl, [cur_components+32] |
sbb eax, -7-1 |
shr eax, 3 |
mov [max_x], eax |
; same computation for rows, using VFactor and [rec+44] |
mov eax, dword [cur_components+40] |
movzx edx, byte [cur_components+2] |
cmp dl, [cur_components+44] |
sbb eax, -7-1 |
shr eax, 3 |
mov [max_y], eax |
; output strides per data unit: HFactor*delta_x and VFactor*delta_y |
imul ecx, [delta_x] |
mov [cur_block_dx], ecx |
imul edx, [delta_y] |
mov [cur_block_dy], edx |
@@: |
push [max_x] |
pop [cur_x] |
push [max_y] |
pop [cur_y] |
ret |
|
;macro get_bit |
;{ |
;local .l1,.l2,.marker |
; add cl, cl |
; jnz .l1 |
; sub ebp, 1 |
; js decode_data_unit.eof |
; mov cl, [esi] |
; cmp cl, 0xFF |
; jnz .l2 |
;.marker: |
; add esi, 1 |
; sub ebp, 1 |
; js decode_data_unit.eof |
; cmp byte [esi], 0xFF |
; jz .marker |
; cmp byte [esi], 0 |
; jnz decode_data_unit.eof |
;.l2: |
; sub esi, -1 |
; adc cl, cl |
;.l1: |
;} |
macro get_bit |
{ |
; Fetch one bit from the entropy-coded stream. |
; in/out: ch = bit buffer (next bit in its top position), cl = number of |
;         bits left in ch, esi -> input, ebp = input bytes remaining |
; out: CF = the bit |
; On end of input, or when 0xFF is followed by anything other than 0xFF |
; fill bytes and a stuffed zero, control goes to the '.eof' label of the |
; enclosing procedure ('.eof' is deliberately not declared local). |
local .l1,.l2,.marker |
sub cl, 1 |
jns .l1 |
; buffer empty: load the next byte |
sub ebp, 1 |
js .eof |
mov ch, [esi] |
cmp ch, 0xFF |
jnz .l2 |
.marker: |
; 0xFF in the data: skip further 0xFF bytes; a zero byte must follow |
add esi, 1 |
sub ebp, 1 |
js .eof |
cmp byte [esi], 0xFF |
jz .marker |
cmp byte [esi], 0 |
jnz .eof |
.l2: |
add esi, 1 |
mov cl, 7 |
.l1: |
; shift the top bit of the buffer out into CF |
add ch, ch |
} |
macro get_bits restore_edx |
{ |
; Fetch a bl-bit field from the stream and convert it to a signed value. |
; in: bl = number of bits to read |
; in/out: ch = bit buffer, cl = number of bits left in ch, |
;         esi -> input, ebp = input bytes remaining |
; out: eax = field value; a field whose leading bit is 0 is mapped to |
;      value - (2^bl - 1), i.e. the usual JPEG sign extension |
; edx is used as scratch. With 'restore_edx' equal to 'true' the macro ends |
; by popping edx, so the caller must have pushed it beforehand. |
; Stream errors go to the '.eof' label of the enclosing procedure. |
local .l1,.l2,.l3,.marker2 |
; eax collects the bits left-aligned; dl = bits available in the current |
; byte; cl = shift that places the next input byte below the bits gathered |
movzx eax, ch |
mov dl, cl |
shl eax, 24 |
neg cl |
push ebx |
add cl, 24 |
.l1: |
cmp bl, dl |
jbe .l2 |
; not enough bits yet: take everything available and load another byte |
sub bl, dl |
sub ebp, 1 |
js .eof |
mov ch, [esi] |
cmp ch, 0xFF |
jnz .l3 |
.marker2: |
; 0xFF in the data: skip further 0xFF bytes; a zero byte must follow |
add esi, 1 |
sub ebp, 1 |
js .eof |
cmp byte [esi], 0xFF |
jz .marker2 |
cmp byte [esi], 0 |
jnz .eof |
.l3: |
movzx edx, ch |
add esi, 1 |
shl edx, cl |
sub cl, 8 |
or eax, edx |
mov dl, 8 |
jmp .l1 |
.l2: |
; drop the consumed bits from the buffer; dl = bits that stay in it |
mov cl, bl |
sub dl, bl |
shl ch, cl |
pop ebx |
; put the inverted leading bit on top, then shift arithmetically so that |
; the field is right-aligned and extended with that inverted bit |
cmp eax, 80000000h |
rcr eax, 1 |
mov cl, 31 |
sub cl, bl |
sar eax, cl |
mov cl, dl |
if restore_edx eq true |
pop edx |
end if |
; no change for a non-negative intermediate; adds 1 to a negative one |
add eax, 80000000h |
adc eax, 80000000h |
} |
; macro get_huffman_code |
; { |
; local .l1 |
; xor ebx, ebx |
; .l1: |
; get_bit |
; adc ebx, ebx |
; mov eax, [eax+4*ebx] |
; xor ebx, ebx |
; cmp eax, -1 |
; jz .eof |
; cmp eax, 0x1000 |
; jae .l1 |
; mov ebx, eax |
; } |
macro get_huffman_code |
{ |
; Decode one Huffman symbol using the multi-level lookup table at eax. |
; in: eax -> table: pairs (code length, value); 256 first-level pairs |
;     indexed by 8 bits, 16-pair sub-tables from offset 0x200 onwards |
; in/out: ch = bit buffer, cl = number of bits left in ch, |
;         esi -> input, ebp = input bytes remaining |
; out: eax = ebx = decoded value |
; edx is used as scratch. Stream errors and invalid table entries go to the |
; '.eof' label of the enclosing procedure. |
local .l1,.l2,.l3,.l4,.l5,.l6,.nomarker1,.marker1,.nomarker2,.marker2,.nomarker3,.marker3,.done |
; 1. (First level in Huffman table) Does the current Huffman code fit in 8 bits |
; and have we got enough bits? |
movzx ebx, ch |
cmp byte [eax+ebx*2], cl |
jbe .l1 |
; 2a. No; load next byte |
sub ebp, 1 |
js .eof |
mov ch, [esi] |
movzx edx, ch |
cmp ch, 0xFF |
jnz .nomarker1 |
.marker1: |
; 0xFF in the data: skip further 0xFF bytes; a zero byte must follow |
add esi, 1 |
sub ebp, 1 |
js .eof |
cmp byte [esi], 0xFF |
jz .marker1 |
cmp byte [esi], 0 |
jnz .eof |
.nomarker1: |
; complete the 8-bit index with the top bits of the new byte |
shr edx, cl |
add esi, 1 |
or ebx, edx |
; 3a. (First level in Huffman table, >=8 bits known) Does the current Huffman code fit in 8 bits? |
cmp byte [eax+ebx*2], 8 |
jbe .l2 |
jl .eof |
; 4aa. No; go to next level |
; the value byte of the entry is the number of a 32-byte sub-table |
movzx ebx, byte [eax+ebx*2+1] |
mov dl, ch |
shl ebx, 5 |
ror edx, cl |
lea ebx, [eax+ebx+0x200] |
shr edx, 24 |
push edx |
shr edx, 4 |
; 5aa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits |
; and have we got enough bits? |
cmp byte [ebx+edx*2], cl |
jbe .l3 |
; 6aaa. No; have we got 12 bits? |
cmp cl, 4 |
jae .l4 |
; 7aaaa. No; load next byte |
pop edx |
sub ebp, 1 |
js .eof |
mov ch, [esi] |
cmp ch, 0xFF |
jnz .nomarker2 |
.marker2: |
; 0xFF in the data: skip further 0xFF bytes; a zero byte must follow |
add esi, 1 |
sub ebp, 1 |
js .eof |
cmp byte [esi], 0xFF |
jz .marker2 |
cmp byte [esi], 0 |
jnz .eof |
.nomarker2: |
push ecx |
shr ch, cl |
add esi, 1 |
or dl, ch |
pop ecx |
push edx |
shr edx, 4 |
; 8aaaa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits? |
cmp byte [ebx+edx*2], 4 |
jbe .l5 |
jl .eof |
; 9aaaaa. No; go to next level |
movzx ebx, byte [ebx+edx*2+1] |
pop edx |
shl ebx, 5 |
and edx, 0xF |
lea ebx, [eax+ebx+0x200] |
; 10aaaaa. Get current code length and value |
sub cl, [ebx+edx*2] |
movzx eax, byte [ebx+edx*2+1] |
neg cl |
shl ch, cl |
neg cl |
add cl, 8 |
jmp .done |
.l5: |
; 9aaaab. Yes; get current code length and value |
sub cl, [ebx+edx*2] |
movzx eax, byte [ebx+edx*2+1] |
neg cl |
pop edx |
shl ch, cl |
neg cl |
add cl, 8 |
jmp .done |
.l4: |
; 7aaab. Yes; go to next level |
movzx ebx, byte [ebx+edx*2+1] |
pop edx |
shl ebx, 5 |
and edx, 0xF |
lea ebx, [eax+ebx+0x200] |
; 8aaab. (Third level in Huffman table) Have we got enough bits? |
cmp [ebx+edx*2], cl |
jbe .l6 |
; 9aaaba. No; load next byte |
sub ebp, 1 |
js .eof |
mov ch, [esi] |
cmp ch, 0xFF |
jnz .nomarker3 |
.marker3: |
; 0xFF in the data: skip further 0xFF bytes; a zero byte must follow |
add esi, 1 |
sub ebp, 1 |
js .eof |
cmp byte [esi], 0xFF |
jz .marker3 |
cmp byte [esi], 0 |
jnz .eof |
.nomarker3: |
push ecx |
shr ch, cl |
add esi, 1 |
or dl, ch |
pop ecx |
; 10aaaba. Get current code length and value |
sub cl, [ebx+edx*2] |
movzx eax, byte [ebx+edx*2+1] |
neg cl |
shl ch, cl |
neg cl |
add cl, 8 |
jmp .done |
.l3: |
; 6aab. Yes; get current code length and value |
; (discard the saved index bits) |
pop eax |
.l6: |
; 9aaabb. Yes; get current code length and value |
sub cl, [ebx+edx*2] |
movzx eax, byte [ebx+edx*2+1] |
xor cl, 7 |
shl ch, cl |
xor cl, 7 |
add ch, ch |
jmp .done |
.l2: |
; 3ab. Yes; get current code length and value |
sub cl, [eax+ebx*2] |
movzx eax, byte [eax+ebx*2+1] |
neg cl |
shl ch, cl |
neg cl |
add cl, 8 |
jmp .done |
.l1: |
; 3b. Yes; get current code length and value |
; (consume 'length' bits from the buffer and from the bit count) |
mov dl, [eax+ebx*2] |
movzx eax, byte [eax+ebx*2+1] |
xchg cl, dl |
sub dl, cl |
shl ch, cl |
mov cl, dl |
.done: |
mov ebx, eax |
} |
; Decode DCT coefficients for one 8*8 block in progressive mode |
; from input stream, given by pointer esi and length ebp |
; in: edx -> record of the current component in cur_components |
;     edi -> the block: 64 coefficients, one word each |
;     ScanStart/ScanEnd/ApproxPosLow/ApproxPosHigh describe the scan |
; The bit-buffer state (cl, ch) is kept in [huffman_bits] between calls. |
; [edx+48] holds the DC prediction, [edx+52] the number of blocks still |
; covered by a pending end-of-band run. |
; Modifies eax, ebx, ecx; edx is preserved on the normal return paths. |
; Stream errors leave through decode_data_unit.eof. |
; N.B. Speed optimization has sense here. |
align 16 |
decode_progressive_coeff: |
mov ecx, [huffman_bits] |
cmp [ScanStart], 0 |
jnz .ac |
; DC coefficient |
cmp [ApproxPosHigh], 0 |
jz .dc_first |
; DC coefficient, subsequent passes |
; (one refinement bit, placed at bit position ApproxPosLow) |
xor eax, eax |
get_bit |
adc eax, eax |
mov [huffman_bits], ecx |
mov cl, [ApproxPosLow] |
shl eax, cl |
or [edi], ax |
ret |
.dc_first: |
; DC coefficient, first pass |
; eax -> DC table; edx is pushed here and popped inside 'get_bits true' |
mov eax, [edx+16] |
push edx |
get_huffman_code |
get_bits true |
; add the difference to the prediction, store it, then scale by ApproxPosLow |
add eax, [edx+48] |
mov [edx+48], ax |
mov [huffman_bits], ecx |
mov cl, [ApproxPosLow] |
shl eax, cl |
mov [edi], ax |
ret |
.ac: |
; AC coefficients |
; eax = index (in zigzag order) of the coefficient being processed |
movzx eax, [ScanStart] |
cmp al, [ScanEnd] |
ja .ret |
cmp dword [edx+52], 0 |
jnz .was_eob |
.acloop: |
push edx |
push eax |
mov eax, [edx+20] |
get_huffman_code |
pop eax |
; ebx = decoded byte: high nibble = run length, low nibble = size |
test ebx, 15 |
jz .band |
push eax ebx |
and ebx, 15 |
get_bits false |
pop ebx |
; eax = coefficient index again, [esp] = new coefficient value |
xchg eax, [esp] |
shr ebx, 4 |
.zeroloop1: |
; skip ebx zero-valued coefficients; every already nonzero coefficient met |
; on the way receives one correction bit |
push eax ebx |
movzx eax, byte [zigzag+eax] |
xor ebx, ebx |
cmp word [edi+eax], bx |
jz .zeroloop2 |
get_bit |
jnc @f |
; correction bit set: move the coefficient away from zero by |
; 1 << ApproxPosLow (the sign is taken from its high byte) |
push ecx |
mov cl, [ApproxPosLow] |
xor ebx, ebx |
cmp byte [edi+eax+1], 80h |
adc ebx, 0 |
add ebx, ebx |
sub ebx, 1 |
shl ebx, cl |
pop ecx |
add [edi+eax], bx |
@@: |
pop ebx eax |
@@: |
add eax, 1 |
cmp al, [ScanEnd] |
ja decode_data_unit.eof |
jmp .zeroloop1 |
.zeroloop2: |
; a zero coefficient: count it against the run |
pop ebx eax |
sub ebx, 1 |
jns @b |
.nozero1: |
; run exhausted: store the new value (if any) at the current position |
pop ebx |
test ebx, ebx |
jz @f |
push eax |
movzx eax, byte [zigzag+eax] |
push ecx |
mov cl, [ApproxPosLow] |
shl ebx, cl |
pop ecx |
mov [edi+eax], bx |
pop eax |
@@: |
add eax, 1 |
pop edx |
cmp al, [ScanEnd] |
jbe .acloop |
mov [huffman_bits], ecx |
.ret: |
ret |
.eof: |
; target of the stream-error jumps generated by the macros above |
jmp decode_data_unit.eof |
.band: |
; size = 0: run = 15 means "skip 16 zeros", anything else starts an |
; end-of-band run |
shr ebx, 4 |
cmp ebx, 15 |
jnz .eob |
push 0 |
jmp .zeroloop1 |
.eob: |
; run length = (1 << ebx) + next ebx bits, counting the current block |
pop edx |
push eax |
mov eax, 1 |
test ebx, ebx |
jz .eob0 |
@@: |
get_bit |
adc eax, eax |
sub ebx, 1 |
jnz @b |
.eob0: |
mov [edx+52], eax |
pop eax |
.was_eob: |
; inside an end-of-band run: no new coefficients in this block, but the |
; nonzero ones left in the band still receive their correction bits |
sub dword [edx+52], 1 |
cmp al, [ScanEnd] |
ja .ret2 |
.zeroloop3: |
push eax |
movzx eax, byte [zigzag+eax] |
xor ebx, ebx |
cmp word [edi+eax], bx |
jz @f |
get_bit |
jnc @f |
push ecx |
mov cl, [ApproxPosLow] |
xor ebx, ebx |
cmp byte [edi+eax+1], 80h |
adc ebx, 0 |
add ebx, ebx |
sub ebx, 1 |
shl ebx, cl |
pop ecx |
add [edi+eax], bx |
@@: |
pop eax |
add eax, 1 |
cmp al, [ScanEnd] |
jbe .zeroloop3 |
.ret2: |
mov [huffman_bits], ecx |
ret |
|
handle_progressive: |
; Final stage for progressive images: turn the coefficients collected in |
; dct_buffer into pixels, one frame component at a time, then free the buffer. |
; Does nothing when [dct_buffer] is 0. |
; in: ebx -> RAW image |
; out: ebx preserved; eax, ecx, edx, esi, edi, ebp are modified |
; For each component a single record is built at cur_components (same layout |
; as in the SOS handler, with V = H = 1) and the image is traversed with |
; decode_MCU in non-interleaved mode. |
; NOTE(review): decode_data_unit (outside this view) presumably reads the |
; coefficients through the pointer stored in the ACTable slot - confirm |
cmp [dct_buffer], 0 |
jnz @f |
ret |
@@: |
; information for all components |
; ebp = index of the current component, ecx = components left |
mov esi, components |
xor ebp, ebp |
movzx ecx, byte [ebx+18] |
.next_component: |
mov edi, cur_components |
lodsb ; ComponentID |
lodsd |
mov ax, 0x0101 |
stosd ; db V, db H, db VFactor, db HFactor |
xor eax, eax |
mov al, byte [edi-1] ; get HFactor |
mul byte [ebx+18] ; number of components |
stosd ; HIncrement_i = HFactor_i * sizeof(pixel) |
mov al, byte [edi-4-2] ; get VFactor |
mul byte [ebx+18] ; number of components |
mul dword [ebx+4] ; image width |
stosd ; VIncrement_i = VFactor_i * sizeof(row) |
; quantization table id of this component; the table must have been defined |
lodsb |
and eax, 3 |
cmp [quant_tables_defined+eax], 0 |
jz .error |
shl eax, 8 |
add eax, quant_tables |
stosd ; dd QuantizationTable |
stosd ; dd DCTable - ignored |
mov eax, ebp |
mul [dct_buffer_size] |
add eax, [dct_buffer] |
stosd ; instead of dd ACTable - pointer to current DCT coefficients |
push ecx |
mov eax, [ebx+4] |
movzx ecx, byte [edi-21] ; get HFactor |
; cdq ; edx = 0 as a result of previous mul |
div ecx |
stosd ; dd width / HFactor_i |
stosd |
xchg eax, ecx |
inc eax |
sub eax, edx |
stosd ; dd HFactor_i+1 - (width % HFactor_i) |
mov eax, [ebx+8] |
movzx ecx, byte [edi-34] ; get VFactor |
cdq |
div ecx |
stosd ; dd height / VFactor_i |
stosd |
xchg eax, ecx |
inc eax |
sub eax, edx |
stosd ; dd VFactor_i+1 - (height % VFactor_i) |
pop ecx |
; DCPrediction = 0x80000000 when the low byte of the component index has |
; odd parity (indices 1 and 2), otherwise 0 |
xor eax, eax |
test ebp, ebp |
setnp al |
ror eax, 1 |
stosd ; dd DCPrediction |
mov eax, ebp |
stosd ; dd ComponentOffset |
inc ebp |
push ecx |
mov [cur_components_end], edi |
lea edx, [edi-56] |
; do IDCT and unpack |
mov edi, [ebx+28] |
add edi, ebx |
mov [cur_out_ptr], edi |
mov [not_interleaved], 1 |
call init_limits |
.decode_loop: |
call decode_MCU |
sub [cur_x], 1 |
jnz .decode_loop |
call next_line |
sub [cur_y], 1 |
jnz .decode_loop |
pop ecx |
dec ecx |
jnz .next_component |
; image unpacked, return |
.error: |
; common tail (also reached when a quantization table is missing): |
; free the coefficient buffer with system function 68, subfunction 13 |
push ebx |
push 68 |
pop eax |
push 13 |
pop ebx |
mov ecx, [dct_buffer] |
int 0x40 |
pop ebx |
ret |
|
; Support for YCbCr -> RGB conversion |
; R = Y + 1.402 * (Cr - 128) |
; G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) |
; B = Y + 1.772 * (Cb - 128) |
; When converting YCbCr -> RGB, we need to do some multiplications; |
; to be faster, we precalculate the tables for all 256 possible values. |
; Fractions are approximated with N/65536, which gives sufficient precision. |
; |
; Fills color_table_1..color_table_4 through one running pointer, so the four |
; tables must stay contiguous and in this order in memory. |
; A table is indexed with the chroma byte taken as a signed value c: |
; entries 0..127 hold c = 0..127, entries 128..255 hold c = -128..-1. |
; color_table_1[c] = floor(1.772*c + 0.5) (integer) |
; color_table_2[c] = -0.34414*c + 0.5 (16.16 fixed point) |
; color_table_3[c] = -0.71414*c (16.16 fixed point) |
; color_table_4[c] = floor(1.402*c + 0.5) (integer) |
; Tables 2 and 3 are meant to be summed and then shifted right by 16, so the |
; single 0.5 term kept in table 2 rounds the sum to the nearest integer. |
; Each table is produced by two passes of 128 entries: after the first pass |
; the running value is negated, which yields the value for c = -128. |
; in: nothing |
; destroys: eax, ecx, edx, edi, flags |
initialize_color_table: |
; 1.402 = 1 + 26345/65536, -0.71414 = -46802/65536 |
; -0.34414 = -22554/65536, 1.772 = 1 + 50594/65536 |
mov edi, color_table_1 |
mov ecx, 128 |
; 1. Cb -> 1.772*Cb |
; eax = integer part, dx = fraction, starting from 0.5 for rounding |
xor eax, eax |
mov dx, 8000h |
.l1: |
push ecx |
@@: |
stosd |
add dx, 50594 |
adc eax, 1 |
loop @b |
; eax:dx = 1 - eax:dx, i.e. negate the value but keep the +0.5 rounding term |
neg dx |
adc eax, -1 |
neg eax |
pop ecx ; pop does not change flags: ZF still comes from neg eax |
jnz .l1 ; eax = 0 only after the second pass |
; 2. Cb -> -0.34414*Cb |
mov ax, dx ; eax = 00008000h, the +0.5 rounding term |
.l2: |
push ecx |
@@: |
stosd |
sub eax, 22554 |
loop @b |
neg eax |
; the negation also turned the rounding term into -0.5; add 1.0 to get +0.5 |
; back, as parts 1 and 4 do (otherwise G is one level too low for Cb < 128) |
add eax, 10000h |
pop ecx |
cmp ax, dx ; low word returns to 8000h only after the second pass |
jnz .l2 |
xor eax, eax |
; 3. Cr -> -0.71414*Cr |
; no rounding term here: it is already contained in table 2 |
.l3: |
push ecx |
@@: |
stosd |
sub eax, 46802 |
loop @b |
neg eax |
pop ecx |
jnz .l3 ; ZF from neg eax: zero only after the second pass |
; 4. Cr -> 1.402*Cr |
; here eax = 0 and dx = 8000h again, as at the start of part 1 |
.l4: |
push ecx |
@@: |
stosd |
add dx, 26345 |
adc eax, 1 |
loop @b |
neg dx |
adc eax, -1 |
neg eax |
pop ecx |
jnz .l4 |
ret |
|
; This function is called at the end of image loading and converts the decoded |
; pixel data in place. |
; in: ebx -> image header followed by pixel data at ebx+44; ebx may be 0. |
; Header fields as used by this code: +4 width, +8 height, +12 bits per |
; pixel, +18 number of components, +32 size of pixel data in bytes. |
; out: ebx -> image; this is the pointer returned by the reallocation call |
; after the 4-component conversion, otherwise unchanged. |
; 3 components: every Y,Cb,Cr triple is replaced by B,G,R. |
; 4 components: every Y,Cb,Cr,K quadruple is replaced by a B,G,R triple, |
; the header is updated and the memory block is shrunk. |
; any other count: nothing is done. |
; Chroma bytes are used directly as indexes into color_table_1..4. |
; may destroy: eax, ecx, edx, esi, edi, ebp, flags |
convert_to_rgb: |
; some checks |
test ebx, ebx ; image exists? |
jz .ret |
cmp byte [ebx+18], 3 ; full-color image? |
jz .ycc2rgb |
cmp byte [ebx+18], 4 |
jz .ycck2rgb |
.ret: |
ret |
.ycc2rgb: |
; conversion is needed |
mov esi, [ebx+4] |
imul esi, [ebx+8] ; esi = number of pixels |
lea edi, [ebx+44] ; edi -> current pixel |
push ebx |
; N.B. Speed optimization makes sense here. |
; Clamping idiom used for every channel below: |
; cmp eax, 80000000h / sbb ecx, ecx / and eax, ecx -> negative becomes 0 |
; cmp eax, 0x100 / sbb ecx, ecx / not ecx / or eax, ecx -> al = FFh if eax > 255 |
align 16 |
.loop: |
; alternative load sequence, disabled: |
; mov ebx, [edi] |
; mov edx, ebx |
; mov ecx, ebx |
; movzx ebx, bl ; ebx = Y |
; shr edx, 16 |
; mov eax, ebx |
; movzx edx, dl ; edx = Cr |
; movzx ecx, ch ; ecx = Cb |
movzx ebx, byte [edi] ; ebx = Y |
movzx ecx, byte [edi+1] ; ecx = Cb |
mov eax, ebx |
movzx edx, byte [edi+2] ; edx = Cr |
; B = Y + color_table_1[Cb] |
add eax, [color_table_1+ecx*4] |
mov ebp, [color_table_2+ecx*4] ; start of the G term, interleaved with B |
cmp eax, 80000000h |
sbb ecx, ecx |
and eax, ecx |
add ebp, [color_table_3+edx*4] |
cmp eax, 0x100 |
sbb ecx, ecx |
not ecx |
sar ebp, 16 ; fixed point -> integer |
or eax, ecx |
mov [edi], al |
; G = Y + color_table_2[Cb] + color_table_3[Cr] |
lea eax, [ebx+ebp] |
cmp eax, 80000000h |
sbb ecx, ecx |
and eax, ecx |
cmp eax, 0x100 |
sbb ecx, ecx |
not ecx |
or eax, ecx |
mov [edi+1], al |
; R = Y + color_table_4[Cr] |
mov eax, ebx |
add eax, [color_table_4+edx*4] |
cmp eax, 80000000h |
sbb ecx, ecx |
and eax, ecx |
cmp eax, 0x100 |
sbb ecx, ecx |
not ecx |
or eax, ecx |
mov [edi+2], al |
add edi, 3 |
sub esi, 1 |
jnz .loop |
pop ebx |
ret |
.ycck2rgb: |
; conversion is needed |
mov esi, [ebx+4] |
imul esi, [ebx+8] |
push ebx |
push esi ; [esp] = number of pixels left |
lea edi, [ebx+44] ; edi -> destination, 3 bytes per pixel |
mov esi, edi ; esi -> source, 4 bytes per pixel |
; The destination never overtakes the source, so the conversion is done in place. |
; Every channel is clamped as above, inverted (xor al, 0xFF) and multiplied by |
; the fourth byte; the add/adc sequence leaves in ah the value |
; (x + (x shr 8) + 128) shr 8, an approximation of x/255 with rounding. |
; N.B. Speed optimization makes sense here. |
align 16 |
.kloop: |
; alternative load sequence, disabled: |
; mov ebx, [esi] |
; mov edx, ebx |
; mov ecx, ebx |
; movzx ebx, bl ; ebx = Y |
; shr edx, 16 |
; mov eax, ebx |
; movzx edx, dl ; edx = Cr |
; movzx ecx, ch ; ecx = Cb |
movzx ebx, byte [esi] ; ebx = Y |
movzx ecx, byte [esi+1] ; ecx = Cb |
mov eax, ebx |
movzx edx, byte [esi+2] ; edx = Cr |
; B = Y + color_table_1[Cb] |
add eax, [color_table_1+ecx*4] |
mov ebp, [color_table_2+ecx*4] |
cmp eax, 80000000h |
sbb ecx, ecx |
and eax, ecx |
add ebp, [color_table_3+edx*4] |
cmp eax, 0x100 |
sbb ecx, ecx |
not ecx |
sar ebp, 16 |
or eax, ecx |
xor al, 0xFF |
mul byte [esi+3] |
add al, ah |
adc ah, 0 |
add al, 80h |
adc ah, 0 |
mov byte [edi], ah |
; G = Y + color_table_2[Cb] + color_table_3[Cr] |
lea eax, [ebx+ebp] |
cmp eax, 80000000h |
sbb ecx, ecx |
and eax, ecx |
cmp eax, 0x100 |
sbb ecx, ecx |
not ecx |
or eax, ecx |
xor al, 0xFF |
mul byte [esi+3] |
add al, ah |
adc ah, 0 |
add al, 80h |
adc ah, 0 |
mov byte [edi+1], ah |
; R = Y + color_table_4[Cr] |
mov eax, ebx |
add eax, [color_table_4+edx*4] |
cmp eax, 80000000h |
sbb ecx, ecx |
and eax, ecx |
cmp eax, 0x100 |
sbb ecx, ecx |
not ecx |
or eax, ecx |
xor al, 0xFF |
mul byte [esi+3] |
add al, ah |
adc ah, 0 |
add al, 80h |
adc ah, 0 |
mov byte [edi+2], ah |
add esi, 4 |
add edi, 3 |
sub dword [esp], 1 |
jnz .kloop |
pop eax ; drop the exhausted pixel counter |
pop ebx |
; now correct values in RAW header |
mov byte [ebx+12], 24 |
mov byte [ebx+18], 3 |
mov ecx, [ebx+32] |
shr ecx, 2 |
lea ecx, [ecx*3] ; new data size = old size / 4 * 3 |
; write the size back to the field it was read from; the previous code stored |
; it at +22, which left +32 stale and overwrote unrelated header bytes |
mov [ebx+32], ecx |
; release some memory - must succeed because we decrease size |
; system function 68, subfunction 20: ecx = new size, edx = pointer to the |
; block; the value returned in eax is used as the new image pointer |
add ecx, 44+1 |
mov edx, ebx |
push 68 |
pop eax |
push 20 |
pop ebx |
int 0x40 |
mov ebx, eax |
ret |
|
; Decodes one data unit, that is, 8*8 block, |
; from input stream, given by pointer esi and length ebp |
; in: edx -> component data (record layout as described at cur_components) |
; out: decoded_data = the 64 resulting bytes; ebx is reloaded from [_ebx] |
; Baseline mode ([progressive] = 0): the coefficients are Huffman-decoded into |
; dct_coeff with the get_huffman_code / get_bits macros (defined elsewhere) and |
; the bit-reader state in ecx is loaded from and saved back to [huffman_bits]. |
; Progressive mode: the 64 ready coefficients are taken from the pointer at |
; [edx+20], which is then advanced to the next block. |
; On end of data or invalid data control does not return to the caller: |
; see .eof at the end. |
; N.B. Speed optimization makes sense here. |
align 16 |
decode_data_unit: |
; edx -> component data |
cmp [progressive], 0 |
jz @f |
; progressive: edi -> stored coefficients, step the pointer by 64 words |
mov edi, [edx+20] |
add dword [edx+20], 64*2 |
jmp .coeff_decoded |
@@: |
; baseline: clear all 64 coefficient words |
mov edi, dct_coeff |
mov ecx, 64*2/4 |
xor eax, eax |
rep stosd |
; edi -> zigzag table entry of the next AC coefficient (entry 0 is the DC term) |
mov edi, zigzag+1 |
mov ecx, [huffman_bits] |
; read DC coefficient |
; NOTE(review): edx is pushed before get_huffman_code and is used again right |
; after get_bits, so `get_bits true` presumably pops it (the .band path, which |
; skips get_bits, pops it explicitly) - confirm in the macro definitions |
mov eax, [edx+16] |
push edx |
get_huffman_code |
get_bits true |
; the decoded value is a difference: add the prediction and store the low word |
; both as the DC coefficient and as the new prediction |
add eax, [edx+48] |
mov [dct_coeff], ax |
mov [edx+48], ax |
; read AC coefficients |
@@: |
mov eax, [edx+20] |
push edx |
get_huffman_code |
; NOTE(review): eax and ebx presumably both hold the decoded symbol here: |
; its high nibble is used as the zero run, its low nibble as the size |
shr eax, 4 |
and ebx, 15 |
jz .band |
; skip `run` zero coefficients; running past the end of the block is an error |
add edi, eax |
cmp edi, zigzag+64 |
jae .eof |
get_bits true |
; store the value at the byte offset given by the zigzag table |
movzx ebx, byte [edi] |
mov [dct_coeff+ebx], ax |
add edi, 1 |
cmp edi, zigzag+64 |
jb @b |
jmp .do_idct |
; size = 0: run = 15 skips 16 zero coefficients, any other run ends the block |
.band: |
pop edx |
cmp al, 15 |
jnz .do_idct |
add edi, 16 |
cmp edi, zigzag+64 |
jb @b |
; jmp .eof |
.do_idct: |
mov edi, dct_coeff |
mov [huffman_bits], ecx |
; coefficients loaded, now IDCT |
; edi -> 64 coefficient words (8 words per row), eax -> quantization table of |
; the component, read as 64 floats (8 per row), ebx -> idct_tmp_area |
.coeff_decoded: |
mov eax, [edx+12] |
mov ebx, idct_tmp_area |
; Pass 1: one iteration per column (edi advances by one word, rows are 16 |
; bytes apart). Coefficients are multiplied by the table entries and the |
; results are written as floats to the same column of idct_tmp_area. |
.idct_loop1: |
; cx = OR of the coefficients in rows 1..7 of this column |
mov cx, word [edi+1*16] |
repeat 6 |
or cx, word [edi+(%+1)*16] |
end repeat |
jnz .real_transform |
; only the row-0 term is nonzero: all 8 outputs of the column are equal to it |
fild word [edi] |
fmul dword [eax] |
fstp dword [ebx] |
mov ecx, [ebx] |
repeat 7 |
mov [ebx+%*32], ecx |
end repeat |
jmp .idct_next1 |
.real_transform: |
; S0,...,S7 - transformed values, s0,...,s7 - sought-for values |
; S0,...,S7 are dequantized; |
; dequantization table elements were multiplied to [idct_pre_table], |
; so S0,S1,... later denote S0/2\sqrt{2},S1*\cos{\pi/16}/2,... |
; sqrt2 = \sqrt{2}, cos = 2\cos{\pi/8}, |
; cos_sum = -2(\cos{\pi/8}+\cos{3\pi/8}), cos_diff = 2(\cos{\pi/8}-\cos{3\pi/8}) |
; Now formulas: |
; s0 = ((S0+S4)+(S2+S6)) + ((S1+S7)+(S3+S5)) |
; s7 = ((S0+S4)+(S2+S6)) - ((S1+S7)+(S3+S5)) |
; val0 = ((cos-1)S1-(cos+cos_sum+1)S3+(cos+cos_sum-1)S5-(cos+1)S7) |
; s1 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) + val0 |
; s6 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) - val0 |
; val1 = (S1+S7-S3-S5)sqrt2 - val0 |
; s2 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) + val1 |
; s5 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) - val1 |
; val2 = (S1-S7)cos_diff - (S1-S3+S5-S7)cos + val1 |
; s3 = ((S0+S4)-(S2+S6)) - val2 |
; s4 = ((S0+S4)-(S2+S6)) + val2 |
; odd part: uses S1, S3, S5, S7 |
fild word [edi+3*16] |
fmul dword [eax+3*32] |
fild word [edi+5*16] |
fmul dword [eax+5*32] ; st0=S5,st1=S3 |
fadd st1,st0 |
fadd st0,st0 |
fsub st0,st1 ; st0=S5-S3,st1=S5+S3 |
fild word [edi+1*16] |
fmul dword [eax+1*32] |
fild word [edi+7*16] |
fmul dword [eax+7*32] ; st0=S7,st1=S1 |
fsub st1,st0 |
fadd st0,st0 |
fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3 |
fadd st3,st0 |
fadd st0,st0 |
fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7 |
fmul [idct_sqrt2] |
fld st2 |
fadd st0,st2 |
fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2, |
; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7 |
fxch st2 |
fmul [idct_cos_diff] |
fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos |
fxch st3 |
fmul [idct_cos_sum] |
fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos |
fsub st0,st4 ; st0=val0 |
fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos, |
; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7 |
fxch st2 |
fstp st0 |
fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7 |

; even part: uses S0, S2, S4, S6 |
fild word [edi+0*16] |
fmul dword [eax+0*32] |
fild word [edi+4*16] |
fmul dword [eax+4*32] ; st0=S4,st1=S0 |
fsub st1,st0 |
fadd st0,st0 |
fadd st0,st1 ; st0=S0+S4,st1=S0-S4 |
fild word [edi+6*16] |
fmul dword [eax+6*32] |
fild word [edi+2*16] |
fmul dword [eax+2*32] ; st0=S2,st1=S6 |
fadd st1,st0 |
fadd st0,st0 |
fsub st0,st1 ; st0=S2-S6,st1=S2+S6 |
fmul [idct_sqrt2] |
fsub st0,st1 |
fsub st3,st0 |
fadd st0,st0 |
fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6)) |
; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6)) |
fxch st1 |
fsub st2,st0 |
fadd st0,st0 |
fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)), |
; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6)) |
; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7 |
; combine both parts; all eight FPU registers are in use at this point and |
; are emptied by the stores below |
fsubr st7,st0 |
fadd st0,st0 |
fsub st0,st7 |
fstp dword [ebx+0*32] |
fsubr st4,st0 |
fadd st0,st0 |
fsub st0,st4 |
fstp dword [ebx+1*32] |
fadd st4,st0 |
fadd st0,st0 |
fsub st0,st4 |
fstp dword [ebx+3*32] |
fsubr st1,st0 |
fadd st0,st0 |
fsub st0,st1 |
fstp dword [ebx+2*32] |
fstp dword [ebx+5*32] |
fstp dword [ebx+6*32] |
fstp dword [ebx+4*32] |
fstp dword [ebx+7*32] |
.idct_next1: |
; next column in the temporary area, the coefficients and the table |
add ebx, 4 |
add edi, 2 |
add eax, 4 |
cmp ebx, idct_tmp_area+8*4 |
jb .idct_loop1 |
sub ebx, 8*4 |
; Pass 2: the same transform along every row of idct_tmp_area (8 floats, 32 |
; bytes per row); the results are stored back in place as integers (fistp). |
.idct_loop2: |
fld dword [ebx+3*4] |
fld dword [ebx+5*4] |
fadd st1,st0 |
fadd st0,st0 |
fsub st0,st1 ; st0=S5-S3,st1=S5+S3 |
fld dword [ebx+1*4] |
fld dword [ebx+7*4] |
fsub st1,st0 |
fadd st0,st0 |
fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3 |
fadd st3,st0 |
fadd st0,st0 |
fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7 |
fmul [idct_sqrt2] |
fld st2 |
fadd st0,st2 |
fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2, |
; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7 |
fxch st2 |
fmul [idct_cos_diff] |
fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos |
fxch st3 |
fmul [idct_cos_sum] |
fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos |
fsub st0,st4 ; st0=val0 |
fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos, |
; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7 |
fxch st2 |
fstp st0 |
fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7 |

fld dword [ebx+0*4] |
fld dword [ebx+4*4] |
fsub st1,st0 |
fadd st0,st0 |
fadd st0,st1 ; st0=S0+S4,st1=S0-S4 |
fld dword [ebx+6*4] |
fld dword [ebx+2*4] |
fadd st1,st0 |
fadd st0,st0 |
fsub st0,st1 ; st0=S2-S6,st1=S2+S6 |
fmul [idct_sqrt2] |
fsub st0,st1 |
fsub st3,st0 |
fadd st0,st0 |
fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6)) |
; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6)) |
fxch st1 |
fsub st2,st0 |
fadd st0,st0 |
fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)), |
; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6)) |
; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7 |
fsubr st7,st0 |
fadd st0,st0 |
fsub st0,st7 |
fistp dword [ebx+0*4] |
fsubr st4,st0 |
fadd st0,st0 |
fsub st0,st4 |
fistp dword [ebx+1*4] |
fadd st4,st0 |
fadd st0,st0 |
fsub st0,st4 |
fistp dword [ebx+3*4] |
fsubr st1,st0 |
fadd st0,st0 |
fsub st0,st1 |
fistp dword [ebx+2*4] |
fistp dword [ebx+5*4] |
fistp dword [ebx+6*4] |
fistp dword [ebx+4*4] |
fistp dword [ebx+7*4] |

add ebx, 32 |
cmp ebx, idct_tmp_area+32*8 |
jb .idct_loop2 |

; Pass 3: for each of the 64 integers add 128, clamp to 0..255 (negative -> 0, |
; above 255 -> 255), subtract the byte at [edx+51] and store to decoded_data |
mov ecx, idct_tmp_area |
mov edi, decoded_data - 1 |
.idct_loop3: |
mov eax, [ecx] |
add ecx, 4 |
add eax, 80h |
cmp eax, 80000000h |
sbb ebx, ebx |
add edi, 1 |
and eax, ebx |
cmp eax, 100h |
sbb ebx, ebx |
not ebx |
or eax, ebx |
sub al, [edx+51] |
cmp ecx, idct_tmp_area+64*4 |
mov [edi], al |
jb .idct_loop3 |
; done |
mov ebx, [_ebx] |
ret |

.eof: |
; EOF or incorrect data during scanning |
; abandon the whole call chain: restore the saved stack pointer and ebx, |
; then continue at START.end |
mov esp, [_esp] |
mov ebx, [_ebx] |
jmp START.end |
;--------------------------------------------------------------------- |
; Export 'Check_Header': checks that the data begins with the marker 0xD8. |
; stdcall with one dword argument, a pointer to a structure: |
; +0 dd value loaded into esi for get_marker (pointer to the data) |
; +8 dd result: set to 0 when get_marker succeeds and returns 0xD8, else 1 |
; +12 dd value loaded into ebp for get_marker (presumably the data length) |
; All registers are preserved. |
check_header: |
pushad |
mov eax, [esp+32+4] ; the argument lies above 8 saved registers and the return address |
push eax ; keep the structure pointer across get_marker |
mov esi, [eax] |
mov ebp, [eax+12] |
and dword [eax+8], 0 ; assume success |
call get_marker |
pop edx ; edx -> structure; pop changes neither CF nor al |
jc .fail |
cmp al, 0xD8 |
je .done |
.fail: |
inc dword [edx+8] |
.done: |
popad |
ret 4 |
;--------------------------------------------------------------------- |
; Zigzag scan order of an 8*8 block, written out explicitly. |
; Entry k is the byte offset inside dct_coeff (words, 8 per row) of the k-th |
; coefficient in zigzag order: (x,y) -> 2*(x+y*8) |
zigzag: |
db 0, 2, 16, 32, 18, 4, 6, 20 |
db 34, 48, 64, 50, 36, 22, 8, 10 |
db 24, 38, 52, 66, 80, 96, 82, 68 |
db 54, 40, 26, 12, 14, 28, 42, 56 |
db 70, 84, 98, 112, 114, 100, 86, 72 |
db 58, 44, 30, 46, 60, 74, 88, 102 |
db 116, 118, 104, 90, 76, 62, 78, 92 |
db 106, 120, 122, 108, 94, 110, 124, 126 |
|
align 4 |
; Scale factors of the IDCT, single-precision floats: |
; c_0 = 1/(2\sqrt{2}), c_i = cos(i*\pi/16)/2 |
idct_pre_table: |
dd 0.35355339 ; c_0 |
dd 0.49039264 ; c_1 |
dd 0.461939766 ; c_2 |
dd 0.41573481 ; c_3 |
dd 0.35355339 ; c_4 |
dd 0.27778512 ; c_5 |
dd 0.19134172 ; c_6 |
dd 0.09754516 ; c_7 |
; multipliers used inside the butterfly computations of decode_data_unit |
idct_sqrt2: |
dd 1.41421356 ; \sqrt{2} |
idct_cos: |
dd 1.847759065 ; 2\cos{\pi/8} |
idct_cos_sum: |
dd -2.61312593 ; -2(\cos{\pi/8} + \cos{3\pi/8}) |
idct_cos_diff: |
dd 1.08239220 ; 2(\cos{\pi/8} - \cos{3\pi/8}) |
;--------------------------------------------------------------------- |
; Export 'Associations': the file name extensions served by this plugin. |
; The first dword is the distance from the start of the table to .end, that |
; is, the size of the dword itself plus the zero-terminated names; one more |
; zero byte follows the list. |
Associations: |
dd .end - Associations |
db 'JPEG',0,'JPG',0,'JPE',0 |
.end: |
db 0 |
;--------------------------------------------------------------------- |
align 4 |
; Export table: pairs of (pointer to a zero-terminated name, value), |
; closed by a zero dword. |
EXPORTS: |
dd szStart |
dd START |
dd szVersion |
dd 0x00010002 |
dd szCheck |
dd check_header |
dd szAssoc |
dd Associations |
dd 0 |

; names of the exported items |
szStart: |
db 'START',0 |
szVersion: |
db 'version',0 |
szCheck: |
db 'Check_Header',0 |
szAssoc: |
db 'Associations',0 |
|
; Working storage of the decoder; everything here is reserved without an |
; initial value. |
section '.data' data readable writable align 16 |

; up to 4 quantization tables |
; 64 dwords each; the IDCT in decode_data_unit multiplies by them as floats |
quant_tables rd 4*64 |
; one byte per table; handle_progressive rejects a table whose byte is zero |
quant_tables_defined rb 4 |

; Huffman tables |
dc_huffman_defined rb 4 |
ac_huffman_defined rb 4 |
; up to 4 DC Huffman tables |
;dc_huffman rd 4*256*2 |
; up to 4 AC Huffman tables |
;ac_huffman rd 4*256*2 |
max_hufftable_size = (256 + (9+128)*16)*2 |
dc_huffman rb 4*max_hufftable_size |
ac_huffman rb 4*max_hufftable_size |

; restart interval |
restart_interval dd ? |
decoded_MCUs dd ? |

; base esp,ebx values |
; decode_data_unit reloads ebx from _ebx on return and both registers from |
; these variables on its .eof path |
_esp dd ? |
_ebx dd ? |

; components information, up to 4 components |
; db ComponentIdentifier, db V, db H, db VFactor, db HFactor, db QuantizationTable |
components rb 4*6 |
max_v db ? |
max_h db ? |
cur_rst_marker db ? |
db ? |
; bit-reader state kept between data units: decode_data_unit loads it into ecx |
; before Huffman decoding and stores ecx back afterwards |
huffman_bits dd ? |
block_width dd ? |
block_height dd ? |
block_delta_x dd ? |
block_delta_y dd ? |
cur_block_dx dd ? |
cur_block_dy dd ? |
x_num_blocks dd ? |
y_num_blocks dd ? |
delta_x dd ? |
delta_y dd ? |
pixel_size dd ? |
line_size dd ? |
; countdown counters of the decoding loop in handle_progressive |
cur_x dd ? |
cur_y dd ? |
max_x dd ? |
max_y dd ? |
cur_out_ptr dd ? |
; progressive mode: pointer to the stored DCT coefficients (zero = none) and |
; the size of the part belonging to one component; handle_progressive passes |
; the pointer to system function 68.13 when it is done |
dct_buffer dd ? |
dct_buffer_size dd ? |
;ns dd ? |
; one 56-byte record per component of the current scan: |
; +0: db V, db H, db VFactor, db HFactor, dd HIncrement, dd VIncrement, |
; +12: dd QuantizationTable, dd DCTable, dd ACTable, |
; +24: dd width/HFactor, dd width/HFactor-8k, dd HFactor+1-(width%HFactor), |
; +36: dd height/VFactor, dd height/VFactor-8m, dd VFactor+1-(height%VFactor), |
; +48: dw DCPrediction, db ?, db (0 for Y, 80h for Cb,Cr), dd ComponentOffset |
; (in progressive mode the ACTable field holds the pointer to the current |
; DCT coefficients instead) |
cur_components rb 4*56 |
; pointer just past the last record in use |
cur_components_end dd ? |
; progressive JPEG? |
progressive db ? |
; one component in the scan? |
not_interleaved db ? |
; Adobe YCCK file? |
adobe_ycck db ? |
rb 1 |
; parameters for progressive scan |
ScanStart db ? |
ScanEnd db ? |
ApproxPosLow db ? |
ApproxPosHigh db ? |
; Fourier coefficients |
; 64 words, 8 per row; cleared and filled by decode_data_unit in baseline mode |
dct_coeff rw 64 |
; Temporary space for IDCT |
; 8*8 dwords: floats after the first pass, integers after the second |
idct_tmp_area rd 64 |
; decoded block 8*8 |
decoded_data rb 8*8 |
; data for YCbCr -> RGB translation |
; filled by initialize_color_table through one running pointer, so the four |
; tables must stay adjacent and in this order |
color_table_1 rd 256 |
color_table_2 rd 256 |
color_table_3 rd 256 |
color_table_4 rd 256 |