WebSVN – Kolibri OS – Blame – /programs/develop/libraries/libs-dev/libimg/jpeg/jpeg.asm

Rev	Author	Line No.	Line
999	diamond	1	;;================================================================================================;;
		2	;;//// jpeg.asm //// (c) diamond, 2008-2009 //////////////////////////////////////////////////////;;
		3	;;================================================================================================;;
		4	;; ;;
		5	;; This file is part of Common development libraries (Libs-Dev). ;;
		6	;; ;;
		7	;; Libs-Dev is free software: you can redistribute it and/or modify it under the terms of the GNU ;;
		8	;; Lesser General Public License as published by the Free Software Foundation, either version 2.1 ;;
		9	;; of the License, or (at your option) any later version. ;;
		10	;; ;;
		11	;; Libs-Dev is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without ;;
		12	;; even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
		13	;; Lesser General Public License for more details. ;;
		14	;; ;;
		15	;; You should have received a copy of the GNU Lesser General Public License along with Libs-Dev. ;;
		16	;; If not, see <http://www.gnu.org/licenses/>. ;;
		17	;; ;;
		18	;;================================================================================================;;
		19
		20	include 'jpeg.inc'
		21
			; img.is.jpg - report whether a buffer starts with a JPEG SOI marker.
			; in:  two dword stack arguments (removed by `ret 8`):
			;      [esp+4] = pointer to data, [esp+8] = data size
			; out: eax = 1 if get_marker succeeds and returns 0xD8, eax = 0 otherwise
			; esi and ebp are saved on entry and restored before returning.
		22	img.is.jpg:
		23	push esi ebp
		24	mov esi, [esp+12] ; esi -> JPEG data
		25	mov ebp, [esp+16] ; ebp = data size
		26	call get_marker
		27	jc .no ; CF=1: no marker found
		28	cmp al, 0xD8 ; SOI marker?
		29	push 1 ; push/pop leave the flags alone, so ZF from the cmp is still valid at jz
		30	pop eax ; eax = 1 (tentative "yes")
		31	jz .ok
		32	.no:
		33	xor eax, eax ; eax = 0: not a JPEG
		34	.ok:
		35	pop ebp esi
		36	ret 8
		37
		38	img.decode.jpg:
		39	finit
		40	pushad
		41	mov esi, [esp+20h+4] ; esi -> JPEG data
		42	mov ebp, [esp+20h+8] ; ebp = data size
		43	@@:
		44	; allocate area for JPEG processing
		45	push sizeof.jpeg.work
		46	call [mem.alloc]
		47	test eax, eax
		48	jz .ret
		49	mov ebx, eax
		50	xor ecx, ecx
		51	mov [ebx + jpeg.work.image], ecx
		52	mov [ebx + jpeg.work.dct_buffer], ecx
		53	mov [ebx + jpeg.work._esp], esp
		54	; check for SOI [Start-Of-Image] marker
		55	call get_marker
		56	jc .end
		57	cmp al, 0xD8 ; SOI?
		58	jz .soi_ok
		59	.end:
		60	; general exit from the function
		61	; for progressive mode: convert loaded DCT coefficients to image
		62	call handle_progressive
		63	; convert full-color images to RGB
		64	call convert_to_rgb
		65	push [ebx + jpeg.work.image]
		66	push ebx
		67	call [mem.free]
		68	pop eax
		69	.ret:
		70	mov [esp+28], eax
		71	popad
1102	diamond	72	ret 12
			; .soi_ok / .markers_loop - main marker dispatcher of img.decode.jpg.
			; Entered with ecx = 0, esi -> data after SOI, ebp = bytes remaining,
			; ebx -> jpeg.work. For every marker segment it loads the big-endian
			; segment length into edx, subtracts it from ebp and dispatches on the
			; marker value in al. Handlers receive esi -> length field, edx = length.
999	diamond	73	.soi_ok:
		74	mov [ebx + jpeg.work.restart_interval], ecx ; ecx = 0 here: no restart interval yet
		75	mov [ebx + jpeg.work.adobe_ycck], cl
		76	; loop until start of frame (real data), parse markers
		77	.markers_loop:
		78	call get_marker
		79	jc .end
		80	; markers RSTn do not have parameters
		81	; N.B. They can not exist in this part of JPEG, but let's be liberal :)
		82	cmp al, 0xD0
		83	jb @f
		84	cmp al, 0xD8
		85	jb .markers_loop ; 0xD0..0xD7 = RSTn: just ignore
		86	@@:
		87	cmp al, 0xD9 ; EOI? [invalid here]
		88	jz .end
		89	; ok, this is marker segment
		90	; first word is length of the segment
		91	cmp ebp, 2
		92	jb .end
		93	xor edx, edx
		94	mov dl, [esi+1]
		95	mov dh, [esi] ; edx = marker length, al = marker value
		96	sub ebp, edx ; the whole segment is accounted for here; handlers only advance esi
		97	jb .end ; segment is longer than the remaining data
		98	cmp al, 0xDB ; DQT?
		99	jz .dqt
		100	cmp al, 0xC4 ; DHT?
		101	jz .dht
		102	cmp al, 0xCC ; DAC? [ignored - no arithmetic coding]
		103	jz .next_marker
		104	cmp al, 0xDD ; DRI?
		105	jz .dri
		106	cmp al, 0xDA ; SOS?
		107	jz .sos
		108	cmp al, 0xC0
		109	jb @f
		110	cmp al, 0xD0
		111	jb .sofn ; 0xC0..0xCF except DHT (0xC4) and DAC (0xCC), which were handled above
		112	@@:
		113	cmp al, 0xEE ; APP14?
		114	jz .app14
		115	; unrecognized marker; let's skip it and hope for the best
		116	.next_marker:
		117	add esi, edx ; skip the segment (length field included)
		118	jmp .markers_loop
			; .app14 - APP14 segment handler.
			; in: esi -> segment (length field first), edx = segment length
			; Sets jpeg.work.adobe_ycck to 1 if the segment is at least 14 bytes long,
			; starts with "Adobe" after the length field and the byte at offset 13
			; equals 2; sets it to 0 if the signature matches but that byte differs.
			; Segments that are too short or lack the signature leave the flag alone.
		119	.app14:
		120	; check for special Adobe marker
		121	cmp dx, 14
		122	jb .next_marker
		123	cmp byte [esi+2], 'A'
		124	jnz .next_marker
		125	cmp dword [esi+3], 'dobe'
		126	jnz .next_marker
		127	cmp byte [esi+13], 2 ; last byte of a 14-byte segment
		128	setz [ebx + jpeg.work.adobe_ycck]
		129	jmp .next_marker
			; .dqt - DQT (quantization table) segment handler.
			; in: esi -> segment (length field first), edx = segment length
			; Each table in the segment is one header byte followed by 64 byte-sized
			; entries. Every entry is converted to a float, multiplied by two factors
			; from idct_pre_table and stored into jpeg.work.quant_tables at
			; table_index*256 + 2*zigzag[i].
			; NOTE(review): zigzag and idct_pre_table are defined outside this view;
			; the 2*zigzag[i] value is used directly as a byte offset, so the table
			; presumably holds pre-doubled indices - confirm.
		130	.dqt:
		131	; DQT marker found
		132	; length: 2 bytes for length field + 65 bytes per table
		133	sub edx, 2
		134	jc .end
		135	lodsw ; skip the length field
		136	.dqt_loop:
		137	test edx, edx
		138	jz .markers_loop ; segment fully consumed
		139	sub edx, 1+64
		140	jc .end ; truncated table
		141	lodsb ; al = table header byte
		142	; 8-bit DCT-based process shall not use a 16-bit precision quantization table.
		143	test al, 0xF0
		144	jnz .end
		145	and eax, 3 ; eax = table index (only the two low bits are used)
		146	mov [ebx+jpeg.work.quant_tables_defined+eax], 1
		147	shl eax, 8 ; 256 bytes per table = 64 dword entries
		148	lea edi, [ebx+eax+jpeg.work.quant_tables]
		149	xor ecx, ecx ; ecx = index of the entry within the segment, 0..63
		150	@@:
		151	xor eax, eax
		152	lodsb
		153	push eax
		154	fild dword [esp] ; st0 = quantization value as float
		155	pop eax
		156	movzx eax, byte [zigzag+ecx]
		157	add eax, eax ; eax = byte offset of the destination entry
		158	push eax
		159	and eax, 7*4 ; low three index bits, scaled to a dword offset
		160	fmul dword [idct_pre_table+eax]
		161	pop eax
		162	push eax
		163	shr eax, 3
		164	and eax, 7*4 ; next three index bits, scaled to a dword offset
		165	fmul dword [idct_pre_table+eax]
		166	pop eax
		167	fstp dword [edi+eax]
		168	inc ecx
		169	cmp ecx, 64
		170	jb @b
		171	jmp .dqt_loop
			; .dri - DRI (restart interval) segment handler.
			; in: esi -> segment (length field first), edx = segment length
			; Stores the big-endian 16-bit value that follows the length field into
			; jpeg.work.restart_interval.
		172	.dri:
		173	; DRI marker found
		174	cmp edx, 4 ; length must be 4
		175	jnz .end2
		176	movzx eax, word [esi+2]
		177	xchg al, ah ; convert from big-endian
		178	mov [ebx+jpeg.work.restart_interval], eax
		179	jmp .next_marker
			; .dht - DHT (Huffman table) segment handler.
			; in: esi -> segment (length field first), edx = segment length
			; For every table in the segment: reads the header byte, the 16 per-length
			; code counts and the symbol values, then builds a multi-level lookup
			; table at jpeg.work.dc_huffman + index*max_hufftable_size.
			; Table layout as written by this code:
			;   - first level: 256 word entries (low byte = code length, high byte =
			;     symbol) indexed by the next 8 bits of input, for lengths 1..8;
			;   - further levels: groups of 16 word entries (16*2 bytes) for longer
			;     codes, linked from the previous level through entries whose low
			;     byte is 9 and whose high byte is a group number;
			;   - all unused bytes are filled with 0xFF.
			; NOTE(review): the exact meaning of the packed counters kept in eax during
			; levels 2 and 3 is inferred from the stores below - confirm against the
			; reader in get_huffman_code.
			; ebx and edx are saved on the stack while the table is built and restored
			; on both the success path and the .dhterr* paths.
		180	.dht:
		181	; DHT marker found
		182	sub edx, 2
		183	jc .end2
		184	lodsw ; skip the length field
		185	.dht_loop:
		186	test edx, edx
		187	jz .markers_loop ; segment fully consumed
		188	sub edx, 17 ; header byte + 16 count bytes
		189	jc .end2
		190	; next Huffman table; find place for it
		191	lodsb
		192	mov edi, eax
		193	and eax, 0x10 ; bit 4 of the header byte selects the second group of four tables
		194	and edi, 3 ; two low bits = table number
		195	shr eax, 2
		196	or edi, eax ; edi = table slot 0..7
		197	mov [ebx+jpeg.work.dc_huffman_defined+edi], 1
		198	; shl edi, 11
		199	imul edi, max_hufftable_size
		200	lea edi, [ebx+edi+jpeg.work.dc_huffman] ; edi -> destination table
		201	; get table size
		202	xor eax, eax
		203	push 16
		204	pop ecx
		205	@@:
		206	add al, [esi]
		207	adc ah, 0 ; ax = running sum of the 16 count bytes
		208	inc esi
		209	loop @b
		210	cmp ax, 0x100
		211	ja .end2 ; more than 256 symbols is invalid
		212	sub edx, eax
		213	jc .end2 ; symbols do not fit into the segment
		214	; construct Huffman tree
		215	push ebx edx
		216	; lea eax, [edi+256*8]
		217	; push eax
		218	; push 16
		219	; mov edx, esi
		220	; @@:
		221	; cmp byte [edx-1], 0
		222	; jnz @f
		223	; dec edx
		224	; dec dword [esp]
		225	; jmp @b
		226	; @@:
		227	; sub edx, [esp]
		228	; lea eax, [edi+8]
		229	; push 2
		230	; pop ecx
		231	; .lenloop:
		232	; mov bl, byte [edx]
		233	; test bl, bl
		234	; jz .len1done
		235	; push eax
		236	; xor eax, eax
		237	; .len1loop:
		238	; dec ecx
		239	; js .dhterr
		240	; cmp edi, [esp+8]
		241	; jae .dhterr
		242	; lodsb
		243	; stosd
		244	; dec bl
		245	; jnz .len1loop
		246	; pop eax
		247	; .len1done:
		248	; jecxz .len2done
		249	; push ecx
		250	; .len2loop:
		251	; cmp eax, [esp+8]
		252	; jb @f
		253	; or eax, -1
		254	; @@:
		255	; cmp edi, [esp+8]
		256	; jae .dhterr
		257	; stosd
		258	; add eax, 8
		259	; jnb @f
		260	; or eax, -1
		261	; @@:
		262	; loop .len2loop
		263	; pop ecx
		264	; .len2done:
		265	; add ecx, ecx
		266	; inc edx
		267	; dec dword [esp]
		268	; jnz .lenloop
		269	; pop eax
		270	; pop eax
		271	; sub eax, edi
		272	; shr eax, 2
		273	; cmp eax, ecx
		274	; ja @f
		275	; mov ecx, eax
		276	; @@:
		277	; or eax, -1
		278	; rep stosd
		279	; pop edx ebx
		280	; jmp .dht_loop
		281	; .dhterr:
		282	; ;pop eax eax eax edx ebx
		283	; add esp, 5*4
			; --- level 1: code lengths 1..8 ---
		284	lea eax, [edi+256*2]
		285	push eax ; [esp] = end of the first-level table
		286	lea edx, [esi-16] ; edx -> the 16 count bytes; esi -> symbol values
		287	mov ah, 1 ; ah = current code length
		288	mov ecx, 128 ; entries per code of length 1; halved for each longer length
		289	.dht_l1:
		290	movzx ebx, byte [edx] ; ebx = number of codes of the current length
		291	inc edx
		292	test ebx, ebx
		293	jz .dht_l3
		294	.dht_l2:
		295	cmp edi, [esp]
		296	jae .dhterr1 ; more codes than the first level can hold
		297	lodsb ; al = symbol value
		298	xchg al, ah ; al = length, ah = symbol: the entry word
		299	push ecx
		300	rep stosw ; replicate the entry over every index that starts with this code
		301	pop ecx
		302	xchg al, ah ; back to ah = length
		303	dec ebx
		304	jnz .dht_l2
		305	.dht_l3:
		306	inc ah
		307	shr ecx, 1
		308	jnz .dht_l1
			; --- level 2: the next four code lengths, in groups of 16 entries ---
		309	push edi ; current position in the first-level table (where link entries go)
		310	mov edi, [esp+4]
		311	push edi ; [esp] = end of the current second-level group
		312	mov eax, 0x00090100 ; low word: al = 0, ah = 1; high word: 0x0009 (see NOTE above)
		313	mov cl, 8
		314	.dht_l4:
		315	movzx ebx, byte [edx]
		316	inc edx
		317	test ebx, ebx
		318	jz .dht_l6
		319	.dht_l5:
		320	cmp edi, [esp]
		321	jb @f ; still room in the current group
			; the current group is full: write a link entry into the first level
			; and open a new 16-entry group
		322	mov edi, [esp+4]
		323	rol eax, 16
		324	cmp edi, [esp+8]
		325	jae .dhterr2 ; first-level table exhausted
		326	stosw
		327	inc ah
		328	mov [esp+4], edi
		329	pop edi
		330	push edi
		331	rol eax, 16
		332	add dword [esp], 16*2
		333	@@:
		334	lodsb
		335	xchg al, ah
		336	push ecx
		337	rep stosw
		338	pop ecx
		339	xchg al, ah
		340	dec ebx
		341	jnz .dht_l5
		342	.dht_l6:
		343	inc ah
		344	shr ecx, 1
		345	jnz .dht_l4
			; --- level 3: the last four code lengths ---
		346	push edi
			; ebx = weighted sum of the four remaining counts (count[0]*8 + count[1]*4
			; + count[2]*2 + count[3]), rounded up to whole 16-entry groups below
		347	movzx ebx, byte [edx]
		348	add ebx, ebx
		349	add bl, [edx+1]
		350	adc bh, 0
		351	add ebx, ebx
		352	add bl, [edx+2]
		353	adc bh, 0
		354	add ebx, ebx
		355	add bl, [edx+3]
		356	adc bh, 0
		357	add ebx, 15
		358	shr ebx, 4
		359	mov cl, 8
		360	lea ebx, [edi+ebx*2]
		361	sub ebx, [esp+12]
		362	add ebx, 31
		363	shr ebx, 5
		364	mov edi, ebx
		365	shl edi, 5
		366	add edi, [esp+12] ; edi = start of the third-level area, aligned to a whole group
		367	xor ebx, 9
		368	shl ebx, 16
		369	xor eax, ebx
		370	push edi
		371	.dht_l7:
		372	movzx ebx, byte [edx]
		373	inc edx
		374	test ebx, ebx
		375	jz .dht_l10
		376	.dht_l8:
		377	cmp edi, [esp]
		378	jb .dht_l9 ; still room in the current third-level group
		379	mov edi, [esp+4]
		380	cmp edi, [esp+8]
		381	jb @f ; still room for a link entry in the current second-level group
			; the second-level group is full as well: link a new one from level 1
		382	mov edi, [esp+12]
		383	cmp edi, [esp+16]
		384	jae .dhterr3 ; first-level table exhausted
		385	mov al, 9
		386	stosb
		387	rol eax, 8
		388	stosb
		389	inc eax
		390	ror eax, 8
		391	mov [esp+12], edi
		392	mov edi, [esp+8]
		393	add dword [esp+8], 16*2
		394	@@:
			; write a link entry into the second level and open a new third-level group
		395	mov al, 9
		396	stosb
		397	rol eax, 16
		398	stosb
		399	inc eax
		400	ror eax, 16
		401	mov [esp+4], edi
		402	pop edi
		403	push edi
		404	add dword [esp], 16*2
		405	.dht_l9:
		406	lodsb
		407	xchg al, ah
		408	push ecx
		409	rep stosw
		410	pop ecx
		411	xchg al, ah
		412	dec ebx
		413	jnz .dht_l8
		414	.dht_l10:
		415	inc ah
		416	shr ecx, 1
		417	jnz .dht_l7
			; fill the unused tail of each level with 0xFF
		418	push -1
		419	pop eax
		420	pop ecx
		421	sub ecx, edi
		422	rep stosb
		423	pop edi
		424	pop ecx
		425	sub ecx, edi
		426	rep stosb
		427	pop edi
		428	pop ecx
		429	sub ecx, edi
		430	rep stosb
		431	pop edx ebx
		432	jmp .dht_loop
			; error exits: each label drops the temporaries pushed by the level it
			; belongs to, then falls through to the next one
		433	.dhterr3:
		434	pop eax eax
		435	.dhterr2:
		436	pop eax eax
		437	.dhterr1:
		438	pop eax
		439	pop edx ebx
		440	.end2:
		441	jmp .end ; trampoline to the common exit
			; .sofn - SOFn (start of frame) segment handler.
			; in: al = marker value, esi -> segment (length field first),
			;     edx = segment length
			; Validates the frame header, creates the output image with img.create,
			; fills the per-image fields of jpeg.work (delta_x, delta_y, line_size,
			; block sizes, block counts) and the jpeg.work.components array
			; (6 bytes per component: id, V, H, VFactor, HFactor, quantization table
			; id). For progressive frames it also allocates jpeg.work.dct_buffer.
		442	.sofn:
		443	; SOFn marker found
		444	cmp [ebx+jpeg.work.image], 0
		445	jnz .end2 ; only one frame is allowed
		446	; only SOF0 [baseline sequential], SOF1 [extended sequential], SOF2 [progressive]
		447	; nobody supports other compression methods
		448	cmp al, 0xC2
		449	ja .end2
		450	setz [ebx+jpeg.work.progressive] ; 1 only for SOF2
		451	; Length must be at least 8
		452	sub edx, 8
		453	jb .end2
		454	; Sample precision in JFIF must be 8 bits
		455	cmp byte [esi+2], 8
		456	jnz .end2
		457	; Color space in JFIF is either YCbCr (color images, 3 components)
		458	; or Y (grey images, 1 component)
		459	movzx eax, byte [esi+7]
		460	cmp al, 1
		461	jz @f
		462	cmp al, 3
		463	jz @f
		464	; Adobe products sometimes use YCCK color space with 4 components
		465	cmp al, 4
		466	jnz .end2
		467	cmp [ebx+jpeg.work.adobe_ycck], 0
		468	jz .end2
		469	@@:
		470	mov edi, eax ; edi = number of components
		471	lea eax, [eax*3]
		472	sub edx, eax
		473	jnz .end2 ; length must be exactly 8 + 3 * number of components
		474	; image type: 8 bpp for grayscale JPEGs, 24 bpp for normal,
		475	; 32 bpp for Adobe YCCK
2733	dunkaist	476	push Image.bpp8i
		477	pop eax ; Image.bpp8i = 1
1079	diamond	478	cmp edi, eax
999	diamond	479	jz @f
		480	inc eax ; Image.bpp24 = 2
		481	cmp edi, 3
		482	jz @f
		483	inc eax ; Image.bpp32 = 3
		484	@@:
		485	push eax ; image type; presumably consumed as the last argument of img.create below - confirm
		486	; get width and height
		487	; width must be nonzero
		488	; height must be nonzero - nobody supports DNL markers
			; NOTE(review): no explicit zero check is visible here; it is presumably
			; left to img.create - confirm
		489	mov ah, [esi+3]
		490	mov al, [esi+4] ; eax = height
		491	xor ecx, ecx
		492	mov ch, [esi+5]
		493	mov cl, [esi+6] ; ecx = width
		494	; allocate memory for image
		495	stdcall img.create, ecx, eax
		496	test eax, eax
		497	jz .end2
		498	mov [ebx + jpeg.work.image], eax
		499	; create grayscale palette if needed
		500	cmp edi, 1
		501	jnz .no_create_palette
		502	push ecx edi
		503	mov edi, [eax + Image.Palette]
		504	xor eax, eax
		505	mov ecx, 256
		506	@@:
		507	stosd ; palette entry i = 0x00iiiiii
		508	add eax, 0x010101
		509	loop @b
		510	pop edi ecx
		511	.no_create_palette:
		512	; other image characteristics
		513	mov eax, edi
		514	shl eax, 3
		515	mov [ebx + jpeg.work.delta_x], eax ; 8 pixels * bytes per pixel
		516	mov [ebx + jpeg.work.pixel_size], edi
		517	;mov eax, edi
		518	imul eax, ecx ; relies on ecx still holding the width after img.create
		519	mov [ebx + jpeg.work.delta_y], eax ; 8 image rows in bytes
		520	shr eax, 3
		521	mov [ebx + jpeg.work.line_size], eax ; one image row in bytes
		522	add esi, 8 ; esi -> first component descriptor
		523	mov ecx, edi
		524	lea edi, [ebx + jpeg.work.components]
		525	xor eax, eax
		526	xor edx, edx ; dl / dh collect the maxima of the low / high sampling nibbles
		527	.sof_parse_comp:
		528	movsb ; db ComponentIdentifier
		529	lodsb ; sampling factors: high nibble and low nibble
		530	mov ah, al
		531	and al, 0xF
		532	jz .end3 ; a zero sampling factor is invalid
		533	shr ah, 4
		534	jz .end3
		535	stosd ; db V, db H, db ?, db ? (will be filled later)
		536	cmp dl, al
		537	ja @f
		538	mov dl, al
		539	@@:
		540	cmp dh, ah
		541	ja @f
		542	mov dh, ah
		543	@@:
		544	movsb ; db QuantizationTableID
		545	loop .sof_parse_comp
		546	mov word [ebx + jpeg.work.max_v], dx ; low byte = max V; high byte = max H goes to the next byte
		547	movzx eax, dh ; eax = HMax
		548	movzx edx, dl ; edx = VMax
		549	push eax edx
		550	shl eax, 3
		551	shl edx, 3
		552	mov [ebx + jpeg.work.block_width], eax ; 8 * HMax
		553	mov [ebx + jpeg.work.block_height], edx ; 8 * VMax
		554	pop edx eax
		555	push eax edx ; keep HMax and VMax on the stack for the factor loop below
		556	imul eax, [ebx + jpeg.work.delta_x]
		557	mov [ebx + jpeg.work.block_delta_x], eax
		558	imul edx, [ebx + jpeg.work.delta_y]
		559	mov [ebx + jpeg.work.block_delta_y], edx
		560	mov ecx, [ebx + jpeg.work.image]
		561	mov eax, [ecx + Image.Width]
		562	add eax, [ebx + jpeg.work.block_width]
		563	dec eax
		564	xor edx, edx
		565	div [ebx + jpeg.work.block_width]
		566	mov [ebx + jpeg.work.x_num_blocks], eax ; width / block_width, rounded up
		567	mov eax, [ecx + Image.Height]
		568	add eax, [ebx + jpeg.work.block_height]
		569	dec eax
		570	xor edx, edx
		571	div [ebx + jpeg.work.block_height]
		572	mov [ebx + jpeg.work.y_num_blocks], eax ; height / block_height, rounded up
		573	mov ecx, [ebx + jpeg.work.pixel_size] ; loop counter: number of components
		574	pop edx ; edx = VMax
		575	lea edi, [ebx + jpeg.work.components]
		576	@@:
		577	mov eax, edx
		578	div byte [edi+1] ; VMax / V_i = VFactor_i
		579	mov byte [edi+3], al ; db VFactor
		580	pop eax
		581	push eax ; peek HMax without removing it from the stack
		582	div byte [edi+2] ; HMax / H_i = HFactor_i
		583	mov byte [edi+4], al ; db HFactor
		584	add edi, 6
		585	loop @b
		586	pop eax ; drop HMax
		587	cmp [ebx + jpeg.work.progressive], 0
		588	jz .sof_noprogressive
			; progressive: allocate the coefficient buffer,
			; dct_buffer_size = x_num_blocks*block_width*y_num_blocks*block_height*2 bytes
			; per component
			; NOTE(review): the products are computed in 32 bits with no overflow check
		589	mov eax, [ebx + jpeg.work.x_num_blocks]
		590	mul [ebx + jpeg.work.block_width]
		591	mul [ebx + jpeg.work.y_num_blocks]
		592	mul [ebx + jpeg.work.block_height]
		593	add eax, eax
		594	mov [ebx + jpeg.work.dct_buffer_size], eax
		595	mul [ebx + jpeg.work.pixel_size]
		596	push eax
		597	call [mem.alloc]
		598	test eax, eax
		599	jnz @f
			; allocation failed: destroy the image and leave with image = 0
		600	xchg eax, [ebx + jpeg.work.image]
		601	push eax
		602	call img.destroy
		603	jmp .end
		604	@@:
		605	mov [ebx + jpeg.work.dct_buffer], eax
		606	.sof_noprogressive:
		607	jmp .markers_loop
		608	.end3:
		609	jmp .end ; trampoline to the common exit
			; .sos - SOS (start of scan) segment handler and scan decoder.
			; in: esi -> segment (length field first), edx = segment length,
			;     ebp = bytes remaining after the segment
			; Builds one 56-byte entry in jpeg.work.cur_components per scan component.
			; Entry layout as written below:
			;    +0  db V, db H, db VFactor, db HFactor
			;    +4  dd HIncrement          +8  dd VIncrement
			;    +12 dd QuantizationTable   +16 dd DCTable   +20 dd ACTable
			;    +24 dd width / HFactor     +28 dd copy of +24 (working counter)
			;    +32 dd HFactor+1 - (width % HFactor)
			;    +36 dd height / VFactor    +40 dd copy of +36 (working counter)
			;    +44 dd VFactor+1 - (height % VFactor)
			;    +48 dd DCPrediction (byte +51 carries a per-component flag)
			;    +52 dd ComponentOffset (index of the component within the frame)
			; Then it reads the three trailing scan parameters and decodes the
			; entropy-coded data: coefficients only for progressive frames, full
			; MCUs otherwise. Returns to .markers_loop when the scan is complete.
		610	.sos:
		611	; SOS marker found
		612	; frame must be already opened
		613	cmp [ebx + jpeg.work.image], 0
		614	jz .end3
		615	cmp edx, 6
		616	jb .end3
		617	; parse marker
		618	movzx eax, byte [esi+2] ; number of components in this scan
		619	test eax, eax
		620	jz .end3 ; must be nonzero
		621	cmp al, byte [ebx + jpeg.work.pixel_size]
		622	ja .end3 ; must be <= total number of components
		623	; mov [ns], eax
		624	cmp al, 1
		625	setz [ebx + jpeg.work.not_interleaved] ; a single-component scan is not interleaved
		626	lea ecx, [6+eax+eax]
		627	cmp edx, ecx
		628	jnz .end3 ; length must be exactly 6 + 2 * number of components
		629	mov ecx, eax
		630	lea edi, [ebx + jpeg.work.cur_components]
		631	add esi, 3 ; esi -> first component selector
		632	.sos_find_comp:
		633	lodsb ; got ComponentID, look for component info
		634	push ecx esi
		635	mov ecx, [ebx + jpeg.work.pixel_size]
		636	lea esi, [ebx + jpeg.work.components]
		637	and dword [edi+48], 0 ; DCPrediction = 0
		638	and dword [edi+52], 0 ; ComponentOffset = 0, counted up during the search
		639	@@:
		640	cmp [esi], al
		641	jz @f ; found: ZF=1
		642	inc dword [edi+52]
		643	add esi, 6 ; leaves ZF=0; neither loop nor the mov/pop below change flags
		644	loop @b
		645	@@:
		646	mov eax, [esi+1] ; eax = V, H, VFactor, HFactor of the frame component
		647	mov dl, [esi+5] ; dl = quantization table id
		648	pop esi ecx
		649	jnz .end3 ; bad ComponentID
		650	cmp [ebx + jpeg.work.not_interleaved], 0
		651	jz @f
		652	mov ax, 0x0101 ; non-interleaved scan: treat the component as V = H = 1
		653	@@:
		654	stosd ; db V, db H, db VFactor, db HFactor
		655	push ecx ; NOTE(review): still on the stack at the three `jz .end3` exits below
		656	xor eax, eax
		657	mov al, byte [edi-1] ; get HFactor
		658	mul byte [ebx+jpeg.work.pixel_size] ; number of components
		659	stosd ; HIncrement_i = HFactor_i * sizeof(pixel)
		660	mov al, byte [edi-4-2] ; get VFactor
		661	mul byte [ebx+jpeg.work.pixel_size] ; number of components
		662	mov ecx, [ebx+jpeg.work.image]
		663	imul eax, [ecx+Image.Width] ; image width
		664	stosd ; VIncrement_i = VFactor_i * sizeof(row)
		665	xchg eax, edx ; al = quantization table id saved in dl above
		666	and eax, 3
		667	cmp [ebx+jpeg.work.quant_tables_defined+eax], 0
		668	jz .end3 ; the referenced quantization table was never defined
		669	shl eax, 8
		670	lea eax, [ebx+eax+jpeg.work.quant_tables]
		671	stosd ; dd QuantizationTable
		672	lodsb ; table selectors: high nibble = DC table, low nibble = AC table
		673	movzx eax, al
		674	mov edx, eax
		675	shr eax, 4
		676	and edx, 3
		677	and eax, 3
		678	cmp [ebx+jpeg.work.dc_huffman_defined+eax], 0
		679	jnz .dc_table_ok
			; an undefined table is tolerated only in progressive mode (pointer = 0)
		680	cmp [ebx+jpeg.work.progressive], 0
		681	jz .end3
		682	xor eax, eax
		683	jmp .dc_table_done
		684	.dc_table_ok:
		685	; shl eax, 11
		686	imul eax, max_hufftable_size
		687	lea eax, [ebx+jpeg.work.dc_huffman+eax]
		688	.dc_table_done:
		689	cmp [ebx+jpeg.work.ac_huffman_defined+edx], 0
		690	jnz .ac_table_ok
		691	cmp [ebx+jpeg.work.progressive], 0
		692	jz .end3
		693	xor edx, edx
		694	jmp .ac_table_done
		695	.ac_table_ok:
		696	; shl edx, 11
		697	imul edx, max_hufftable_size
		698	lea edx, [ebx+jpeg.work.ac_huffman+edx]
		699	.ac_table_done:
		700	stosd ; dd DCTable
		701	xchg eax, edx
		702	stosd ; dd ACTable
		703	mov eax, [ecx+Image.Width]
		704	movzx ecx, byte [edi-21] ; get HFactor
		705	cdq ; edx:eax = width (width<0x10000, so as dword it is unsigned)
		706	div ecx
		707	stosd ; dd width / HFactor_i
		708	stosd ; second copy, used as the working counter
		709	xchg eax, ecx
		710	inc eax
		711	sub eax, edx
		712	stosd ; dd HFactor_i+1 - (width % HFactor_i)
		713	mov ecx, [ebx+jpeg.work.image]
		714	mov eax, [ecx+Image.Height]
		715	movzx ecx, byte [edi-34] ; get VFactor
		716	cdq
		717	div ecx
		718	stosd ; dd height / VFactor_i
		719	stosd ; second copy, used as the working counter
		720	xchg eax, ecx
		721	inc eax
		722	sub eax, edx
		723	stosd ; dd VFactor_i+1 - (height % VFactor_i)
		724	pop ecx ; ecx = number of scan components still to parse
		725	scasd ; dd DCPrediction
			; byte +51 (top byte of DCPrediction) = 0x80 when the component index
			; has odd parity (indices 1 and 2), 0 otherwise
		726	cmp dword [edi], 0
		727	setnp al
		728	ror al, 1
		729	mov byte [edi-1], al
		730	scasd ; dd ComponentOffset
		731	dec ecx
		732	jnz .sos_find_comp
		733	mov [ebx+jpeg.work.cur_components_end], edi
			; the three trailing scan bytes are stored as four bytes at ScanStart:
			; first byte, second byte (must be <= 63), low nibble and high nibble
			; of the third byte
		734	lea edi, [ebx+jpeg.work.ScanStart]
		735	movsb
		736	cmp byte [esi], 63
		737	ja .end3
		738	movsb
		739	lodsb
		740	push eax
		741	and al, 0xF
		742	stosb
		743	pop eax
		744	shr al, 4
		745	stosb
		746	; now unpack data
		747	call init_limits
		748	and [ebx+jpeg.work.decoded_MCUs], 0
		749	mov [ebx+jpeg.work.cur_rst_marker], 7 ; next_MCU increments modulo 8, so the first expected marker is RST0
		750	and [ebx+jpeg.work.huffman_bits], 0
		751	cmp [ebx+jpeg.work.progressive], 0
		752	jz .sos_noprogressive
		753	; progressive mode - only decode DCT coefficients
		754	; initialize pointers to coefficients data
		755	; zero number of EOBs for AC coefficients
		756	; redefine HIncrement and VIncrement
		757	lea edi, [ebx+jpeg.work.cur_components]
		758	.coeff_init:
		759	mov eax, [ebx+jpeg.work.dct_buffer_size]
		760	mul dword [edi+52] ; component index * per-component buffer size
		761	add eax, [ebx+jpeg.work.dct_buffer]
		762	mov [edi+12], eax ; +12 now points to this component's coefficients
		763	and dword [edi+52], 0
		764	cmp [ebx+jpeg.work.ScanStart], 0
		765	jz .scan_dc
		766	cmp dword [edi+20], 0 ; AC scan: the AC table must exist
		767	jz .end3
		768	jmp @f
		769	.scan_dc:
		770	cmp dword [edi+16], 0 ; DC scan: the DC table must exist
		771	jz .end3
		772	@@:
		773	movzx eax, byte [edi+1]
		774	shl eax, 7 ; H blocks * 128 bytes (64 word coefficients per block)
		775	mov [edi+4], eax
		776	mov eax, [edi+28]
		777	mov cl, [edi+3]
		778	cmp cl, [edi+32]
		779	sbb eax, -7-1 ; eax += 8 - CF
		780	shr eax, 3 ; number of 8-sample blocks per row, rounded up
		781	shl eax, 7 ; ... times 128 bytes per block
		782	mov [edi+8], eax
		783	add edi, 56
		784	cmp edi, [ebx+jpeg.work.cur_components_end]
		785	jb .coeff_init
		786	; unpack coefficients
		787	; N.B. Speed optimization has sense here.
		788	.coeff_decode_loop:
		789	lea edx, [ebx+jpeg.work.cur_components]
		790	.coeff_components_loop:
		791	mov edi, [edx+12]
		792	movzx ecx, byte [edx] ; ecx = V: block rows of this component per MCU
		793	push dword [edx+40]
		794	push edi
		795	.coeff_y_loop:
		796	push ecx
		797	movzx eax, byte [edx+1] ; eax = H: blocks per row of this component per MCU
		798	push dword [edx+28]
		799	push edi
		800	.coeff_x_loop:
		801	cmp dword [edx+40], 0
		802	jl @f
		803	cmp dword [edx+28], 0
		804	jge .realdata
		805	@@:
			; block lies outside the image: in an interleaved scan it is still present
			; in the stream, so decode it into the scratch buffer and discard it
		806	cmp [ebx+jpeg.work.not_interleaved], 0
		807	jnz .norealdata
		808	push eax edi
		809	lea edi, [ebx+jpeg.work.dct_coeff]
		810	call decode_progressive_coeff
		811	pop edi eax
		812	jmp .norealdata
		813	.realdata:
		814	push eax
		815	call decode_progressive_coeff
		816	add edi, 64*2 ; advance to the next block of coefficients
		817	pop eax
		818	.norealdata:
		819	sub dword [edx+28], 8
		820	sub eax, 1
		821	jnz .coeff_x_loop
		822	pop edi
		823	pop dword [edx+28]
		824	add edi, [edx+8]
		825	pop ecx
		826	sub dword [edx+40], 8
		827	sub ecx, 1
		828	jnz .coeff_y_loop
		829	movzx eax, byte [edx+1]
		830	shl eax, 3
		831	pop edi
		832	add edi, [edx+4]
		833	pop dword [edx+40]
		834	sub [edx+28], eax
		835	mov [edx+12], edi
		836	add edx, 56
		837	cmp edx, [ebx+jpeg.work.cur_components_end]
		838	jnz .coeff_components_loop
		839	call next_MCU
		840	jc .norst
		841	sub [ebx+jpeg.work.cur_x], 1
		842	jnz .coeff_decode_loop
		843	call next_line
			; move every component's coefficient pointer from the end of this MCU row
			; to the start of the next one
		844	lea edx, [ebx+jpeg.work.cur_components]
		845	@@:
		846	mov eax, [ebx+jpeg.work.max_x]
		847	imul eax, [edx+4]
		848	sub [edx+12], eax
		849	movzx eax, byte [edx]
		850	imul eax, [edx+8]
		851	add [edx+12], eax
		852	add edx, 56
		853	cmp edx, [ebx+jpeg.work.cur_components_end]
		854	jnz @b
		855	sub [ebx+jpeg.work.cur_y], 1
		856	jnz .coeff_decode_loop
		857	jmp .markers_loop
		858	.norst:
		859	.end4:
		860	jmp .end3 ; trampoline to the common exit
		861	.sos_noprogressive:
		862	; normal mode - unpack JPEG image
		863	mov edi, [ebx+jpeg.work.image]
		864	mov edi, [edi+Image.Data]
		865	mov [ebx+jpeg.work.cur_out_ptr], edi
		866	; N.B. Speed optimization has sense here.
		867	.decode_loop:
		868	call decode_MCU
		869	call next_MCU
		870	jc .end4 ; expected restart marker is missing
		871	sub [ebx+jpeg.work.cur_x], 1
		872	jnz .decode_loop
		873	call next_line
		874	sub [ebx+jpeg.work.cur_y], 1
		875	jnz .decode_loop
		876	jmp .markers_loop
		877
		878	get_marker:
		879	; in: esi -> data
		880	; out: CF=0, al=marker value - ok
		881	; CF=1 - no marker
		882	sub ebp, 1
		883	jc .ret
		884	lodsb
		885	if 1
		886	cmp al, 0xFF
		887	jae @f
		888	; Some stupid men, which do not read specifications and manuals,
		889	; sometimes create markers with length field two less than true
		890	; value (in JPEG length of marker = length of data INCLUDING
		891	; length field itself). To open such files, allow 2 bytes
		892	; before next marker.
		893	cmp ebp, 2
		894	jb .ret
		895	lodsb
		896	lodsb
		897	end if
		898	cmp al, 0xFF
		899	jb .ret
		900	@@:
		901	sub ebp, 1
		902	jc .ret
		903	lodsb
		904	cmp al, 0xFF
		905	jz @b
		906	clc
		907	.ret:
		908	ret
		909
			; decode_MCU - decode one MCU and store its samples into the image.
			; in:  ebx -> jpeg.work, esi/ebp = input position / bytes remaining
			;      (used by decode_data_unit, which is defined outside this view)
			; For every entry of jpeg.work.cur_components it decodes V*H data units
			; with decode_data_unit and copies each decoded 8x8 block from
			; jpeg.work.decoded_data to the image, replicating every sample
			; VFactor x HFactor times and clipping at the image edges.
			; On return cur_out_ptr has been advanced by cur_block_dx.
			; Offsets into the component entry are those written by the SOS parser
			; (+0 V, +1 H, +2 VFactor, +3 HFactor, +4 HIncrement, +8 VIncrement,
			; +28/+40 remaining width/height counters, +32/+44 edge parameters,
			; +52 component offset).
		910	align 16
		911	decode_MCU:
		912	lea edx, [ebx+jpeg.work.cur_components]
		913	.components_loop:
		914	; decode each component
		915	push [ebx+jpeg.work.cur_out_ptr]
		916	movzx ecx, byte [edx] ; ecx = V
		917	push dword [edx+40]
		918	; we have H_i * V_i blocks of packed data, decode them
		919	.y_loop_1:
		920	push [ebx+jpeg.work.cur_out_ptr]
		921	push ecx
		922	movzx eax, byte [edx+1] ; eax = H
		923	push dword [edx+28]
		924	.x_loop_1:
		925	push eax
		926	call decode_data_unit
			; skip the copy if the block lies completely outside the image
		927	cmp dword [edx+40], 0
		928	jl .nocopyloop
		929	cmp dword [edx+28], 0
		930	jl .nocopyloop
		931	; now we have decoded block 8*8 in decoded_data
		932	; H_i * V_i packed blocks 8*8 make up one block (8*HMax) * (8*VMax)
		933	; so each pixel in packed block corresponds to HFact * VFact pixels
		934	movzx ecx, byte [edx+2] ; ecx = VFactor: vertical replication count
		935	push esi ebp ; esi and ebp are reused as scratch by the copy loop
		936	mov edi, [ebx+jpeg.work.cur_out_ptr]
		937	add edi, [edx+52] ; select this component's byte within the pixel
		938	.y_loop_2:
		939	push ecx edi
			; ecx = number of block rows to copy: 8, or fewer at the bottom edge
		940	cmp ecx, [edx+44]
		941	mov ecx, [edx+40]
		942	sbb ecx, 8-1
		943	sbb eax, eax
		944	and ecx, eax
		945	add ecx, 8
		946	jz .skip_x_loop_2
		947	movzx eax, byte [edx+3] ; eax = HFactor: horizontal replication count
		948	.x_loop_2:
		949	push eax ecx edi
			; eax = -(number of columns to skip) at the right edge, 0 otherwise
		950	cmp eax, [edx+32]
		951	mov eax, [edx+28]
		952	sbb eax, 8-1
		953	sbb ebp, ebp
		954	and eax, ebp
			; ebp = entry point into the unrolled copy below: each skipped column
			; moves the entry 3 bytes forward (one movsb / add+movsb step)
		955	mov ebp, .copyiter_all
		956	lea esi, [ebx+jpeg.work.decoded_data]
		957	sub ebp, eax
		958	sub ebp, eax
		959	sub ebp, eax
		960	mov eax, [edx+4]
		961	sub eax, 1 ; movsb already advances edi by 1
		962	.copyloop:
		963	push esi edi
		964	jmp ebp
		965	.copyiter_all:
		966	movsb
		967	repeat 7
		968	add edi, eax
		969	movsb
		970	end repeat
		971	nop ; padding so that the maximum skip still lands inside this sequence
		972	nop
		973	pop edi esi
		974	add edi, [edx+8] ; next output row of this block
		975	add esi, 8 ; next row of the decoded block
		976	sub ecx, 1
		977	jnz .copyloop
		978	pop edi ecx eax
		979	add edi, [ebx+jpeg.work.pixel_size] ; next horizontal replica
		980	sub eax, 1
		981	jnz .x_loop_2
		982	.skip_x_loop_2:
		983	pop edi ecx
		984	add edi, [ebx+jpeg.work.line_size] ; next vertical replica
		985	sub ecx, 1
		986	jnz .y_loop_2
		987	pop ebp esi
		988	.nocopyloop:
		989	mov eax, [ebx+jpeg.work.delta_x]
		990	add [ebx+jpeg.work.cur_out_ptr], eax
		991	pop eax
		992	sub dword [edx+28], 8
		993	sub eax, 1
		994	jnz .x_loop_1
		995	pop dword [edx+28]
		996	pop ecx
		997	pop eax ; eax = cur_out_ptr at the start of this block row
		998	sub dword [edx+40], 8
		999	add eax, [ebx+jpeg.work.delta_y]
		1000	mov [ebx+jpeg.work.cur_out_ptr], eax
		1001	sub ecx, 1
		1002	jnz .y_loop_1
		1003	movzx eax, byte [edx+1]
		1004	pop dword [edx+40]
		1005	shl eax, 3
		1006	pop [ebx+jpeg.work.cur_out_ptr]
		1007	sub dword [edx+28], eax ; this MCU consumed 8*H samples of the row
		1008	add edx, 56
		1009	cmp edx, [ebx+jpeg.work.cur_components_end]
		1010	jb .components_loop
		1011	mov eax, [ebx+jpeg.work.cur_block_dx]
		1012	add [ebx+jpeg.work.cur_out_ptr], eax
		1013	ret
		1014
			; next_MCU - account for one decoded MCU and handle restart intervals.
			; in:  ebx -> jpeg.work, esi -> input, ebp = bytes remaining
			; out: CF=0 - ok; CF=1 - the expected restart marker is missing
			; When restart_interval is nonzero and that many MCUs have been decoded,
			; the counter and huffman_bits are reset and, unless this was the last
			; MCU of the scan (cur_x = 1 and cur_y = 1), the next two input bytes
			; must be 0xFF followed by the next RSTn in sequence; the marker is then
			; consumed and the low word of every component's DC prediction is zeroed.
			; Clobbers eax, and edx when a restart marker is processed.
		1015	align 16
		1016	next_MCU:
		1017	add [ebx+jpeg.work.decoded_MCUs], 1
		1018	mov eax, [ebx+jpeg.work.restart_interval]
		1019	test eax, eax
		1020	jz .no_restart ; restart intervals are not in use
		1021	cmp [ebx+jpeg.work.decoded_MCUs], eax
		1022	jb .no_restart
		1023	and [ebx+jpeg.work.decoded_MCUs], 0
		1024	and [ebx+jpeg.work.huffman_bits], 0
		1025	cmp [ebx+jpeg.work.cur_x], 1
		1026	jnz @f
		1027	cmp [ebx+jpeg.work.cur_y], 1
		1028	jz .no_restart ; last MCU of the scan: no marker follows
		1029	@@:
		1030	; restart marker must be present
		1031	sub ebp, 2
		1032	js .error
		1033	cmp byte [esi], 0xFF
		1034	jnz .error
		1035	mov al, [ebx+jpeg.work.cur_rst_marker]
		1036	inc eax
		1037	and al, 7 ; markers cycle RST0..RST7
		1038	mov [ebx+jpeg.work.cur_rst_marker], al
		1039	add al, 0xD0
		1040	cmp [esi+1], al
		1041	jnz .error
		1042	add esi, 2
		1043	; handle restart marker - zero all DC predictions
		1044	lea edx, [ebx+jpeg.work.cur_components]
		1045	@@:
		1046	and word [edx+48], 0 ; only the low word; the top byte set by the SOS parser is kept
		1047	add edx, 56
		1048	cmp edx, [ebx+jpeg.work.cur_components_end]
		1049	jb @b
		1050	.no_restart:
		1051	clc
		1052	ret
		1053	.error:
		1054	stc
		1055	ret
		1056
			; next_line - move the decoder state to the start of the next MCU row.
			; in:  ebx -> jpeg.work
			; Reloads cur_x from max_x, moves cur_out_ptr back by
			; max_x * cur_block_dx and forward by cur_block_dy, and for every
			; current component reloads the width counter (+28) from +24 and
			; subtracts 8*V from the height counter (+40).
			; Clobbers eax and edx (mul also overwrites edx with the high half).
		1057	next_line:
		1058	mov eax, [ebx+jpeg.work.max_x]
		1059	mov [ebx+jpeg.work.cur_x], eax
		1060	mul [ebx+jpeg.work.cur_block_dx]
		1061	sub eax, [ebx+jpeg.work.cur_block_dy]
		1062	sub [ebx+jpeg.work.cur_out_ptr], eax ; ptr = ptr - row width + one block row down
		1063	lea edx, [ebx+jpeg.work.cur_components]
		1064	@@:
		1065	mov eax, [edx+24]
		1066	mov [edx+28], eax ; restart the horizontal counter
		1067	movzx eax, byte [edx]
		1068	shl eax, 3
		1069	sub [edx+40], eax ; 8*V fewer rows remain for this component
		1070	add edx, 56
		1071	cmp edx, [ebx+jpeg.work.cur_components_end]
		1072	jb @b
		1073	ret
		1074
			; init_limits - set up MCU counts and output strides for a new scan.
			; in:  ebx -> jpeg.work with cur_components and not_interleaved filled
			; Copies x_num_blocks / y_num_blocks / block_delta_x / block_delta_y to
			; max_x / max_y / cur_block_dx / cur_block_dy. For a non-interleaved scan
			; these are then recomputed from the first (only) current component:
			; the counts become its size in 8-sample blocks, rounded up, and the
			; strides become HFactor * delta_x and VFactor * delta_y.
			; Finally cur_x and cur_y are loaded from max_x and max_y.
			; The memory-to-memory copies use push/pop and leave registers alone;
			; eax, ecx and edx are clobbered only in the non-interleaved case.
		1075	init_limits:
		1076	push [ebx+jpeg.work.x_num_blocks]
		1077	pop [ebx+jpeg.work.max_x]
		1078	push [ebx+jpeg.work.y_num_blocks]
		1079	pop [ebx+jpeg.work.max_y]
		1080	push [ebx+jpeg.work.block_delta_x]
		1081	pop [ebx+jpeg.work.cur_block_dx]
		1082	push [ebx+jpeg.work.block_delta_y]
		1083	pop [ebx+jpeg.work.cur_block_dy]
		1084	cmp [ebx+jpeg.work.not_interleaved], 0
		1085	jz @f
		1086	mov eax, dword [ebx+jpeg.work.cur_components+28] ; width / HFactor
		1087	movzx ecx, byte [ebx+jpeg.work.cur_components+3] ; ecx = HFactor
		1088	cmp cl, [ebx+jpeg.work.cur_components+32]
		1089	sbb eax, -7-1 ; eax += 8 - CF
		1090	shr eax, 3
		1091	mov [ebx+jpeg.work.max_x], eax
		1092	mov eax, dword [ebx+jpeg.work.cur_components+40] ; height / VFactor
		1093	movzx edx, byte [ebx+jpeg.work.cur_components+2] ; edx = VFactor
		1094	cmp dl, [ebx+jpeg.work.cur_components+44]
		1095	sbb eax, -7-1 ; eax += 8 - CF
		1096	shr eax, 3
		1097	mov [ebx+jpeg.work.max_y], eax
		1098	imul ecx, [ebx+jpeg.work.delta_x]
		1099	mov [ebx+jpeg.work.cur_block_dx], ecx
		1100	imul edx, [ebx+jpeg.work.delta_y]
		1101	mov [ebx+jpeg.work.cur_block_dy], edx
		1102	@@:
		1103	push [ebx+jpeg.work.max_x]
		1104	pop [ebx+jpeg.work.cur_x]
		1105	push [ebx+jpeg.work.max_y]
		1106	pop [ebx+jpeg.work.cur_y]
		1107	ret
		1108
		1109	;macro get_bit
		1110	;{
		1111	;local .l1,.l2,.marker
		1112	; add cl, cl
		1113	; jnz .l1
		1114	; sub ebp, 1
		1115	; js decode_data_unit.eof
		1116	; mov cl, [esi]
		1117	; cmp cl, 0xFF
		1118	; jnz .l2
		1119	;.marker:
		1120	; add esi, 1
		1121	; sub ebp, 1
		1122	; js decode_data_unit.eof
		1123	; cmp byte [esi], 0xFF
		1124	; jz .marker
		1125	; cmp byte [esi], 0
		1126	; jnz decode_data_unit.eof
		1127	;.l2:
		1128	; sub esi, -1
		1129	; adc cl, cl
		1130	;.l1:
		1131	;}
			; get_bit - fetch the next bit of the entropy-coded stream into CF.
			; State: ch = current byte with the next bit in its top position,
			;        cl = number of unread bits left in ch,
			;        esi -> next input byte, ebp = bytes remaining.
			; When ch is exhausted a new byte is loaded; a 0xFF byte must be followed
			; (after optional further 0xFF bytes) by 0x00, which is skipped. Anything
			; else, or running out of data, jumps to the label .eof_pop<stack_depth>,
			; which the code using the macro has to provide.
		1132	macro get_bit stack_depth
		1133	{
		1134	local .l1,.l2,.marker
		1135	sub cl, 1
		1136	jns .l1 ; bits still available in ch
		1137	sub ebp, 1
		1138	js .eof_pop#stack_depth
		1139	mov ch, [esi]
		1140	cmp ch, 0xFF
		1141	jnz .l2
		1142	.marker:
		1143	add esi, 1
		1144	sub ebp, 1
		1145	js .eof_pop#stack_depth
		1146	cmp byte [esi], 0xFF
		1147	jz .marker
		1148	cmp byte [esi], 0
		1149	jnz .eof_pop#stack_depth ; a real marker, not a stuffed 0xFF
		1150	.l2:
		1151	add esi, 1
		1152	mov cl, 7 ; 8 new bits, one of which is consumed right now
		1153	.l1:
		1154	add ch, ch ; shift the top bit out into CF
		1155	}
			; get_bits - read bl bits from the entropy-coded stream into eax.
			; State on entry and exit is the same as for get_bit (ch, cl, esi, ebp);
			; bl = number of bits requested. ebx is saved and restored; dl is used
			; as scratch.
			; Input bytes are loaded as needed with the same 0xFF handling as
			; get_bit; failures jump to .eof_pop<stack_depth_p1> (one more dword is
			; on the stack because of the saved ebx).
			; If restore_edx is `true`, edx is popped from the stack before the final
			; adjustment, so the caller must have pushed it.
			; NOTE(review): the closing rcr/sar/add/adc sequence appears to convert
			; the raw bits to a signed value in the way JPEG encodes coefficient
			; magnitudes - confirm against the spec before relying on this.
		1156	macro get_bits stack_depth,stack_depth_p1,restore_edx
		1157	{
		1158	local .l1,.l2,.l3,.marker2
		1159	movzx eax, ch
		1160	mov dl, cl ; dl = bits currently available
		1161	shl eax, 24 ; available bits go to the top of eax
		1162	neg cl
		1163	push ebx
		1164	add cl, 24 ; cl = shift that places the next byte right below them
		1165	.l1:
		1166	cmp bl, dl
		1167	jbe .l2 ; enough bits collected
		1168	sub bl, dl
		1169	sub ebp, 1
		1170	js .eof_pop#stack_depth_p1
		1171	mov ch, [esi]
		1172	cmp ch, 0xFF
		1173	jnz .l3
		1174	.marker2:
		1175	add esi, 1
		1176	sub ebp, 1
		1177	js .eof_pop#stack_depth_p1
		1178	cmp byte [esi], 0xFF
		1179	jz .marker2
		1180	cmp byte [esi], 0
		1181	jnz .eof_pop#stack_depth_p1
		1182	.l3:
		1183	movzx edx, ch
		1184	add esi, 1
		1185	shl edx, cl
		1186	sub cl, 8
		1187	or eax, edx
		1188	mov dl, 8 ; the new byte contributes 8 available bits
		1189	jmp .l1
		1190	.l2:
		1191	mov cl, bl
		1192	sub dl, bl ; dl = bits left in ch after this read
		1193	shl ch, cl ; drop the consumed bits from ch
		1194	pop ebx
		1195	cmp eax, 80000000h
		1196	rcr eax, 1
		1197	mov cl, 31
		1198	sub cl, bl
		1199	sar eax, cl
		1200	mov cl, dl ; cl = updated count of unread bits in ch
		1201	if restore_edx eq true
		1202	pop edx
		1203	end if
		1204	add eax, 80000000h
		1205	adc eax, 80000000h
		1206	}
		1207	; macro get_huffman_code
		1208	; {
		1209	; local .l1
		1210	; xor ebx, ebx
		1211	; .l1:
		1212	; get_bit
		1213	; adc ebx, ebx
		1214	; mov eax, [eax+4*ebx]
		1215	; xor ebx, ebx
		1216	; cmp eax, -1
		1217	; jz .eof_pop
		1218	; cmp eax, 0x1000
		1219	; jae .l1
		1220	; mov ebx, eax
		1221	; }
			; Decode one Huffman symbol from the input bit stream using multi-level lookup tables.
			; In:  eax -> Huffman lookup table (first level: 256 entries of 2 bytes, length then value;
			;             further levels: 16-entry tables of 2 bytes each, located at eax+0x200+index*32)
			;      esi -> next unread input byte, ebp = number of input bytes left
			;      ch = unread bits of the current byte, left-aligned; cl = number of unread bits in ch
			; Out: eax = ebx = value byte of the matched table entry; cl, ch, esi, ebp updated; edx clobbered
			; stack_depth selects the .eof_pop label (with that number appended) of the enclosing procedure
			; that is jumped to on end of input, on a marker in the stream or on an invalid code;
			; stack_depth_p1 names the label used while this macro keeps one extra dword on the stack.
		1222	macro get_huffman_code stack_depth,stack_depth_p1
		1223	{
		1224	local .l1,.l2,.l3,.l4,.l5,.l6,.nomarker1,.marker1,.nomarker2,.marker2,.nomarker3,.marker3,.done
		1225	; 1. (First level in Huffman table) Does the current Huffman code fit in 8 bits
		1226	; and have we got enough bits?
		1227	movzx ebx, ch ; index the first level by the unread bits (missing low bits are zero)
		1228	cmp byte [eax+ebx*2], cl
		1229	jbe .l1
		1230	; 2a. No; load next byte
		1231	sub ebp, 1
		1232	js .eof_pop#stack_depth
		1233	mov ch, [esi]
		1234	movzx edx, ch
		1235	cmp ch, 0xFF
		1236	jnz .nomarker1
		1237	.marker1:
			; a 0xFF data byte must be followed by 0 (after optional further 0xFF bytes);
			; anything else is treated as a marker and ends decoding
		1238	add esi, 1
		1239	sub ebp, 1
		1240	js .eof_pop#stack_depth
		1241	cmp byte [esi], 0xFF
		1242	jz .marker1
		1243	cmp byte [esi], 0
		1244	jnz .eof_pop#stack_depth
		1245	.nomarker1:
		1246	shr edx, cl ; place the new byte right after the cl bits that were already pending
		1247	add esi, 1
		1248	or ebx, edx
		1249	; 3a. (First level in Huffman table, >=8 bits known) Does the current Huffman code fit in 8 bits?
		1250	cmp byte [eax+ebx*2], 8
		1251	jbe .l2
		1252	jl .eof_pop#stack_depth ; length byte with the high bit set (negative as signed): invalid code
		1253	; 4aa. No; go to next level
		1254	movzx ebx, byte [eax+ebx*2+1] ; value byte is the index of the next-level table
		1255	mov dl, ch
		1256	shl ebx, 5
		1257	ror edx, cl
		1258	lea ebx, [eax+ebx+0x200]
		1259	shr edx, 24
		1260	push edx ; keep the 8 bits following the first-level byte; one extra dword on the stack from here
		1261	shr edx, 4
		1262	; 5aa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits
		1263	; and have we got enough bits?
		1264	cmp byte [ebx+edx*2], cl
		1265	jbe .l3
		1266	; 6aaa. No; have we got 12 bits?
		1267	cmp cl, 4
		1268	jae .l4
		1269	; 7aaaa. No; load next byte
		1270	pop edx
		1271	sub ebp, 1
		1272	js .eof_pop#stack_depth
		1273	mov ch, [esi]
		1274	cmp ch, 0xFF
		1275	jnz .nomarker2
		1276	.marker2:
		1277	add esi, 1
		1278	sub ebp, 1
		1279	js .eof_pop#stack_depth
		1280	cmp byte [esi], 0xFF
		1281	jz .marker2
		1282	cmp byte [esi], 0
		1283	jnz .eof_pop#stack_depth
		1284	.nomarker2:
		1285	push ecx ; ch is used as scratch below, cl/ch are restored by the pop
		1286	shr ch, cl
		1287	add esi, 1
		1288	or dl, ch
		1289	pop ecx
		1290	push edx
		1291	shr edx, 4
		1292	; 8aaaa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits?
		1293	cmp byte [ebx+edx*2], 4
		1294	jbe .l5
		1295	jl .eof_pop#stack_depth_p1 ; invalid entry; edx is still on the stack, hence the deeper label
		1296	; 9aaaaa. No; go to next level
		1297	movzx ebx, byte [ebx+edx*2+1]
		1298	pop edx
		1299	shl ebx, 5
		1300	and edx, 0xF
		1301	lea ebx, [eax+ebx+0x200]
		1302	; 10aaaaa. Get current code length and value
		1303	sub cl, [ebx+edx*2]
		1304	movzx eax, byte [ebx+edx*2+1]
		1305	neg cl ; cl = number of bits taken from the byte just loaded
		1306	shl ch, cl
		1307	neg cl
		1308	add cl, 8 ; cl = bits of that byte still unread
		1309	jmp .done
		1310	.l5:
		1311	; 9aaaab. Yes; get current code length and value
		1312	sub cl, [ebx+edx*2]
		1313	movzx eax, byte [ebx+edx*2+1]
		1314	neg cl
		1315	pop edx
		1316	shl ch, cl
		1317	neg cl
		1318	add cl, 8
		1319	jmp .done
		1320	.l4:
		1321	; 7aaab. Yes; go to next level
		1322	movzx ebx, byte [ebx+edx*2+1]
		1323	pop edx
		1324	shl ebx, 5
		1325	and edx, 0xF
		1326	lea ebx, [eax+ebx+0x200]
		1327	; 8aaab. (Third level in Huffman table) Have we got enough bits?
		1328	cmp [ebx+edx*2], cl
		1329	jbe .l6
		1330	; 9aaaba. No; load next byte
		1331	sub ebp, 1
		1332	js .eof_pop#stack_depth
		1333	mov ch, [esi]
		1334	cmp ch, 0xFF
		1335	jnz .nomarker3
		1336	.marker3:
		1337	add esi, 1
		1338	sub ebp, 1
		1339	js .eof_pop#stack_depth
		1340	cmp byte [esi], 0xFF
		1341	jz .marker3
		1342	cmp byte [esi], 0
		1343	jnz .eof_pop#stack_depth
		1344	.nomarker3:
		1345	push ecx
		1346	shr ch, cl
		1347	add esi, 1
		1348	or dl, ch
		1349	pop ecx
		1350	; 10aaaba. Get current code length and value
		1351	sub cl, [ebx+edx*2]
		1352	movzx eax, byte [ebx+edx*2+1]
		1353	neg cl
		1354	shl ch, cl
		1355	neg cl
		1356	add cl, 8
		1357	jmp .done
		1358	.l3:
		1359	; 6aab. Yes; get current code length and value
		1360	pop eax ; drop the dword saved at step 4aa (eax is reloaded below)
		1361	.l6:
		1362	; 9aaabb. Yes; get current code length and value
		1363	sub cl, [ebx+edx*2] ; cl = bits that remain unread after this code
		1364	movzx eax, byte [ebx+edx*2+1]
			; shift ch left by 8-cl, done as a shift by 7-cl followed by one doubling
		1365	xor cl, 7
		1366	shl ch, cl
		1367	xor cl, 7
		1368	add ch, ch
		1369	jmp .done
		1370	.l2:
		1371	; 3ab. Yes; get current code length and value
		1372	sub cl, [eax+ebx*2]
		1373	movzx eax, byte [eax+ebx*2+1]
		1374	neg cl
		1375	shl ch, cl
		1376	neg cl
		1377	add cl, 8
		1378	jmp .done
		1379	.l1:
		1380	; 3b. Yes; get current code length and value
		1381	mov dl, [eax+ebx*2]
		1382	movzx eax, byte [eax+ebx*2+1]
		1383	xchg cl, dl ; cl = code length, dl = bits that were available
		1384	sub dl, cl
		1385	shl ch, cl ; discard the consumed bits
		1386	mov cl, dl ; cl = bits still unread
		1387	.done:
		1388	mov ebx, eax ; the decoded value is returned in both eax and ebx
		1389	}
		1390	; Decode DCT coefficients for one 8*8 block in progressive mode
		1391	; from input stream, given by pointer esi and length ebp
			; In:  ebx -> jpeg.work, edx -> data of the current component,
			;      edi -> coefficients of the block (16-bit words)
			; Fields of the component data used here: [edx+16] table passed to get_huffman_code
			; for DC, [edx+20] table for AC, [edx+48] running DC value, [edx+52] number of blocks
			; still covered by a pending end-of-band run.
			; The bit-buffer state is kept in ecx and written back to jpeg.work.huffman_bits on exit.
			; On end of input or bad data control leaves through the .eof_pop labels.
			; NOTE(review): get_bit and get_bits are macros defined earlier in the file; from their
			; use here get_bit leaves the bit in CF, and get_bits with 'true' as its last argument
			; appears to pop the saved edx - confirm against the macro definitions.
		1392	; N.B. Speed optimization makes sense here.
		1393	align 16
		1394	decode_progressive_coeff:
		1395	mov ecx, [ebx+jpeg.work.huffman_bits]
		1396	cmp [ebx+jpeg.work.ScanStart], 0
		1397	jnz .ac
		1398	; DC coefficient
		1399	cmp [ebx+jpeg.work.ApproxPosHigh], 0
		1400	jz .dc_first
		1401	; DC coefficient, subsequent passes
			; one more bit of the DC value: read it and OR it in at bit position ApproxPosLow
		1402	xor eax, eax
		1403	get_bit 0
		1404	adc eax, eax
		1405	mov [ebx+jpeg.work.huffman_bits], ecx
		1406	mov cl, [ebx+jpeg.work.ApproxPosLow]
		1407	shl eax, cl
		1408	or [edi], ax
		1409	ret
		1410	.dc_first:
		1411	; DC coefficient, first pass
		1412	mov eax, [edx+16]
		1413	push ebx
		1414	push edx
		1415	get_huffman_code 2,3
		1416	get_bits 2,3,true
		1417	pop ebx
		1418	add eax, [edx+48] ; add the running DC value of this component
		1419	mov [edx+48], ax
		1420	mov [ebx+jpeg.work.huffman_bits], ecx
		1421	mov cl, [ebx+jpeg.work.ApproxPosLow]
		1422	shl eax, cl
		1423	mov [edi], ax
		1424	ret
		1425	.ac:
		1426	; AC coefficients
		1427	movzx eax, [ebx+jpeg.work.ScanStart] ; eax = index of the current coefficient in zigzag order
		1428	cmp al, [ebx+jpeg.work.ScanEnd]
		1429	ja .ret
		1430	cmp dword [edx+52], 0 ; is an end-of-band run from a previous block still pending?
		1431	jnz .was_eob
		1432	push ebx
		1433	.acloop:
		1434	push edx
		1435	push eax
		1436	mov eax, [edx+20]
		1437	get_huffman_code 3,4
		1438	pop eax
		1439	test ebx, 15 ; low nibble of the symbol = size of the new coefficient, high nibble = run
		1440	jz .band
		1441	push eax ebx
		1442	and ebx, 15
		1443	get_bits 4,5,false
		1444	pop ebx
		1445	xchg eax, [esp] ; new coefficient value goes on the stack, eax = index again
		1446	shr ebx, 4 ; ebx = number of zero coefficients to skip
		1447	mov edx, [esp+8] ; edx -> jpeg.work (the ebx saved before .acloop)
		1448	.zeroloop1:
			; walk over the band: already nonzero coefficients get a correction bit,
			; zero coefficients are counted against the run in ebx
		1449	push eax ebx
		1450	movzx eax, byte [zigzag+eax]
		1451	xor ebx, ebx
		1452	cmp word [edi+eax], bx
		1453	jz .zeroloop2
		1454	get_bit 5
		1455	jnc @f
		1456	push ecx
		1457	mov cl, [edx+jpeg.work.ApproxPosLow]
		1458	xor ebx, ebx
		1459	cmp byte [edi+eax+1], 80h ; CF = 1 if the coefficient is non-negative
		1460	adc ebx, 0
		1461	add ebx, ebx
		1462	sub ebx, 1 ; ebx = +1 for a non-negative coefficient, -1 for a negative one
		1463	shl ebx, cl
		1464	pop ecx
		1465	add [edi+eax], bx
		1466	@@:
		1467	pop ebx eax
		1468	@@:
		1469	add eax, 1
		1470	cmp al, [edx+jpeg.work.ScanEnd]
		1471	ja decode_data_unit.eof_pop3 ; band ended before the run was used up: treated as bad data
		1472	jmp .zeroloop1
		1473	.zeroloop2:
		1474	pop ebx eax
		1475	sub ebx, 1
		1476	jns @b
		1477	.nozero1:
		1478	pop ebx ; ebx = new coefficient value (0 when coming from the run-of-16 case in .band)
		1479	test ebx, ebx
		1480	jz @f
		1481	push eax
		1482	movzx eax, byte [zigzag+eax]
		1483	push ecx
		1484	mov cl, [edx+jpeg.work.ApproxPosLow]
		1485	shl ebx, cl
		1486	pop ecx
		1487	mov [edi+eax], bx
		1488	pop eax
		1489	@@:
		1490	add eax, 1
		1491	cmp al, [edx+jpeg.work.ScanEnd]
		1492	pop edx ; edx -> component data again; pop leaves the flags of the cmp intact
		1493	jbe .acloop
		1494	pop ebx
		1495	mov [ebx+jpeg.work.huffman_bits], ecx
		1496	.ret:
		1497	ret
			; error exits: drop the given number of dwords from the stack, then go to the common handler
		1498	.eof_pop5:
		1499	pop ebx
		1500	.eof_pop4:
		1501	pop ebx
		1502	.eof_pop3:
		1503	pop ebx
		1504	.eof_pop2:
		1505	pop ebx
		1506	.eof_pop1:
		1507	pop ebx
		1508	.eof_pop0:
		1509	jmp decode_data_unit.eof_pop0
		1510	.band:
			; size nibble is zero: high nibble 15 means a run of zeroes without a new coefficient,
			; anything else starts an end-of-band run
		1511	shr ebx, 4
		1512	cmp ebx, 15
		1513	jnz .eob
		1514	mov edx, [esp+4] ; edx -> jpeg.work
		1515	push 0 ; no new coefficient after the run
		1516	jmp .zeroloop1
		1517	.eob:
		1518	pop edx
		1519	push eax
		1520	mov eax, 1
		1521	test ebx, ebx
		1522	jz .eob0
		1523	@@:
			; append ebx further bits to the leading 1
		1524	get_bit 2
		1525	adc eax, eax
		1526	sub ebx, 1
		1527	jnz @b
		1528	.eob0:
		1529	mov [edx+52], eax ; run length in blocks, the current block included
		1530	pop eax
		1531	pop ebx
		1532	.was_eob:
		1533	sub dword [edx+52], 1
		1534	cmp al, [ebx+jpeg.work.ScanEnd]
		1535	ja .ret2
		1536	push edx
		1537	.zeroloop3:
			; rest of the band: only correction bits for coefficients that are already nonzero
		1538	push eax
		1539	movzx eax, byte [zigzag+eax]
		1540	xor edx, edx
		1541	cmp word [edi+eax], dx
		1542	jz @f
		1543	get_bit 2
		1544	jnc @f
		1545	push ecx
		1546	mov cl, [ebx+jpeg.work.ApproxPosLow]
		1547	xor edx, edx
		1548	cmp byte [edi+eax+1], 80h
		1549	adc edx, 0
		1550	add edx, edx
		1551	sub edx, 1 ; edx = +1 or -1, following the sign of the coefficient
		1552	shl edx, cl
		1553	pop ecx
		1554	add [edi+eax], dx
		1555	@@:
		1556	pop eax
		1557	add eax, 1
		1558	cmp al, [ebx+jpeg.work.ScanEnd]
		1559	jbe .zeroloop3
		1560	pop edx
		1561	.ret2:
		1562	mov [ebx+jpeg.work.huffman_bits], ecx
		1563	ret
		1564
			; Final stage for progressive images: for every component build a 56-byte descriptor in
			; jpeg.work.cur_components, run decode_MCU over the whole image and finally free the
			; buffer of DCT coefficients.
			; In: ebx -> jpeg.work. Does nothing if jpeg.work.dct_buffer is zero.
			; NOTE(review): init_limits, decode_MCU and next_line are defined outside this part of
			; the file; cur_x / cur_y are presumably set up by init_limits / next_line - confirm.
		1565	handle_progressive:
		1566	cmp [ebx+jpeg.work.dct_buffer], 0
		1567	jnz @f
		1568	ret
		1569	@@:
		1570	; information for all components
		1571	lea esi, [ebx+jpeg.work.components]
		1572	xor ebp, ebp ; ebp = index of the current component
		1573	mov ecx, [ebx+jpeg.work.pixel_size] ; ecx = number of components left
		1574	.next_component:
		1575	lea edi, [ebx+jpeg.work.cur_components]
		1576	lodsb ; ComponentID
		1577	lodsd
		1578	mov ax, 0x0101 ; V = H = 1, the two factor bytes in the upper half of eax are kept
		1579	stosd ; db V, db H, db VFactor, db HFactor
		1580	xor eax, eax
		1581	mov al, byte [edi-1] ; get HFactor
		1582	mul byte [ebx+jpeg.work.pixel_size] ; number of components
		1583	stosd ; HIncrement_i = HFactor_i * sizeof(pixel)
		1584	movzx eax, byte [edi-4-2] ; get VFactor
		1585	mul [ebx+jpeg.work.line_size] ; number of components * image width
		1586	stosd ; VIncrement_i = VFactor_i * sizeof(row)
		1587	lodsb
		1588	and eax, 3 ; eax = number of the quantization table
		1589	cmp [ebx+jpeg.work.quant_tables_defined+eax], 0
		1590	jz .error
		1591	shl eax, 8 ; each quantization table occupies 256 bytes
		1592	lea eax, [ebx+jpeg.work.quant_tables+eax]
		1593	stosd ; dd QuantizationTable
		1594	stosd ; dd DCTable - ignored
		1595	mov eax, ebp
		1596	mul [ebx+jpeg.work.dct_buffer_size]
		1597	add eax, [ebx+jpeg.work.dct_buffer]
		1598	stosd ; instead of dd ACTable - pointer to current DCT coefficients
		1599	push ecx
		1600	mov eax, [ebx+jpeg.work.image]
		1601	mov eax, [eax+Image.Width]
		1602	movzx ecx, byte [edi-21] ; get HFactor
		1603	; cdq ; edx = 0 as a result of previous mul
		1604	div ecx
		1605	stosd ; dd width / HFactor_i
		1606	stosd
		1607	xchg eax, ecx
		1608	inc eax
		1609	sub eax, edx
		1610	stosd ; dd HFactor_i+1 - (width % HFactor_i)
		1611	mov eax, [ebx+jpeg.work.image]
		1612	mov eax, [eax+Image.Height]
		1613	movzx ecx, byte [edi-34] ; get VFactor
		1614	cdq
		1615	div ecx
		1616	stosd ; dd height / VFactor_i
		1617	stosd
		1618	xchg eax, ecx
		1619	inc eax
		1620	sub eax, edx
		1621	stosd ; dd VFactor_i+1 - (height % VFactor_i)
		1622	pop ecx
			; eax = 80000000h for component indices 1 and 2, 0 for indices 0 and 3:
			; setnp tests the parity of the low byte of ebp
		1623	xor eax, eax
1079	diamond	1624	test ebp, ebp
		1625	setnp al
		1626	ror eax, 1
999	diamond	1627	stosd ; dd DCPrediction
		1628	mov eax, ebp
		1629	stosd ; dd ComponentOffset
		1630	inc ebp
		1631	push ecx
		1632	mov [ebx+jpeg.work.cur_components_end], edi
		1633	lea edx, [edi-56] ; edx -> start of the descriptor just built (14 dwords)
		1634	; do IDCT and unpack
		1635	mov edi, [ebx+jpeg.work.image]
		1636	mov edi, [edi+Image.Data]
		1637	mov [ebx+jpeg.work.cur_out_ptr], edi
		1638	mov [ebx+jpeg.work.not_interleaved], 1
		1639	call init_limits
		1640	.decode_loop:
		1641	call decode_MCU
		1642	sub [ebx+jpeg.work.cur_x], 1
		1643	jnz .decode_loop
		1644	call next_line
		1645	sub [ebx+jpeg.work.cur_y], 1
		1646	jnz .decode_loop
		1647	pop ecx
		1648	dec ecx
		1649	jnz .next_component
		1650	; image unpacked, return
		1651	.error:
			; reached both on success and when a quantization table is missing
			; (the stack is balanced on both paths)
		1652	push [ebx+jpeg.work.dct_buffer]
		1653	call [mem.free]
		1654	ret
		1655
		1656	; Support for YCbCr -> RGB conversion
		1657	; R = Y + 1.402 * (Cr - 128)
		1658	; G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128)
		1659	; B = Y + 1.772 * (Cb - 128)
		1660	; When converting YCbCr -> RGB, we need to do some multiplications;
		1661	; to be faster, we precalculate the table for all 256 possible values
		1662	; Also we approximate fractions with N/65536, this gives sufficient precision
			; The routine below fills four tables of 256 dwords each with a single run of stosd
			; starting at color_table_1, so color_table_1..color_table_4 are assumed to be laid out
			; one after another (presumably declared in jpeg.inc - confirm).
			; Each table is written in two halves of 128 entries: the first half grows from 0,
			; then the accumulator is negated and the second half continues from the negated value
			; back towards 0. Tables 1 and 4 hold integers (the fraction is carried in dx),
			; tables 2 and 3 hold values scaled by 65536.
			; All registers are preserved (pushad/popad).
		1663	img.initialize.jpeg:
		1664	;initialize_color_table:
		1665	; 1.402 = 1 + 26345/65536, -0.71414 = -46802/65536
		1666	; -0.34414 = -22554/65536, 1.772 = 1 + 50594/65536
		1667	pushad
		1668	mov edi, color_table_1
		1669	mov ecx, 128
		1670	; 1. Cb -> 1.772*Cb
		1671	xor eax, eax
		1672	mov dx, 8000h ; fraction accumulator starts at one half
		1673	.l1:
		1674	push ecx
		1675	@@:
		1676	stosd
		1677	add dx, 50594
		1678	adc eax, 1
		1679	loop @b
		1680	neg dx
		1681	adc eax, -1
		1682	neg eax
		1683	pop ecx ; pop does not change flags: jnz below tests the result of neg eax
		1684	jnz .l1
		1685	; 2. Cb -> -0.34414*Cb
		1686	mov ax, dx
		1687	.l2:
		1688	push ecx
		1689	@@:
		1690	stosd
		1691	sub eax, 22554
		1692	loop @b
		1693	neg eax
		1694	pop ecx
		1695	cmp ax, dx
		1696	jnz .l2
		1697	xor eax, eax
		1698	; 3. Cr -> -0.71414*Cr
		1699	.l3:
		1700	push ecx
		1701	@@:
		1702	stosd
		1703	sub eax, 46802
		1704	loop @b
		1705	neg eax
		1706	pop ecx ; flags of neg eax are still valid for the jnz
		1707	jnz .l3
		1708	; 4. Cr -> 1.402*Cr
		1709	.l4:
		1710	push ecx
		1711	@@:
		1712	stosd
		1713	add dx, 26345
		1714	adc eax, 1
		1715	loop @b
		1716	neg dx
		1717	adc eax, -1
		1718	neg eax
		1719	pop ecx
		1720	jnz .l4
		1721	popad
		1722	ret
		1723
		1724	; this function is called at the end of image loading
			; Converts the decoded pixels in place using the tables built by img.initialize.jpeg.
			; In: ebx -> jpeg.work
			; pixel_size = 3: 3 bytes per pixel are replaced by B, G, R
			; pixel_size = 4: 4 bytes per pixel; the first three bytes are converted the same way,
			;                 inverted and scaled by the fourth byte, which itself is left unchanged
			; Any other pixel size, or no image: nothing is done.
			; ebx is preserved; eax, ecx, edx, esi, edi, ebp are clobbered when a conversion is done.
			; NOTE(review): the loops are entered without a check for Width*Height = 0 -
			; presumably such images are rejected earlier; confirm.
		1725	convert_to_rgb:
		1726	; some checks
		1727	mov eax, [ebx+jpeg.work.image]
		1728	test eax, eax ; image exists?
		1729	jz .ret
		1730	cmp byte [ebx+jpeg.work.pixel_size], 3 ; full-color image?
		1731	jz .ycc2rgb
		1732	cmp byte [ebx+jpeg.work.pixel_size], 4
		1733	jz .ycck2rgb
		1734	.ret:
		1735	ret
		1736	.ycc2rgb:
		1737	; conversion is needed
		1738	mov esi, [eax+Image.Width]
		1739	imul esi, [eax+Image.Height] ; esi = number of pixels
		1740	mov edi, [eax+Image.Data]
		1741	push ebx
		1742	; N.B. Speed optimization makes sense here.
		1743	align 16
		1744	.loop:
		1745	; mov ebx, [edi]
		1746	; mov edx, ebx
		1747	; mov ecx, ebx
		1748	; movzx ebx, bl ; ebx = Y
		1749	; shr edx, 16
		1750	; mov eax, ebx
		1751	; movzx edx, dl ; edx = Cr
		1752	; movzx ecx, ch ; ecx = Cb
		1753	movzx ebx, byte [edi] ; ebx = Y
		1754	movzx ecx, byte [edi+1] ; ecx = Cb
		1755	mov eax, ebx
		1756	movzx edx, byte [edi+2] ; edx = Cr
		1757	; B = Y + color_table_1[Cb]
		1758	add eax, [color_table_1+ecx*4]
		1759	mov ebp, [color_table_2+ecx*4]
			; branch-free clamp to 0..255: a negative value is ANDed with 0,
			; a value of 100h or more is ORed with -1 so that al becomes 0FFh
		1760	cmp eax, 80000000h
		1761	sbb ecx, ecx
		1762	and eax, ecx
		1763	add ebp, [color_table_3+edx*4]
		1764	cmp eax, 0x100
		1765	sbb ecx, ecx
		1766	not ecx
		1767	sar ebp, 16 ; drop the 16 fraction bits of the two green terms
		1768	or eax, ecx
		1769	mov [edi], al
		1770	; G = Y + color_table_2[Cb] + color_table_3[Cr]
		1771	lea eax, [ebx+ebp]
		1772	cmp eax, 80000000h
		1773	sbb ecx, ecx
		1774	and eax, ecx
		1775	cmp eax, 0x100
		1776	sbb ecx, ecx
		1777	not ecx
		1778	or eax, ecx
		1779	mov [edi+1], al
		1780	; R = Y + color_table_4[Cr]
		1781	mov eax, ebx
		1782	add eax, [color_table_4+edx*4]
		1783	cmp eax, 80000000h
		1784	sbb ecx, ecx
		1785	and eax, ecx
		1786	cmp eax, 0x100
		1787	sbb ecx, ecx
		1788	not ecx
		1789	or eax, ecx
		1790	mov [edi+2], al
		1791	add edi, 3
		1792	sub esi, 1
		1793	jnz .loop
		1794	pop ebx
		1795	ret
		1796	.ycck2rgb:
		1797	; conversion is needed
		1798	mov esi, [eax+Image.Width]
		1799	imul esi, [eax+Image.Height]
		1800	push ebx
		1801	push esi ; pixel counter lives at [esp]
		1802	mov edi, [eax+Image.Data]
		1803	mov esi, edi
		1804	; N.B. Speed optimization makes sense here.
		1805	align 16
		1806	.kloop:
		1807	; mov ebx, [esi]
		1808	; mov edx, ebx
		1809	; mov ecx, ebx
		1810	; movzx ebx, bl ; ebx = Y
		1811	; shr edx, 16
		1812	; mov eax, ebx
		1813	; movzx edx, dl ; edx = Cr
		1814	; movzx ecx, ch ; ecx = Cb
		1815	movzx ebx, byte [esi]
		1816	movzx ecx, byte [esi+1]
		1817	mov eax, ebx
		1818	movzx edx, byte [esi+2]
		1819	; B = Y + color_table_1[Cb]
		1820	add eax, [color_table_1+ecx*4]
		1821	mov ebp, [color_table_2+ecx*4]
		1822	cmp eax, 80000000h
		1823	sbb ecx, ecx
		1824	and eax, ecx
		1825	add ebp, [color_table_3+edx*4]
		1826	cmp eax, 0x100
		1827	sbb ecx, ecx
		1828	not ecx
		1829	sar ebp, 16
		1830	or eax, ecx
			; invert the channel, multiply by the fourth byte of the pixel and divide by 255,
			; the division being approximated as (x + (x shr 8) + 80h) shr 8
		1831	xor al, 0xFF
		1832	mul byte [esi+3]
		1833	add al, ah
		1834	adc ah, 0
		1835	add al, 80h
		1836	adc ah, 0
		1837	mov byte [edi], ah
		1838	; G = Y + color_table_2[Cb] + color_table_3[Cr]
		1839	lea eax, [ebx+ebp]
		1840	cmp eax, 80000000h
		1841	sbb ecx, ecx
		1842	and eax, ecx
		1843	cmp eax, 0x100
		1844	sbb ecx, ecx
		1845	not ecx
		1846	or eax, ecx
		1847	xor al, 0xFF
		1848	mul byte [esi+3]
		1849	add al, ah
		1850	adc ah, 0
		1851	add al, 80h
		1852	adc ah, 0
		1853	mov byte [edi+1], ah
		1854	; R = Y + color_table_4[Cr]
		1855	mov eax, ebx
		1856	add eax, [color_table_4+edx*4]
		1857	cmp eax, 80000000h
		1858	sbb ecx, ecx
		1859	and eax, ecx
		1860	cmp eax, 0x100
		1861	sbb ecx, ecx
		1862	not ecx
		1863	or eax, ecx
		1864	xor al, 0xFF
		1865	mul byte [esi+3]
		1866	add al, ah
		1867	adc ah, 0
		1868	add al, 80h
		1869	adc ah, 0
		1870	mov byte [edi+2], ah
		1871	add esi, 4
		1872	add edi, 4 ;3
		1873	sub dword [esp], 1
		1874	jnz .kloop
		1875	pop eax ; discard the exhausted pixel counter
		1876	pop ebx
		1877	; release some memory - must succeed because we decrease size
		1878	; add ecx, 44+1
		1879	; mov edx, ebx
		1880	; push 68
		1881	; pop eax
		1882	; push 20
		1883	; pop ebx
		1884	; int 0x40
		1885	; mov ebx, eax
		1886	ret
		1887
		1888	; Decodes one data unit, that is, 8*8 block,
		1889	; from input stream, given by pointer esi and length ebp
			; In:  ebx -> jpeg.work, edx -> component data
			;      ([edx+12] table of dequantization multipliers, [edx+16] table used for DC codes,
			;       [edx+20] table used for AC codes, or pointer to stored coefficients in
			;       progressive mode, [edx+48] running DC value, byte [edx+51] subtracted from
			;       every output sample)
			; Out: 64 bytes at jpeg.work.decoded_data; ebx and esi are preserved,
			;      eax, ecx and edi are clobbered
			; In progressive mode nothing is read from the stream: the coefficients are taken from
			; [edx+20], which is advanced by one block.
			; On end of input or bad data the .eof_pop labels unwind to img.decode.jpg.end.
			; NOTE(review): get_bits is a macro defined earlier in the file; with 'true' as its
			; last argument it appears to pop the saved edx - confirm against its definition.
		1890	; N.B. Speed optimization makes sense here.
		1891	align 16
		1892	decode_data_unit:
		1893	; edx -> component data
		1894	cmp [ebx+jpeg.work.progressive], 0
		1895	jz @f
		1896	mov edi, [edx+20]
		1897	add dword [edx+20], 64*2
		1898	jmp .coeff_decoded
		1899	@@:
		1900	lea edi, [ebx+jpeg.work.dct_coeff]
		1901	mov ecx, 64*2/4
		1902	xor eax, eax
		1903	rep stosd ; clear all 64 coefficients
		1904	mov edi, zigzag+1 ; edi walks the zigzag table, starting with the first AC coefficient
		1905	mov ecx, [ebx+jpeg.work.huffman_bits]
		1906	; read DC coefficient
		1907	push ebx
		1908	mov eax, [edx+16]
		1909	push edx
		1910	get_huffman_code 2,3
		1911	get_bits 2,3,true
		1912	pop ebx
		1913	add eax, [edx+48]
		1914	mov [ebx+jpeg.work.dct_coeff], ax
		1915	mov [edx+48], ax
		1916	; read AC coefficients
		1917	push ebx
		1918	@@:
		1919	mov eax, [edx+20]
		1920	push edx
		1921	get_huffman_code 2,3
		1922	shr eax, 4 ; eax = run of zeroes (high nibble of the symbol)
		1923	and ebx, 15 ; ebx = size of the coefficient (low nibble)
		1924	jz .band
		1925	add edi, eax
		1926	cmp edi, zigzag+64
		1927	jae .eof_pop2 ; the run leaves the block: bad data
		1928	get_bits 2,3,true
		1929	movzx ebx, byte [edi] ; byte offset of the coefficient inside the block
		1930	add ebx, [esp] ; [esp] = saved pointer to jpeg.work
		1931	mov [jpeg.work.dct_coeff+ebx], ax
		1932	add edi, 1
		1933	cmp edi, zigzag+64
		1934	jb @b
		1935	jmp .do_idct
		1936	.band:
			; size is zero: run 15 skips 16 zero coefficients, any other run value ends the block
		1937	pop edx
		1938	cmp al, 15
		1939	jnz .do_idct
		1940	add edi, 16
		1941	cmp edi, zigzag+64
		1942	jb @b
		1943	; jmp .eof_pop1
		1944	.do_idct:
		1945	pop ebx
		1946	lea edi, [ebx+jpeg.work.dct_coeff]
		1947	mov [ebx+jpeg.work.huffman_bits], ecx
		1948	; coefficients loaded, now IDCT
		1949	.coeff_decoded:
		1950	mov eax, [edx+12]
		1951	add ebx, jpeg.work.idct_tmp_area
		1952	push 8 ; loop counter at [esp]: 8 columns
		1953	.idct_loop1:
			; first pass, one column per iteration: words at stride 16 are multiplied by
			; the dwords at [eax] (stride 32) and the results are stored as floats at stride 32
		1954	mov cx, word [edi+1*16]
		1955	repeat 6
		1956	or cx, word [edi+(%+1)*16]
		1957	end repeat
		1958	jnz .real_transform
			; all AC coefficients of this column are zero: the scaled first element fills the column
		1959	fild word [edi]
		1960	fmul dword [eax]
		1961	fstp dword [ebx]
		1962	mov ecx, [ebx]
		1963	repeat 7
		1964	mov [ebx+%*32], ecx
		1965	end repeat
		1966	jmp .idct_next1
		1967	.real_transform:
		1968	; S0,...,S7 - transformed values, s0,...,s7 - sought-for values
		1969	; S0,...,S7 are dequantized;
		1970	; dequantization table elements were multiplied to [idct_pre_table],
		1971	; so S0,S1,... later denote S0/2\sqrt{2},S1*\cos{\pi/16}/2,...
		1972	; sqrt2 = \sqrt{2}, cos = 2\cos{\pi/8},
		1973	; cos_sum = -2(\cos{\pi/8}+\cos{3\pi/8}), cos_diff = 2(\cos{\pi/8}-\cos{3\pi/8})
		1974	; Now formulas:
		1975	; s0 = ((S0+S4)+(S2+S6)) + ((S1+S7)+(S3+S5))
		1976	; s7 = ((S0+S4)+(S2+S6)) - ((S1+S7)+(S3+S5))
		1977	; val0 = ((cos-1)S1-(cos+cos_sum+1)S3+(cos+cos_sum-1)S5-(cos+1)S7)
		1978	; s1 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) + val0
		1979	; s6 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) - val0
		1980	; val1 = (S1+S7-S3-S5)sqrt2 - val0
		1981	; s2 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) + val1
		1982	; s5 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) - val1
		1983	; val2 = (S1-S7)cos_diff - (S1-S3+S5-S7)cos + val1
		1984	; s3 = ((S0+S4)-(S2+S6)) - val2
		1985	; s4 = ((S0+S4)-(S2+S6)) + val2
		1986	fild word [edi+3*16]
		1987	fmul dword [eax+3*32]
		1988	fild word [edi+5*16]
		1989	fmul dword [eax+5*32] ; st0=S5,st1=S3
		1990	fadd st1,st0
		1991	fadd st0,st0
		1992	fsub st0,st1 ; st0=S5-S3,st1=S5+S3
		1993	fild word [edi+1*16]
		1994	fmul dword [eax+1*32]
		1995	fild word [edi+7*16]
		1996	fmul dword [eax+7*32] ; st0=S7,st1=S1
		1997	fsub st1,st0
		1998	fadd st0,st0
		1999	fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
		2000	fadd st3,st0
		2001	fadd st0,st0
		2002	fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
		2003	fmul [idct_sqrt2]
		2004	fld st2
		2005	fadd st0,st2
		2006	fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
		2007	; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
		2008	fxch st2
		2009	fmul [idct_cos_diff]
		2010	fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
		2011	fxch st3
		2012	fmul [idct_cos_sum]
		2013	fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
		2014	fsub st0,st4 ; st0=val0
		2015	fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
		2016	; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
		2017	fxch st2
		2018	fstp st0
		2019	fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7
		2020
		2021	fild word [edi+0*16]
		2022	fmul dword [eax+0*32]
		2023	fild word [edi+4*16]
		2024	fmul dword [eax+4*32] ; st0=S4,st1=S0
		2025	fsub st1,st0
		2026	fadd st0,st0
		2027	fadd st0,st1 ; st0=S0+S4,st1=S0-S4
		2028	fild word [edi+6*16]
		2029	fmul dword [eax+6*32]
		2030	fild word [edi+2*16]
		2031	fmul dword [eax+2*32] ; st0=S2,st1=S6
		2032	fadd st1,st0
		2033	fadd st0,st0
		2034	fsub st0,st1 ; st0=S2-S6,st1=S2+S6
		2035	fmul [idct_sqrt2]
		2036	fsub st0,st1
		2037	fsub st3,st0
		2038	fadd st0,st0
		2039	fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
		2040	; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
		2041	fxch st1
		2042	fsub st2,st0
		2043	fadd st0,st0
		2044	fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
		2045	; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
		2046	; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
		2047	fsubr st7,st0
		2048	fadd st0,st0
		2049	fsub st0,st7
		2050	fstp dword [ebx+0*32]
		2051	fsubr st4,st0
		2052	fadd st0,st0
		2053	fsub st0,st4
		2054	fstp dword [ebx+1*32]
		2055	fadd st4,st0
		2056	fadd st0,st0
		2057	fsub st0,st4
		2058	fstp dword [ebx+3*32]
		2059	fsubr st1,st0
		2060	fadd st0,st0
		2061	fsub st0,st1
		2062	fstp dword [ebx+2*32]
		2063	fstp dword [ebx+5*32]
		2064	fstp dword [ebx+6*32]
		2065	fstp dword [ebx+4*32]
		2066	fstp dword [ebx+7*32]
		2067	.idct_next1:
		2068	add ebx, 4
		2069	add edi, 2
		2070	add eax, 4
		2071	sub dword [esp], 1
		2072	jnz .idct_loop1
		2073	pop ecx ; discard the exhausted column counter
		2074	sub ebx, 8*4 ; back to the start of idct_tmp_area
		2075	mov ecx, 8
		2076	.idct_loop2:
			; second pass, one row of 8 floats per iteration; same arithmetic as in the first pass,
			; the results are stored back in place as 32-bit integers (fistp)
		2077	fld dword [ebx+3*4]
		2078	fld dword [ebx+5*4]
		2079	fadd st1,st0
		2080	fadd st0,st0
		2081	fsub st0,st1 ; st0=S5-S3,st1=S5+S3
		2082	fld dword [ebx+1*4]
		2083	fld dword [ebx+7*4]
		2084	fsub st1,st0
		2085	fadd st0,st0
		2086	fadd st0,st1 ; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
		2087	fadd st3,st0
		2088	fadd st0,st0
		2089	fsub st0,st3 ; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
		2090	fmul [idct_sqrt2]
		2091	fld st2
		2092	fadd st0,st2
		2093	fmul [idct_cos] ; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
		2094	; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
		2095	fxch st2
		2096	fmul [idct_cos_diff]
		2097	fsub st0,st2 ; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
		2098	fxch st3
		2099	fmul [idct_cos_sum]
		2100	fadd st0,st2 ; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
		2101	fsub st0,st4 ; st0=val0
		2102	fsub st1,st0 ; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
		2103	; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
		2104	fxch st2
		2105	fstp st0
		2106	fadd st2,st0 ; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7
		2107
		2108	fld dword [ebx+0*4]
		2109	fld dword [ebx+4*4]
		2110	fsub st1,st0
		2111	fadd st0,st0
		2112	fadd st0,st1 ; st0=S0+S4,st1=S0-S4
		2113	fld dword [ebx+6*4]
		2114	fld dword [ebx+2*4]
		2115	fadd st1,st0
		2116	fadd st0,st0
		2117	fsub st0,st1 ; st0=S2-S6,st1=S2+S6
		2118	fmul [idct_sqrt2]
		2119	fsub st0,st1
		2120	fsub st3,st0
		2121	fadd st0,st0
		2122	fadd st0,st3 ; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
		2123	; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
		2124	fxch st1
		2125	fsub st2,st0
		2126	fadd st0,st0
		2127	fadd st0,st2 ; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
		2128	; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
		2129	; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
		2130	fsubr st7,st0
		2131	fadd st0,st0
		2132	fsub st0,st7
		2133	fistp dword [ebx+0*4]
		2134	fsubr st4,st0
		2135	fadd st0,st0
		2136	fsub st0,st4
		2137	fistp dword [ebx+1*4]
		2138	fadd st4,st0
		2139	fadd st0,st0
		2140	fsub st0,st4
		2141	fistp dword [ebx+3*4]
		2142	fsubr st1,st0
		2143	fadd st0,st0
		2144	fsub st0,st1
		2145	fistp dword [ebx+2*4]
		2146	fistp dword [ebx+5*4]
		2147	fistp dword [ebx+6*4]
		2148	fistp dword [ebx+4*4]
		2149	fistp dword [ebx+7*4]
		2150
		2151	add ebx, 32
		2152	sub ecx, 1
		2153	jnz .idct_loop2
		2154
		2155	sub ebx, 32*8
		2156	mov ecx, 64
		2157	lea edi, [ebx - jpeg.work.idct_tmp_area + jpeg.work.decoded_data - 1]
		2158	push esi ; esi is used as scratch in the loop below
		2159	.idct_loop3:
			; final pass: add 80h to each of the 64 integers, clamp to 0..255 without branches,
			; subtract byte [edx+51] and store the result as a byte
		2160	mov eax, [ebx]
		2161	add ebx, 4
		2162	add eax, 80h
		2163	cmp eax, 80000000h
		2164	sbb esi, esi
		2165	add edi, 1
		2166	and eax, esi ; negative values become 0
		2167	cmp eax, 100h
		2168	sbb esi, esi
		2169	not esi
		2170	or eax, esi ; values of 100h and above give al = 0FFh
		2171	sub al, [edx+51]
		2172	sub ecx, 1
		2173	mov [edi], al ; mov keeps the flags of the sub ecx above for the jnz
		2174	jnz .idct_loop3
		2175	pop esi
		2176	sub ebx, 64*4 + jpeg.work.idct_tmp_area ; ebx -> jpeg.work again
		2177	; done
		2178	ret
		2179
			; error exits: drop the given number of dwords; the last dword popped is the saved
			; pointer to jpeg.work, which .eof_pop0 needs in ebx
		2180	.eof_pop3:
		2181	pop ebx
		2182	.eof_pop2:
		2183	pop ebx
		2184	.eof_pop1:
		2185	pop ebx
		2186	.eof_pop0:
		2187	; EOF or incorrect data during scanning
		2188	mov esp, [ebx + jpeg.work._esp] ; unwind to the stack pointer saved in jpeg.work._esp
		2189	jmp img.decode.jpg.end
		2190
			; Stub: no encoding is performed. Returns eax = 0 and
			; removes 8 bytes of arguments from the stack on return.
		2191	img.encode.jpg:
		2192	xor eax, eax
		2193	ret 8
		2194
		2195	zigzag:
		2196	; (x,y) -> 2(x+y8)
		2197	repeat 8
		2198	.cur = %
		2199	if .cur and 1
		2200	repeat %
		2201	db 2((%-1) + (.cur-%)8)
		2202	end repeat
		2203	else
		2204	repeat %
		2205	db 2((.cur-%) + (%-1)8)
		2206	end repeat
		2207	end if
		2208	end repeat
		2209	repeat 7
		2210	.cur = %
		2211	if .cur and 1
		2212	repeat 8-%
		2213	db 2((%+.cur-1) + (8-%)8)
		2214	end repeat
		2215	else
		2216	repeat 8-%
		2217	db 2((8-%) + (%+.cur-1)8)
		2218	end repeat
		2219	end if
		2220	end repeat
		2221
			; Single-precision constants for the IDCT in decode_data_unit.
			; idct_pre_table holds the eight scale factors c_0..c_7; according to the comments in
			; decode_data_unit the dequantization table elements are pre-multiplied by them.
		2222	align 4
		2223	idct_pre_table:
		2224	; c_0 = 1/(2\sqrt{2}), c_i = cos(i*\pi/16)/2
		2225	dd 0.35355339, 0.49039264, 0.461939766, 0.41573481
		2226	dd 0.35355339, 0.27778512, 0.19134172, 0.09754516
			; multipliers used by both IDCT passes
		2227	idct_sqrt2 dd 1.41421356 ; \sqrt{2}
		2228	idct_cos dd 1.847759065 ; 2\cos{\pi/8}
		2229	idct_cos_sum dd -2.61312593 ; -2(\cos{\pi/8} + \cos{3\pi/8})
		2230	idct_cos_diff dd 1.08239220 ; 2(\cos{\pi/8} - \cos{3\pi/8})
		2231	;---------------------------------------------------------------------

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/libs-dev/libimg/jpeg/jpeg.asm – Rev 2733