Subversion Repositories Kolibri OS

Rev

Rev 1079 | Rev 2733 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 1079 Rev 1102
1
;;================================================================================================;;
1
;;================================================================================================;;
2
;;//// jpeg.asm //// (c) diamond, 2008-2009 //////////////////////////////////////////////////////;;
2
;;//// jpeg.asm //// (c) diamond, 2008-2009 //////////////////////////////////////////////////////;;
3
;;================================================================================================;;
3
;;================================================================================================;;
4
;;                                                                                                ;;
4
;;                                                                                                ;;
5
;; This file is part of Common development libraries (Libs-Dev).                                  ;;
5
;; This file is part of Common development libraries (Libs-Dev).                                  ;;
6
;;                                                                                                ;;
6
;;                                                                                                ;;
7
;; Libs-Dev is free software: you can redistribute it and/or modify it under the terms of the GNU ;;
7
;; Libs-Dev is free software: you can redistribute it and/or modify it under the terms of the GNU ;;
8
;; Lesser General Public License as published by the Free Software Foundation, either version 2.1 ;;
8
;; Lesser General Public License as published by the Free Software Foundation, either version 2.1 ;;
9
;; of the License, or (at your option) any later version.                                         ;;
9
;; of the License, or (at your option) any later version.                                         ;;
10
;;                                                                                                ;;
10
;;                                                                                                ;;
11
;; Libs-Dev is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without  ;;
11
;; Libs-Dev is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without  ;;
12
;; even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  ;;
12
;; even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  ;;
13
;; Lesser General Public License for more details.                                                ;;
13
;; Lesser General Public License for more details.                                                ;;
14
;;                                                                                                ;;
14
;;                                                                                                ;;
15
;; You should have received a copy of the GNU Lesser General Public License along with Libs-Dev.  ;;
15
;; You should have received a copy of the GNU Lesser General Public License along with Libs-Dev.  ;;
16
;; If not, see <http://www.gnu.org/licenses/>.                                                    ;;
16
;; If not, see <http://www.gnu.org/licenses/>.                                                    ;;
17
;;                                                                                                ;;
17
;;                                                                                                ;;
18
;;================================================================================================;;
18
;;================================================================================================;;
19
 
19
 
20
include 'jpeg.inc'
20
include 'jpeg.inc'
21
 
21
 
22
;; img.is.jpg - quick check whether a memory buffer starts like a JPEG stream.
;;
;; Calling convention: two dword arguments on the stack, removed by the callee (ret 8).
;;   1st argument = pointer to the data
;;   2nd argument = size of the data in bytes
;; Returns: eax = 1 if the first marker read is SOI (0xD8), eax = 0 otherwise.
;; esi and ebp are saved on entry and restored before returning.
;;
;; NOTE(review): get_marker is defined outside this routine. From its use here it
;; appears to take esi = current position and ebp = bytes left, and to report
;; failure via CF and the marker code via al - confirm against its definition.
img.is.jpg:
	push	esi ebp
	; two registers were just pushed, so the arguments now sit at +12 and +16
	mov	ebp, [esp+16]	; ebp = data size
	mov	esi, [esp+12]	; esi -> JPEG data
	call	get_marker
	jc	.not_jpeg	; carry set: no marker could be read
	cmp	al, 0xD8	; first marker must be SOI
	jne	.not_jpeg
	mov	eax, 1		; report "looks like JPEG"
	jmp	.done
.not_jpeg:
	xor	eax, eax	; report "not JPEG"
.done:
	pop	ebp esi
	ret	8
37
 
37
 
38
img.decode.jpg:
38
img.decode.jpg:
39
	finit
39
	finit
40
	pushad
40
	pushad
41
	mov	esi, [esp+20h+4]	; esi -> JPEG data
41
	mov	esi, [esp+20h+4]	; esi -> JPEG data
42
	mov	ebp, [esp+20h+8]	; ebp = data size
42
	mov	ebp, [esp+20h+8]	; ebp = data size
43
@@:
43
@@:
44
; allocate area for JPEG processing
44
; allocate area for JPEG processing
45
	push	sizeof.jpeg.work
45
	push	sizeof.jpeg.work
46
	call	[mem.alloc]
46
	call	[mem.alloc]
47
	test	eax, eax
47
	test	eax, eax
48
	jz	.ret
48
	jz	.ret
49
	mov	ebx, eax
49
	mov	ebx, eax
50
	xor	ecx, ecx
50
	xor	ecx, ecx
51
	mov	[ebx + jpeg.work.image], ecx
51
	mov	[ebx + jpeg.work.image], ecx
52
	mov	[ebx + jpeg.work.dct_buffer], ecx
52
	mov	[ebx + jpeg.work.dct_buffer], ecx
53
	mov	[ebx + jpeg.work._esp], esp
53
	mov	[ebx + jpeg.work._esp], esp
54
; check for SOI [Start-Of-Image] marker
54
; check for SOI [Start-Of-Image] marker
55
	call	get_marker
55
	call	get_marker
56
	jc	.end
56
	jc	.end
57
	cmp	al, 0xD8	; SOI?
57
	cmp	al, 0xD8	; SOI?
58
	jz	.soi_ok
58
	jz	.soi_ok
59
.end:
59
.end:
60
; general exit from the function
60
; general exit from the function
61
; for progressive mode: convert loaded DCT coefficients to image
61
; for progressive mode: convert loaded DCT coefficients to image
62
	call	handle_progressive
62
	call	handle_progressive
63
; convert full-color images to RGB
63
; convert full-color images to RGB
64
	call	convert_to_rgb
64
	call	convert_to_rgb
65
	push	[ebx + jpeg.work.image]
65
	push	[ebx + jpeg.work.image]
66
	push	ebx
66
	push	ebx
67
	call	[mem.free]
67
	call	[mem.free]
68
	pop	eax
68
	pop	eax
69
.ret:
69
.ret:
70
	mov	[esp+28], eax
70
	mov	[esp+28], eax
71
	popad
71
	popad
72
	ret	8
72
	ret	12
73
.soi_ok:
73
.soi_ok:
74
	mov	[ebx + jpeg.work.restart_interval], ecx
74
	mov	[ebx + jpeg.work.restart_interval], ecx
75
	mov	[ebx + jpeg.work.adobe_ycck], cl
75
	mov	[ebx + jpeg.work.adobe_ycck], cl
76
; loop until start of frame (real data), parse markers
76
; loop until start of frame (real data), parse markers
77
.markers_loop:
77
.markers_loop:
78
	call	get_marker
78
	call	get_marker
79
	jc	.end
79
	jc	.end
80
; markers RSTn do not have parameters
80
; markers RSTn do not have parameters
81
; N.B. They can not exist in this part of JPEG, but let's be liberal :)
81
; N.B. They can not exist in this part of JPEG, but let's be liberal :)
82
	cmp	al, 0xD0
82
	cmp	al, 0xD0
83
	jb	@f
83
	jb	@f
84
	cmp	al, 0xD8
84
	cmp	al, 0xD8
85
	jb	.markers_loop
85
	jb	.markers_loop
86
@@:
86
@@:
87
	cmp	al, 0xD9	; EOI? [invalid here]
87
	cmp	al, 0xD9	; EOI? [invalid here]
88
	jz	.end
88
	jz	.end
89
; ok, this is marker segment
89
; ok, this is marker segment
90
; first word is length of the segment
90
; first word is length of the segment
91
	cmp	ebp, 2
91
	cmp	ebp, 2
92
	jb	.end
92
	jb	.end
93
	xor	edx, edx
93
	xor	edx, edx
94
	mov	dl, [esi+1]
94
	mov	dl, [esi+1]
95
	mov	dh, [esi]	; edx = marker length, al = marker value
95
	mov	dh, [esi]	; edx = marker length, al = marker value
96
	sub	ebp, edx
96
	sub	ebp, edx
97
	jb	.end
97
	jb	.end
98
	cmp	al, 0xDB	; DQT?
98
	cmp	al, 0xDB	; DQT?
99
	jz	.dqt
99
	jz	.dqt
100
	cmp	al, 0xC4	; DHT?
100
	cmp	al, 0xC4	; DHT?
101
	jz	.dht
101
	jz	.dht
102
	cmp	al, 0xCC	; DAC? [ignored - no arithmetic coding]
102
	cmp	al, 0xCC	; DAC? [ignored - no arithmetic coding]
103
	jz	.next_marker
103
	jz	.next_marker
104
	cmp	al, 0xDD	; DRI?
104
	cmp	al, 0xDD	; DRI?
105
	jz	.dri
105
	jz	.dri
106
	cmp	al, 0xDA	; SOS?
106
	cmp	al, 0xDA	; SOS?
107
	jz	.sos
107
	jz	.sos
108
	cmp	al, 0xC0
108
	cmp	al, 0xC0
109
	jb	@f
109
	jb	@f
110
	cmp	al, 0xD0
110
	cmp	al, 0xD0
111
	jb	.sofn
111
	jb	.sofn
112
@@:
112
@@:
113
	cmp	al, 0xEE	; APP14?
113
	cmp	al, 0xEE	; APP14?
114
	jz	.app14
114
	jz	.app14
115
; unrecognized marker; let's skip it and hope for the best
115
; unrecognized marker; let's skip it and hope for the best
116
.next_marker:
116
.next_marker:
117
	add	esi, edx
117
	add	esi, edx
118
	jmp	.markers_loop
118
	jmp	.markers_loop
119
.app14:
119
.app14:
120
; check for special Adobe marker
120
; check for special Adobe marker
121
	cmp	dx, 14
121
	cmp	dx, 14
122
	jb	.next_marker
122
	jb	.next_marker
123
	cmp	byte [esi+2], 'A'
123
	cmp	byte [esi+2], 'A'
124
	jnz	.next_marker
124
	jnz	.next_marker
125
	cmp	dword [esi+3], 'dobe'
125
	cmp	dword [esi+3], 'dobe'
126
	jnz	.next_marker
126
	jnz	.next_marker
127
	cmp	byte [esi+13], 2
127
	cmp	byte [esi+13], 2
128
	setz	[ebx + jpeg.work.adobe_ycck]
128
	setz	[ebx + jpeg.work.adobe_ycck]
129
	jmp	.next_marker
129
	jmp	.next_marker
130
.dqt:
130
.dqt:
131
; DQT marker found
131
; DQT marker found
132
; length: 2 bytes for length field + 65 bytes per table
132
; length: 2 bytes for length field + 65 bytes per table
133
	sub	edx, 2
133
	sub	edx, 2
134
	jc	.end
134
	jc	.end
135
	lodsw
135
	lodsw
136
.dqt_loop:
136
.dqt_loop:
137
	test	edx, edx
137
	test	edx, edx
138
	jz	.markers_loop
138
	jz	.markers_loop
139
	sub	edx, 1+64
139
	sub	edx, 1+64
140
	jc	.end
140
	jc	.end
141
	lodsb
141
	lodsb
142
; 8-bit DCT-based process shall not use a 16-bit precision quantization table.
142
; 8-bit DCT-based process shall not use a 16-bit precision quantization table.
143
	test	al, 0xF0
143
	test	al, 0xF0
144
	jnz	.end
144
	jnz	.end
145
	and	eax, 3
145
	and	eax, 3
146
	mov	[ebx+jpeg.work.quant_tables_defined+eax], 1
146
	mov	[ebx+jpeg.work.quant_tables_defined+eax], 1
147
	shl	eax, 8
147
	shl	eax, 8
148
	lea	edi, [ebx+eax+jpeg.work.quant_tables]
148
	lea	edi, [ebx+eax+jpeg.work.quant_tables]
149
	xor	ecx, ecx
149
	xor	ecx, ecx
150
@@:
150
@@:
151
	xor	eax, eax
151
	xor	eax, eax
152
	lodsb
152
	lodsb
153
	push	eax
153
	push	eax
154
	fild	dword [esp]
154
	fild	dword [esp]
155
	pop	eax
155
	pop	eax
156
	movzx	eax, byte [zigzag+ecx]
156
	movzx	eax, byte [zigzag+ecx]
157
	add	eax, eax
157
	add	eax, eax
158
	push	eax
158
	push	eax
159
	and	eax, 7*4
159
	and	eax, 7*4
160
	fmul	dword [idct_pre_table+eax]
160
	fmul	dword [idct_pre_table+eax]
161
	pop	eax
161
	pop	eax
162
	push	eax
162
	push	eax
163
	shr	eax, 3
163
	shr	eax, 3
164
	and	eax, 7*4
164
	and	eax, 7*4
165
	fmul	dword [idct_pre_table+eax]
165
	fmul	dword [idct_pre_table+eax]
166
	pop	eax
166
	pop	eax
167
	fstp	dword [edi+eax]
167
	fstp	dword [edi+eax]
168
	inc	ecx
168
	inc	ecx
169
	cmp	ecx, 64
169
	cmp	ecx, 64
170
	jb	@b
170
	jb	@b
171
	jmp	.dqt_loop
171
	jmp	.dqt_loop
172
.dri:
172
.dri:
173
; DRI marker found
173
; DRI marker found
174
	cmp	edx, 4		; length must be 4
174
	cmp	edx, 4		; length must be 4
175
	jnz	.end2
175
	jnz	.end2
176
	movzx	eax, word [esi+2]
176
	movzx	eax, word [esi+2]
177
	xchg	al, ah
177
	xchg	al, ah
178
	mov	[ebx+jpeg.work.restart_interval], eax
178
	mov	[ebx+jpeg.work.restart_interval], eax
179
	jmp	.next_marker
179
	jmp	.next_marker
180
.dht:
180
.dht:
181
; DHT marker found
181
; DHT marker found
182
	sub	edx, 2
182
	sub	edx, 2
183
	jc	.end2
183
	jc	.end2
184
	lodsw
184
	lodsw
185
.dht_loop:
185
.dht_loop:
186
	test	edx, edx
186
	test	edx, edx
187
	jz	.markers_loop
187
	jz	.markers_loop
188
	sub	edx, 17
188
	sub	edx, 17
189
	jc	.end2
189
	jc	.end2
190
; next Huffman table; find place for it
190
; next Huffman table; find place for it
191
	lodsb
191
	lodsb
192
	mov	edi, eax
192
	mov	edi, eax
193
	and	eax, 0x10
193
	and	eax, 0x10
194
	and	edi, 3
194
	and	edi, 3
195
	shr	eax, 2
195
	shr	eax, 2
196
	or	edi, eax
196
	or	edi, eax
197
	mov	[ebx+jpeg.work.dc_huffman_defined+edi], 1
197
	mov	[ebx+jpeg.work.dc_huffman_defined+edi], 1
198
;	shl	edi, 11
198
;	shl	edi, 11
199
	imul	edi, max_hufftable_size
199
	imul	edi, max_hufftable_size
200
	lea	edi, [ebx+edi+jpeg.work.dc_huffman]	; edi -> destination table
200
	lea	edi, [ebx+edi+jpeg.work.dc_huffman]	; edi -> destination table
201
; get table size
201
; get table size
202
	xor	eax, eax
202
	xor	eax, eax
203
	push	16
203
	push	16
204
	pop	ecx
204
	pop	ecx
205
@@:
205
@@:
206
	add	al, [esi]
206
	add	al, [esi]
207
	adc	ah, 0
207
	adc	ah, 0
208
	inc	esi
208
	inc	esi
209
	loop	@b
209
	loop	@b
210
	cmp	ax, 0x100
210
	cmp	ax, 0x100
211
	ja	.end2
211
	ja	.end2
212
	sub	edx, eax
212
	sub	edx, eax
213
	jc	.end2
213
	jc	.end2
214
; construct Huffman tree
214
; construct Huffman tree
215
	push	ebx edx
215
	push	ebx edx
216
	; lea	eax, [edi+256*8]
216
	; lea	eax, [edi+256*8]
217
	; push	eax
217
	; push	eax
218
	; push	16
218
	; push	16
219
	; mov	edx, esi
219
	; mov	edx, esi
220
; @@:
220
; @@:
221
	; cmp	byte [edx-1], 0
221
	; cmp	byte [edx-1], 0
222
	; jnz	@f
222
	; jnz	@f
223
	; dec	edx
223
	; dec	edx
224
	; dec	dword [esp]
224
	; dec	dword [esp]
225
	; jmp	@b
225
	; jmp	@b
226
; @@:
226
; @@:
227
	; sub	edx, [esp]
227
	; sub	edx, [esp]
228
	; lea	eax, [edi+8]
228
	; lea	eax, [edi+8]
229
	; push	2
229
	; push	2
230
	; pop	ecx
230
	; pop	ecx
231
; .lenloop:
231
; .lenloop:
232
	; mov	bl, byte [edx]
232
	; mov	bl, byte [edx]
233
	; test	bl, bl
233
	; test	bl, bl
234
	; jz	.len1done
234
	; jz	.len1done
235
	; push	eax
235
	; push	eax
236
	; xor	eax, eax
236
	; xor	eax, eax
237
; .len1loop:
237
; .len1loop:
238
	; dec	ecx
238
	; dec	ecx
239
	; js	.dhterr
239
	; js	.dhterr
240
	; cmp	edi, [esp+8]
240
	; cmp	edi, [esp+8]
241
	; jae	.dhterr
241
	; jae	.dhterr
242
	; lodsb
242
	; lodsb
243
	; stosd
243
	; stosd
244
	; dec	bl
244
	; dec	bl
245
	; jnz	.len1loop
245
	; jnz	.len1loop
246
	; pop	eax
246
	; pop	eax
247
; .len1done:
247
; .len1done:
248
	; jecxz	.len2done
248
	; jecxz	.len2done
249
	; push	ecx
249
	; push	ecx
250
; .len2loop:
250
; .len2loop:
251
	; cmp	eax, [esp+8]
251
	; cmp	eax, [esp+8]
252
	; jb	@f
252
	; jb	@f
253
	; or	eax, -1
253
	; or	eax, -1
254
; @@:
254
; @@:
255
	; cmp	edi, [esp+8]
255
	; cmp	edi, [esp+8]
256
	; jae	.dhterr
256
	; jae	.dhterr
257
	; stosd
257
	; stosd
258
	; add	eax, 8
258
	; add	eax, 8
259
	; jnb	@f
259
	; jnb	@f
260
	; or	eax, -1
260
	; or	eax, -1
261
; @@:
261
; @@:
262
	; loop	.len2loop
262
	; loop	.len2loop
263
	; pop	ecx
263
	; pop	ecx
264
; .len2done:
264
; .len2done:
265
	; add	ecx, ecx
265
	; add	ecx, ecx
266
	; inc	edx
266
	; inc	edx
267
	; dec	dword [esp]
267
	; dec	dword [esp]
268
	; jnz	.lenloop
268
	; jnz	.lenloop
269
	; pop	eax
269
	; pop	eax
270
	; pop	eax
270
	; pop	eax
271
	; sub	eax, edi
271
	; sub	eax, edi
272
	; shr	eax, 2
272
	; shr	eax, 2
273
	; cmp	eax, ecx
273
	; cmp	eax, ecx
274
	; ja	@f
274
	; ja	@f
275
	; mov	ecx, eax
275
	; mov	ecx, eax
276
; @@:
276
; @@:
277
	; or	eax, -1
277
	; or	eax, -1
278
	; rep	stosd
278
	; rep	stosd
279
	; pop	edx ebx
279
	; pop	edx ebx
280
	; jmp	.dht_loop
280
	; jmp	.dht_loop
281
; .dhterr:
281
; .dhterr:
282
	; ;pop	eax eax eax edx ebx
282
	; ;pop	eax eax eax edx ebx
283
	; add	esp, 5*4
283
	; add	esp, 5*4
284
	lea	eax, [edi+256*2]
284
	lea	eax, [edi+256*2]
285
	push	eax
285
	push	eax
286
	lea	edx, [esi-16]
286
	lea	edx, [esi-16]
287
	mov	ah, 1
287
	mov	ah, 1
288
	mov	ecx, 128
288
	mov	ecx, 128
289
.dht_l1:
289
.dht_l1:
290
	movzx	ebx, byte [edx]
290
	movzx	ebx, byte [edx]
291
	inc	edx
291
	inc	edx
292
	test	ebx, ebx
292
	test	ebx, ebx
293
	jz	.dht_l3
293
	jz	.dht_l3
294
.dht_l2:
294
.dht_l2:
295
	cmp	edi, [esp]
295
	cmp	edi, [esp]
296
	jae	.dhterr1
296
	jae	.dhterr1
297
	lodsb
297
	lodsb
298
	xchg	al, ah
298
	xchg	al, ah
299
	push	ecx
299
	push	ecx
300
	rep	stosw
300
	rep	stosw
301
	pop	ecx
301
	pop	ecx
302
	xchg	al, ah
302
	xchg	al, ah
303
	dec	ebx
303
	dec	ebx
304
	jnz	.dht_l2
304
	jnz	.dht_l2
305
.dht_l3:
305
.dht_l3:
306
	inc	ah
306
	inc	ah
307
	shr	ecx, 1
307
	shr	ecx, 1
308
	jnz	.dht_l1
308
	jnz	.dht_l1
309
	push	edi
309
	push	edi
310
	mov	edi, [esp+4]
310
	mov	edi, [esp+4]
311
	push	edi
311
	push	edi
312
	mov	eax, 0x00090100
312
	mov	eax, 0x00090100
313
	mov	cl, 8
313
	mov	cl, 8
314
.dht_l4:
314
.dht_l4:
315
	movzx	ebx, byte [edx]
315
	movzx	ebx, byte [edx]
316
	inc	edx
316
	inc	edx
317
	test	ebx, ebx
317
	test	ebx, ebx
318
	jz	.dht_l6
318
	jz	.dht_l6
319
.dht_l5:
319
.dht_l5:
320
	cmp	edi, [esp]
320
	cmp	edi, [esp]
321
	jb	@f
321
	jb	@f
322
	mov	edi, [esp+4]
322
	mov	edi, [esp+4]
323
	rol	eax, 16
323
	rol	eax, 16
324
	cmp	edi, [esp+8]
324
	cmp	edi, [esp+8]
325
	jae	.dhterr2
325
	jae	.dhterr2
326
	stosw
326
	stosw
327
	inc	ah
327
	inc	ah
328
	mov	[esp+4], edi
328
	mov	[esp+4], edi
329
	pop	edi
329
	pop	edi
330
	push	edi
330
	push	edi
331
	rol	eax, 16
331
	rol	eax, 16
332
	add	dword [esp], 16*2
332
	add	dword [esp], 16*2
333
@@:
333
@@:
334
	lodsb
334
	lodsb
335
	xchg	al, ah
335
	xchg	al, ah
336
	push	ecx
336
	push	ecx
337
	rep	stosw
337
	rep	stosw
338
	pop	ecx
338
	pop	ecx
339
	xchg	al, ah
339
	xchg	al, ah
340
	dec	ebx
340
	dec	ebx
341
	jnz	.dht_l5
341
	jnz	.dht_l5
342
.dht_l6:
342
.dht_l6:
343
	inc	ah
343
	inc	ah
344
	shr	ecx, 1
344
	shr	ecx, 1
345
	jnz	.dht_l4
345
	jnz	.dht_l4
346
	push	edi
346
	push	edi
347
	movzx	ebx, byte [edx]
347
	movzx	ebx, byte [edx]
348
	add	ebx, ebx
348
	add	ebx, ebx
349
	add	bl, [edx+1]
349
	add	bl, [edx+1]
350
	adc	bh, 0
350
	adc	bh, 0
351
	add	ebx, ebx
351
	add	ebx, ebx
352
	add	bl, [edx+2]
352
	add	bl, [edx+2]
353
	adc	bh, 0
353
	adc	bh, 0
354
	add	ebx, ebx
354
	add	ebx, ebx
355
	add	bl, [edx+3]
355
	add	bl, [edx+3]
356
	adc	bh, 0
356
	adc	bh, 0
357
	add	ebx, 15
357
	add	ebx, 15
358
	shr	ebx, 4
358
	shr	ebx, 4
359
	mov	cl, 8
359
	mov	cl, 8
360
	lea	ebx, [edi+ebx*2]
360
	lea	ebx, [edi+ebx*2]
361
	sub	ebx, [esp+12]
361
	sub	ebx, [esp+12]
362
	add	ebx, 31
362
	add	ebx, 31
363
	shr	ebx, 5
363
	shr	ebx, 5
364
	mov	edi, ebx
364
	mov	edi, ebx
365
	shl	edi, 5
365
	shl	edi, 5
366
	add	edi, [esp+12]
366
	add	edi, [esp+12]
367
	xor	ebx, 9
367
	xor	ebx, 9
368
	shl	ebx, 16
368
	shl	ebx, 16
369
	xor	eax, ebx
369
	xor	eax, ebx
370
	push	edi
370
	push	edi
371
.dht_l7:
371
.dht_l7:
372
	movzx	ebx, byte [edx]
372
	movzx	ebx, byte [edx]
373
	inc	edx
373
	inc	edx
374
	test	ebx, ebx
374
	test	ebx, ebx
375
	jz	.dht_l10
375
	jz	.dht_l10
376
.dht_l8:
376
.dht_l8:
377
	cmp	edi, [esp]
377
	cmp	edi, [esp]
378
	jb	.dht_l9
378
	jb	.dht_l9
379
	mov	edi, [esp+4]
379
	mov	edi, [esp+4]
380
	cmp	edi, [esp+8]
380
	cmp	edi, [esp+8]
381
	jb	@f
381
	jb	@f
382
	mov	edi, [esp+12]
382
	mov	edi, [esp+12]
383
	cmp	edi, [esp+16]
383
	cmp	edi, [esp+16]
384
	jae	.dhterr3
384
	jae	.dhterr3
385
	mov	al, 9
385
	mov	al, 9
386
	stosb
386
	stosb
387
	rol	eax, 8
387
	rol	eax, 8
388
	stosb
388
	stosb
389
	inc	eax
389
	inc	eax
390
	ror	eax, 8
390
	ror	eax, 8
391
	mov	[esp+12], edi
391
	mov	[esp+12], edi
392
	mov	edi, [esp+8]
392
	mov	edi, [esp+8]
393
	add	dword [esp+8], 16*2
393
	add	dword [esp+8], 16*2
394
@@:
394
@@:
395
	mov	al, 9
395
	mov	al, 9
396
	stosb
396
	stosb
397
	rol	eax, 16
397
	rol	eax, 16
398
	stosb
398
	stosb
399
	inc	eax
399
	inc	eax
400
	ror	eax, 16
400
	ror	eax, 16
401
	mov	[esp+4], edi
401
	mov	[esp+4], edi
402
	pop	edi
402
	pop	edi
403
	push	edi
403
	push	edi
404
	add	dword [esp], 16*2
404
	add	dword [esp], 16*2
405
.dht_l9:
405
.dht_l9:
406
	lodsb
406
	lodsb
407
	xchg	al, ah
407
	xchg	al, ah
408
	push	ecx
408
	push	ecx
409
	rep	stosw
409
	rep	stosw
410
	pop	ecx
410
	pop	ecx
411
	xchg	al, ah
411
	xchg	al, ah
412
	dec	ebx
412
	dec	ebx
413
	jnz	.dht_l8
413
	jnz	.dht_l8
414
.dht_l10:
414
.dht_l10:
415
	inc	ah
415
	inc	ah
416
	shr	ecx, 1
416
	shr	ecx, 1
417
	jnz	.dht_l7
417
	jnz	.dht_l7
418
	push	-1
418
	push	-1
419
	pop	eax
419
	pop	eax
420
	pop	ecx
420
	pop	ecx
421
	sub	ecx, edi
421
	sub	ecx, edi
422
	rep	stosb
422
	rep	stosb
423
	pop	edi
423
	pop	edi
424
	pop	ecx
424
	pop	ecx
425
	sub	ecx, edi
425
	sub	ecx, edi
426
	rep	stosb
426
	rep	stosb
427
	pop	edi
427
	pop	edi
428
	pop	ecx
428
	pop	ecx
429
	sub	ecx, edi
429
	sub	ecx, edi
430
	rep	stosb
430
	rep	stosb
431
	pop	edx ebx
431
	pop	edx ebx
432
	jmp	.dht_loop
432
	jmp	.dht_loop
433
.dhterr3:
433
.dhterr3:
434
	pop	eax eax
434
	pop	eax eax
435
.dhterr2:
435
.dhterr2:
436
	pop	eax eax
436
	pop	eax eax
437
.dhterr1:
437
.dhterr1:
438
	pop	eax
438
	pop	eax
439
	pop	edx ebx
439
	pop	edx ebx
440
.end2:
440
.end2:
441
	jmp	.end
441
	jmp	.end
442
.sofn:
442
.sofn:
443
; SOFn marker found
443
; SOFn marker found
444
	cmp	[ebx+jpeg.work.image], 0
444
	cmp	[ebx+jpeg.work.image], 0
445
	jnz	.end2	; only one frame is allowed
445
	jnz	.end2	; only one frame is allowed
446
; only SOF0 [baseline sequential], SOF1 [extended sequential], SOF2 [progressive]
446
; only SOF0 [baseline sequential], SOF1 [extended sequential], SOF2 [progressive]
447
; nobody supports other compression methods
447
; nobody supports other compression methods
448
	cmp	al, 0xC2
448
	cmp	al, 0xC2
449
	ja	.end2
449
	ja	.end2
450
	setz	[ebx+jpeg.work.progressive]
450
	setz	[ebx+jpeg.work.progressive]
451
; Length must be at least 8
451
; Length must be at least 8
452
	sub	edx, 8
452
	sub	edx, 8
453
	jb	.end2
453
	jb	.end2
454
; Sample precision in JFIF must be 8 bits
454
; Sample precision in JFIF must be 8 bits
455
	cmp	byte [esi+2], 8
455
	cmp	byte [esi+2], 8
456
	jnz	.end2
456
	jnz	.end2
457
; Color space in JFIF is either YCbCr (color images, 3 components)
457
; Color space in JFIF is either YCbCr (color images, 3 components)
458
;                        or Y (grey images, 1 component)
458
;                        or Y (grey images, 1 component)
459
	movzx	eax, byte [esi+7]
459
	movzx	eax, byte [esi+7]
460
	cmp	al, 1
460
	cmp	al, 1
461
	jz	@f
461
	jz	@f
462
	cmp	al, 3
462
	cmp	al, 3
463
	jz	@f
463
	jz	@f
464
; Adobe products sometimes use YCCK color space with 4 components
464
; Adobe products sometimes use YCCK color space with 4 components
465
	cmp	al, 4
465
	cmp	al, 4
466
	jnz	.end2
466
	jnz	.end2
467
	cmp	[ebx+jpeg.work.adobe_ycck], 0
467
	cmp	[ebx+jpeg.work.adobe_ycck], 0
468
	jz	.end2
468
	jz	.end2
469
@@:
469
@@:
470
	mov	edi, eax	; edi = number of components
470
	mov	edi, eax	; edi = number of components
471
	lea	eax, [eax*3]
471
	lea	eax, [eax*3]
472
	sub	edx, eax
472
	sub	edx, eax
473
	jnz	.end2
473
	jnz	.end2
474
; image type: 8 bpp for grayscale JPEGs, 24 bpp for normal,
474
; image type: 8 bpp for grayscale JPEGs, 24 bpp for normal,
475
; 32 bpp for Adobe YCCK
475
; 32 bpp for Adobe YCCK
476
	push	Image.bpp8
476
	push	Image.bpp8
477
	pop	eax	; Image.bpp8 = 1
477
	pop	eax	; Image.bpp8 = 1
478
	cmp	edi, eax
478
	cmp	edi, eax
479
	jz	@f
479
	jz	@f
480
	inc	eax	; Image.bpp24 = 2
480
	inc	eax	; Image.bpp24 = 2
481
	cmp	edi, 3
481
	cmp	edi, 3
482
	jz	@f
482
	jz	@f
483
	inc	eax	; Image.bpp32 = 3
483
	inc	eax	; Image.bpp32 = 3
484
@@:
484
@@:
485
	push	eax
485
	push	eax
486
; get width and height
486
; get width and height
487
; width must be nonzero
487
; width must be nonzero
488
; height must be nonzero - nobody supports DNL markers
488
; height must be nonzero - nobody supports DNL markers
489
	mov	ah, [esi+3]
489
	mov	ah, [esi+3]
490
	mov	al, [esi+4]	; eax = height
490
	mov	al, [esi+4]	; eax = height
491
	xor	ecx, ecx
491
	xor	ecx, ecx
492
	mov	ch, [esi+5]
492
	mov	ch, [esi+5]
493
	mov	cl, [esi+6]	; ecx = width
493
	mov	cl, [esi+6]	; ecx = width
494
; allocate memory for image
494
; allocate memory for image
495
	stdcall img.create, ecx, eax
495
	stdcall img.create, ecx, eax
496
	test	eax, eax
496
	test	eax, eax
497
	jz	.end2
497
	jz	.end2
498
	mov	[ebx + jpeg.work.image], eax
498
	mov	[ebx + jpeg.work.image], eax
499
; create grayscale palette if needed
499
; create grayscale palette if needed
500
	cmp	edi, 1
500
	cmp	edi, 1
501
	jnz	.no_create_palette
501
	jnz	.no_create_palette
502
	push	ecx edi
502
	push	ecx edi
503
	mov	edi, [eax + Image.Palette]
503
	mov	edi, [eax + Image.Palette]
504
	xor	eax, eax
504
	xor	eax, eax
505
	mov	ecx, 256
505
	mov	ecx, 256
506
@@:
506
@@:
507
	stosd
507
	stosd
508
	add	eax, 0x010101
508
	add	eax, 0x010101
509
	loop	@b
509
	loop	@b
510
	pop	edi ecx
510
	pop	edi ecx
511
.no_create_palette:
511
.no_create_palette:
512
; other image characteristics
512
; other image characteristics
513
	mov	eax, edi
513
	mov	eax, edi
514
	shl	eax, 3
514
	shl	eax, 3
515
	mov	[ebx + jpeg.work.delta_x], eax
515
	mov	[ebx + jpeg.work.delta_x], eax
516
	mov	[ebx + jpeg.work.pixel_size], edi
516
	mov	[ebx + jpeg.work.pixel_size], edi
517
	;mov	eax, edi
517
	;mov	eax, edi
518
	imul	eax, ecx
518
	imul	eax, ecx
519
	mov	[ebx + jpeg.work.delta_y], eax
519
	mov	[ebx + jpeg.work.delta_y], eax
520
	shr	eax, 3
520
	shr	eax, 3
521
	mov	[ebx + jpeg.work.line_size], eax
521
	mov	[ebx + jpeg.work.line_size], eax
522
	add	esi, 8
522
	add	esi, 8
523
	mov	ecx, edi
523
	mov	ecx, edi
524
	lea	edi, [ebx + jpeg.work.components]
524
	lea	edi, [ebx + jpeg.work.components]
525
	xor	eax, eax
525
	xor	eax, eax
526
	xor	edx, edx
526
	xor	edx, edx
527
.sof_parse_comp:
527
.sof_parse_comp:
528
	movsb	; db ComponentIdentifier
528
	movsb	; db ComponentIdentifier
529
	lodsb
529
	lodsb
530
	mov	ah, al
530
	mov	ah, al
531
	and	al, 0xF
531
	and	al, 0xF
532
	jz	.end3
532
	jz	.end3
533
	shr	ah, 4
533
	shr	ah, 4
534
	jz	.end3
534
	jz	.end3
535
	stosd	; db V, db H, db ?, db ? (will be filled later)
535
	stosd	; db V, db H, db ?, db ? (will be filled later)
536
	cmp	dl, al
536
	cmp	dl, al
537
	ja	@f
537
	ja	@f
538
	mov	dl, al
538
	mov	dl, al
539
@@:
539
@@:
540
	cmp	dh, ah
540
	cmp	dh, ah
541
	ja	@f
541
	ja	@f
542
	mov	dh, ah
542
	mov	dh, ah
543
@@:
543
@@:
544
	movsb	; db QuantizationTableID
544
	movsb	; db QuantizationTableID
545
	loop	.sof_parse_comp
545
	loop	.sof_parse_comp
546
	mov	word [ebx + jpeg.work.max_v], dx
546
	mov	word [ebx + jpeg.work.max_v], dx
547
	movzx	eax, dh
547
	movzx	eax, dh
548
	movzx	edx, dl
548
	movzx	edx, dl
549
	push	eax edx
549
	push	eax edx
550
	shl	eax, 3
550
	shl	eax, 3
551
	shl	edx, 3
551
	shl	edx, 3
552
	mov	[ebx + jpeg.work.block_width], eax
552
	mov	[ebx + jpeg.work.block_width], eax
553
	mov	[ebx + jpeg.work.block_height], edx
553
	mov	[ebx + jpeg.work.block_height], edx
554
	pop	edx eax
554
	pop	edx eax
555
	push	eax edx
555
	push	eax edx
556
	imul	eax, [ebx + jpeg.work.delta_x]
556
	imul	eax, [ebx + jpeg.work.delta_x]
557
	mov	[ebx + jpeg.work.block_delta_x], eax
557
	mov	[ebx + jpeg.work.block_delta_x], eax
558
	imul	edx, [ebx + jpeg.work.delta_y]
558
	imul	edx, [ebx + jpeg.work.delta_y]
559
	mov	[ebx + jpeg.work.block_delta_y], edx
559
	mov	[ebx + jpeg.work.block_delta_y], edx
560
	mov	ecx, [ebx + jpeg.work.image]
560
	mov	ecx, [ebx + jpeg.work.image]
561
	mov	eax, [ecx + Image.Width]
561
	mov	eax, [ecx + Image.Width]
562
	add	eax, [ebx + jpeg.work.block_width]
562
	add	eax, [ebx + jpeg.work.block_width]
563
	dec	eax
563
	dec	eax
564
	xor	edx, edx
564
	xor	edx, edx
565
	div	[ebx + jpeg.work.block_width]
565
	div	[ebx + jpeg.work.block_width]
566
	mov	[ebx + jpeg.work.x_num_blocks], eax
566
	mov	[ebx + jpeg.work.x_num_blocks], eax
567
	mov	eax, [ecx + Image.Height]
567
	mov	eax, [ecx + Image.Height]
568
	add	eax, [ebx + jpeg.work.block_height]
568
	add	eax, [ebx + jpeg.work.block_height]
569
	dec	eax
569
	dec	eax
570
	xor	edx, edx
570
	xor	edx, edx
571
	div	[ebx + jpeg.work.block_height]
571
	div	[ebx + jpeg.work.block_height]
572
	mov	[ebx + jpeg.work.y_num_blocks], eax
572
	mov	[ebx + jpeg.work.y_num_blocks], eax
573
	mov	ecx, [ebx + jpeg.work.pixel_size]
573
	mov	ecx, [ebx + jpeg.work.pixel_size]
574
	pop	edx
574
	pop	edx
575
	lea	edi, [ebx + jpeg.work.components]
575
	lea	edi, [ebx + jpeg.work.components]
576
@@:
576
@@:
577
	mov	eax, edx
577
	mov	eax, edx
578
	div	byte [edi+1]	; VMax / V_i = VFactor_i
578
	div	byte [edi+1]	; VMax / V_i = VFactor_i
579
	mov	byte [edi+3], al	; db VFactor
579
	mov	byte [edi+3], al	; db VFactor
580
	pop	eax
580
	pop	eax
581
	push	eax
581
	push	eax
582
	div	byte [edi+2]	; HMax / H_i = HFactor_i
582
	div	byte [edi+2]	; HMax / H_i = HFactor_i
583
	mov	byte [edi+4], al	; db HFactor
583
	mov	byte [edi+4], al	; db HFactor
584
	add	edi, 6
584
	add	edi, 6
585
	loop	@b
585
	loop	@b
586
	pop	eax
586
	pop	eax
587
	cmp	[ebx + jpeg.work.progressive], 0
587
	cmp	[ebx + jpeg.work.progressive], 0
588
	jz	.sof_noprogressive
588
	jz	.sof_noprogressive
589
	mov	eax, [ebx + jpeg.work.x_num_blocks]
589
	mov	eax, [ebx + jpeg.work.x_num_blocks]
590
	mul	[ebx + jpeg.work.block_width]
590
	mul	[ebx + jpeg.work.block_width]
591
	mul	[ebx + jpeg.work.y_num_blocks]
591
	mul	[ebx + jpeg.work.y_num_blocks]
592
	mul	[ebx + jpeg.work.block_height]
592
	mul	[ebx + jpeg.work.block_height]
593
	add	eax, eax
593
	add	eax, eax
594
	mov	[ebx + jpeg.work.dct_buffer_size], eax
594
	mov	[ebx + jpeg.work.dct_buffer_size], eax
595
	mul	[ebx + jpeg.work.pixel_size]
595
	mul	[ebx + jpeg.work.pixel_size]
596
	push	eax
596
	push	eax
597
	call	[mem.alloc]
597
	call	[mem.alloc]
598
	test	eax, eax
598
	test	eax, eax
599
	jnz	@f
599
	jnz	@f
600
	xchg	eax, [ebx + jpeg.work.image]
600
	xchg	eax, [ebx + jpeg.work.image]
601
	push	eax
601
	push	eax
602
	call	img.destroy
602
	call	img.destroy
603
	jmp	.end
603
	jmp	.end
604
@@:
604
@@:
605
	mov	[ebx + jpeg.work.dct_buffer], eax
605
	mov	[ebx + jpeg.work.dct_buffer], eax
606
.sof_noprogressive:
606
.sof_noprogressive:
607
	jmp	.markers_loop
607
	jmp	.markers_loop
608
.end3:
608
.end3:
609
	jmp	.end
609
	jmp	.end
610
.sos:
610
.sos:
611
; SOS marker found
611
; SOS marker found
612
; frame must be already opened
612
; frame must be already opened
613
	cmp	[ebx + jpeg.work.image], 0
613
	cmp	[ebx + jpeg.work.image], 0
614
	jz	.end3
614
	jz	.end3
615
	cmp	edx, 6
615
	cmp	edx, 6
616
	jb	.end3
616
	jb	.end3
617
; parse marker
617
; parse marker
618
	movzx	eax, byte [esi+2]	; number of components in this scan
618
	movzx	eax, byte [esi+2]	; number of components in this scan
619
	test	eax, eax
619
	test	eax, eax
620
	jz	.end3		; must be nonzero
620
	jz	.end3		; must be nonzero
621
	cmp	al, byte [ebx + jpeg.work.pixel_size]
621
	cmp	al, byte [ebx + jpeg.work.pixel_size]
622
	ja	.end3		; must be <= total number of components
622
	ja	.end3		; must be <= total number of components
623
;	mov	[ns], eax
623
;	mov	[ns], eax
624
	cmp	al, 1
624
	cmp	al, 1
625
	setz	[ebx + jpeg.work.not_interleaved]
625
	setz	[ebx + jpeg.work.not_interleaved]
626
	lea	ecx, [6+eax+eax]
626
	lea	ecx, [6+eax+eax]
627
	cmp	edx, ecx
627
	cmp	edx, ecx
628
	jnz	.end3
628
	jnz	.end3
629
	mov	ecx, eax
629
	mov	ecx, eax
630
	lea	edi, [ebx + jpeg.work.cur_components]
630
	lea	edi, [ebx + jpeg.work.cur_components]
631
	add	esi, 3
631
	add	esi, 3
632
.sos_find_comp:
632
.sos_find_comp:
633
	lodsb	; got ComponentID, look for component info
633
	lodsb	; got ComponentID, look for component info
634
	push	ecx esi
634
	push	ecx esi
635
	mov	ecx, [ebx + jpeg.work.pixel_size]
635
	mov	ecx, [ebx + jpeg.work.pixel_size]
636
	lea	esi, [ebx + jpeg.work.components]
636
	lea	esi, [ebx + jpeg.work.components]
637
	and	dword [edi+48], 0
637
	and	dword [edi+48], 0
638
	and	dword [edi+52], 0
638
	and	dword [edi+52], 0
639
@@:
639
@@:
640
	cmp	[esi], al
640
	cmp	[esi], al
641
	jz	@f
641
	jz	@f
642
	inc	dword [edi+52]
642
	inc	dword [edi+52]
643
	add	esi, 6
643
	add	esi, 6
644
	loop	@b
644
	loop	@b
645
@@:
645
@@:
646
	mov	eax, [esi+1]
646
	mov	eax, [esi+1]
647
	mov	dl, [esi+5]
647
	mov	dl, [esi+5]
648
	pop	esi ecx
648
	pop	esi ecx
649
	jnz	.end3	; bad ComponentID
649
	jnz	.end3	; bad ComponentID
650
	cmp	[ebx + jpeg.work.not_interleaved], 0
650
	cmp	[ebx + jpeg.work.not_interleaved], 0
651
	jz	@f
651
	jz	@f
652
	mov	ax, 0x0101
652
	mov	ax, 0x0101
653
@@:
653
@@:
654
	stosd		; db V, db H, db VFactor, db HFactor
654
	stosd		; db V, db H, db VFactor, db HFactor
655
	push	ecx
655
	push	ecx
656
	xor	eax, eax
656
	xor	eax, eax
657
	mov	al, byte [edi-1]	; get HFactor
657
	mov	al, byte [edi-1]	; get HFactor
658
	mul	byte [ebx+jpeg.work.pixel_size]	; number of components
658
	mul	byte [ebx+jpeg.work.pixel_size]	; number of components
659
	stosd			; HIncrement_i = HFactor_i * sizeof(pixel)
659
	stosd			; HIncrement_i = HFactor_i * sizeof(pixel)
660
	mov	al, byte [edi-4-2]	; get VFactor
660
	mov	al, byte [edi-4-2]	; get VFactor
661
	mul	byte [ebx+jpeg.work.pixel_size]	; number of components
661
	mul	byte [ebx+jpeg.work.pixel_size]	; number of components
662
	mov	ecx, [ebx+jpeg.work.image]
662
	mov	ecx, [ebx+jpeg.work.image]
663
	imul	eax, [ecx+Image.Width]	; image width
663
	imul	eax, [ecx+Image.Width]	; image width
664
	stosd			; VIncrement_i = VFactor_i * sizeof(row)
664
	stosd			; VIncrement_i = VFactor_i * sizeof(row)
665
	xchg	eax, edx
665
	xchg	eax, edx
666
	and	eax, 3
666
	and	eax, 3
667
	cmp	[ebx+jpeg.work.quant_tables_defined+eax], 0
667
	cmp	[ebx+jpeg.work.quant_tables_defined+eax], 0
668
	jz	.end3
668
	jz	.end3
669
	shl	eax, 8
669
	shl	eax, 8
670
	lea	eax, [ebx+eax+jpeg.work.quant_tables]
670
	lea	eax, [ebx+eax+jpeg.work.quant_tables]
671
	stosd		; dd QuantizationTable
671
	stosd		; dd QuantizationTable
672
	lodsb
672
	lodsb
673
	movzx	eax, al
673
	movzx	eax, al
674
	mov	edx, eax
674
	mov	edx, eax
675
	shr	eax, 4
675
	shr	eax, 4
676
	and	edx, 3
676
	and	edx, 3
677
	and	eax, 3
677
	and	eax, 3
678
	cmp	[ebx+jpeg.work.dc_huffman_defined+eax], 0
678
	cmp	[ebx+jpeg.work.dc_huffman_defined+eax], 0
679
	jnz	.dc_table_ok
679
	jnz	.dc_table_ok
680
	cmp	[ebx+jpeg.work.progressive], 0
680
	cmp	[ebx+jpeg.work.progressive], 0
681
	jz	.end3
681
	jz	.end3
682
	xor	eax, eax
682
	xor	eax, eax
683
	jmp	.dc_table_done
683
	jmp	.dc_table_done
684
.dc_table_ok:
684
.dc_table_ok:
685
;	shl	eax, 11
685
;	shl	eax, 11
686
	imul	eax, max_hufftable_size
686
	imul	eax, max_hufftable_size
687
	lea	eax, [ebx+jpeg.work.dc_huffman+eax]
687
	lea	eax, [ebx+jpeg.work.dc_huffman+eax]
688
.dc_table_done:
688
.dc_table_done:
689
	cmp	[ebx+jpeg.work.ac_huffman_defined+edx], 0
689
	cmp	[ebx+jpeg.work.ac_huffman_defined+edx], 0
690
	jnz	.ac_table_ok
690
	jnz	.ac_table_ok
691
	cmp	[ebx+jpeg.work.progressive], 0
691
	cmp	[ebx+jpeg.work.progressive], 0
692
	jz	.end3
692
	jz	.end3
693
	xor	edx, edx
693
	xor	edx, edx
694
	jmp	.ac_table_done
694
	jmp	.ac_table_done
695
.ac_table_ok:
695
.ac_table_ok:
696
;	shl	edx, 11
696
;	shl	edx, 11
697
	imul	edx, max_hufftable_size
697
	imul	edx, max_hufftable_size
698
	lea	edx, [ebx+jpeg.work.ac_huffman+edx]
698
	lea	edx, [ebx+jpeg.work.ac_huffman+edx]
699
.ac_table_done:
699
.ac_table_done:
700
	stosd		; dd DCTable
700
	stosd		; dd DCTable
701
	xchg	eax, edx
701
	xchg	eax, edx
702
	stosd		; dd ACTable
702
	stosd		; dd ACTable
703
	mov	eax, [ecx+Image.Width]
703
	mov	eax, [ecx+Image.Width]
704
	movzx	ecx, byte [edi-21]	; get HFactor
704
	movzx	ecx, byte [edi-21]	; get HFactor
705
	cdq	; edx:eax = width (width<0x10000, so as dword it is unsigned)
705
	cdq	; edx:eax = width (width<0x10000, so as dword it is unsigned)
706
	div	ecx
706
	div	ecx
707
	stosd		; dd width / HFactor_i
707
	stosd		; dd width / HFactor_i
708
	stosd
708
	stosd
709
	xchg	eax, ecx
709
	xchg	eax, ecx
710
	inc	eax
710
	inc	eax
711
	sub	eax, edx
711
	sub	eax, edx
712
	stosd		; dd HFactor_i+1 - (width % HFactor_i)
712
	stosd		; dd HFactor_i+1 - (width % HFactor_i)
713
	mov	ecx, [ebx+jpeg.work.image]
713
	mov	ecx, [ebx+jpeg.work.image]
714
	mov	eax, [ecx+Image.Height]
714
	mov	eax, [ecx+Image.Height]
715
	movzx	ecx, byte [edi-34]	; get VFactor
715
	movzx	ecx, byte [edi-34]	; get VFactor
716
	cdq
716
	cdq
717
	div	ecx
717
	div	ecx
718
	stosd		; dd height / VFactor_i
718
	stosd		; dd height / VFactor_i
719
	stosd
719
	stosd
720
	xchg	eax, ecx
720
	xchg	eax, ecx
721
	inc	eax
721
	inc	eax
722
	sub	eax, edx
722
	sub	eax, edx
723
	stosd		; dd VFactor_i+1 - (height % VFactor_i)
723
	stosd		; dd VFactor_i+1 - (height % VFactor_i)
724
	pop	ecx
724
	pop	ecx
725
	scasd		; dd DCPrediction
725
	scasd		; dd DCPrediction
726
	cmp	dword [edi], 0
726
	cmp	dword [edi], 0
727
	setnp	al
727
	setnp	al
728
	ror	al, 1
728
	ror	al, 1
729
	mov	byte [edi-1], al
729
	mov	byte [edi-1], al
730
	scasd		; dd ComponentOffset
730
	scasd		; dd ComponentOffset
731
	dec	ecx
731
	dec	ecx
732
	jnz	.sos_find_comp
732
	jnz	.sos_find_comp
733
	mov	[ebx+jpeg.work.cur_components_end], edi
733
	mov	[ebx+jpeg.work.cur_components_end], edi
734
	lea	edi, [ebx+jpeg.work.ScanStart]
734
	lea	edi, [ebx+jpeg.work.ScanStart]
735
	movsb
735
	movsb
736
	cmp	byte [esi], 63
736
	cmp	byte [esi], 63
737
	ja	.end3
737
	ja	.end3
738
	movsb
738
	movsb
739
	lodsb
739
	lodsb
740
	push	eax
740
	push	eax
741
	and	al, 0xF
741
	and	al, 0xF
742
	stosb
742
	stosb
743
	pop	eax
743
	pop	eax
744
	shr	al, 4
744
	shr	al, 4
745
	stosb
745
	stosb
746
; now unpack data
746
; now unpack data
747
	call	init_limits
747
	call	init_limits
748
	and	[ebx+jpeg.work.decoded_MCUs], 0
748
	and	[ebx+jpeg.work.decoded_MCUs], 0
749
	mov	[ebx+jpeg.work.cur_rst_marker], 7
749
	mov	[ebx+jpeg.work.cur_rst_marker], 7
750
	and	[ebx+jpeg.work.huffman_bits], 0
750
	and	[ebx+jpeg.work.huffman_bits], 0
751
	cmp	[ebx+jpeg.work.progressive], 0
751
	cmp	[ebx+jpeg.work.progressive], 0
752
	jz	.sos_noprogressive
752
	jz	.sos_noprogressive
753
; progressive mode - only decode DCT coefficients
753
; progressive mode - only decode DCT coefficients
754
; initialize pointers to coefficients data
754
; initialize pointers to coefficients data
755
; zero number of EOBs for AC coefficients
755
; zero number of EOBs for AC coefficients
756
; redefine HIncrement and VIncrement
756
; redefine HIncrement and VIncrement
757
	lea	edi, [ebx+jpeg.work.cur_components]
757
	lea	edi, [ebx+jpeg.work.cur_components]
758
.coeff_init:
758
.coeff_init:
759
	mov	eax, [ebx+jpeg.work.dct_buffer_size]
759
	mov	eax, [ebx+jpeg.work.dct_buffer_size]
760
	mul	dword [edi+52]
760
	mul	dword [edi+52]
761
	add	eax, [ebx+jpeg.work.dct_buffer]
761
	add	eax, [ebx+jpeg.work.dct_buffer]
762
	mov	[edi+12], eax
762
	mov	[edi+12], eax
763
	and	dword [edi+52], 0
763
	and	dword [edi+52], 0
764
	cmp	[ebx+jpeg.work.ScanStart], 0
764
	cmp	[ebx+jpeg.work.ScanStart], 0
765
	jz	.scan_dc
765
	jz	.scan_dc
766
	cmp	dword [edi+20], 0
766
	cmp	dword [edi+20], 0
767
	jz	.end3
767
	jz	.end3
768
	jmp	@f
768
	jmp	@f
769
.scan_dc:
769
.scan_dc:
770
	cmp	dword [edi+16], 0
770
	cmp	dword [edi+16], 0
771
	jz	.end3
771
	jz	.end3
772
@@:
772
@@:
773
	movzx	eax, byte [edi+1]
773
	movzx	eax, byte [edi+1]
774
	shl	eax, 7
774
	shl	eax, 7
775
	mov	[edi+4], eax
775
	mov	[edi+4], eax
776
	mov	eax, [edi+28]
776
	mov	eax, [edi+28]
777
	mov	cl, [edi+3]
777
	mov	cl, [edi+3]
778
	cmp	cl, [edi+32]
778
	cmp	cl, [edi+32]
779
	sbb	eax, -7-1
779
	sbb	eax, -7-1
780
	shr	eax, 3
780
	shr	eax, 3
781
	shl	eax, 7
781
	shl	eax, 7
782
	mov	[edi+8], eax
782
	mov	[edi+8], eax
783
	add	edi, 56
783
	add	edi, 56
784
	cmp	edi, [ebx+jpeg.work.cur_components_end]
784
	cmp	edi, [ebx+jpeg.work.cur_components_end]
785
	jb	.coeff_init
785
	jb	.coeff_init
786
; unpack coefficients
786
; unpack coefficients
787
; N.B. Speed optimization has sense here.
787
; N.B. Speed optimization has sense here.
788
.coeff_decode_loop:
788
.coeff_decode_loop:
789
	lea	edx, [ebx+jpeg.work.cur_components]
789
	lea	edx, [ebx+jpeg.work.cur_components]
790
.coeff_components_loop:
790
.coeff_components_loop:
791
	mov	edi, [edx+12]
791
	mov	edi, [edx+12]
792
	movzx	ecx, byte [edx]
792
	movzx	ecx, byte [edx]
793
	push	dword [edx+40]
793
	push	dword [edx+40]
794
	push	edi
794
	push	edi
795
.coeff_y_loop:
795
.coeff_y_loop:
796
	push	ecx
796
	push	ecx
797
	movzx	eax, byte [edx+1]
797
	movzx	eax, byte [edx+1]
798
	push	dword [edx+28]
798
	push	dword [edx+28]
799
	push	edi
799
	push	edi
800
.coeff_x_loop:
800
.coeff_x_loop:
801
	cmp	dword [edx+40], 0
801
	cmp	dword [edx+40], 0
802
	jl	@f
802
	jl	@f
803
	cmp	dword [edx+28], 0
803
	cmp	dword [edx+28], 0
804
	jge	.realdata
804
	jge	.realdata
805
@@:
805
@@:
806
	cmp	[ebx+jpeg.work.not_interleaved], 0
806
	cmp	[ebx+jpeg.work.not_interleaved], 0
807
	jnz	.norealdata
807
	jnz	.norealdata
808
	push	eax edi
808
	push	eax edi
809
	lea	edi, [ebx+jpeg.work.dct_coeff]
809
	lea	edi, [ebx+jpeg.work.dct_coeff]
810
	call	decode_progressive_coeff
810
	call	decode_progressive_coeff
811
	pop	edi eax
811
	pop	edi eax
812
	jmp	.norealdata
812
	jmp	.norealdata
813
.realdata:
813
.realdata:
814
	push	eax
814
	push	eax
815
	call	decode_progressive_coeff
815
	call	decode_progressive_coeff
816
	add	edi, 64*2
816
	add	edi, 64*2
817
	pop	eax
817
	pop	eax
818
.norealdata:
818
.norealdata:
819
	sub	dword [edx+28], 8
819
	sub	dword [edx+28], 8
820
	sub	eax, 1
820
	sub	eax, 1
821
	jnz	.coeff_x_loop
821
	jnz	.coeff_x_loop
822
	pop	edi
822
	pop	edi
823
	pop	dword [edx+28]
823
	pop	dword [edx+28]
824
	add	edi, [edx+8]
824
	add	edi, [edx+8]
825
	pop	ecx
825
	pop	ecx
826
	sub	dword [edx+40], 8
826
	sub	dword [edx+40], 8
827
	sub	ecx, 1
827
	sub	ecx, 1
828
	jnz	.coeff_y_loop
828
	jnz	.coeff_y_loop
829
	movzx	eax, byte [edx+1]
829
	movzx	eax, byte [edx+1]
830
	shl	eax, 3
830
	shl	eax, 3
831
	pop	edi
831
	pop	edi
832
	add	edi, [edx+4]
832
	add	edi, [edx+4]
833
	pop	dword [edx+40]
833
	pop	dword [edx+40]
834
	sub	[edx+28], eax
834
	sub	[edx+28], eax
835
	mov	[edx+12], edi
835
	mov	[edx+12], edi
836
	add	edx, 56
836
	add	edx, 56
837
	cmp	edx, [ebx+jpeg.work.cur_components_end]
837
	cmp	edx, [ebx+jpeg.work.cur_components_end]
838
	jnz	.coeff_components_loop
838
	jnz	.coeff_components_loop
839
	call	next_MCU
839
	call	next_MCU
840
	jc	.norst
840
	jc	.norst
841
	sub	[ebx+jpeg.work.cur_x], 1
841
	sub	[ebx+jpeg.work.cur_x], 1
842
	jnz	.coeff_decode_loop
842
	jnz	.coeff_decode_loop
843
	call	next_line
843
	call	next_line
844
	lea	edx, [ebx+jpeg.work.cur_components]
844
	lea	edx, [ebx+jpeg.work.cur_components]
845
@@:
845
@@:
846
	mov	eax, [ebx+jpeg.work.max_x]
846
	mov	eax, [ebx+jpeg.work.max_x]
847
	imul	eax, [edx+4]
847
	imul	eax, [edx+4]
848
	sub	[edx+12], eax
848
	sub	[edx+12], eax
849
	movzx	eax, byte [edx]
849
	movzx	eax, byte [edx]
850
	imul	eax, [edx+8]
850
	imul	eax, [edx+8]
851
	add	[edx+12], eax
851
	add	[edx+12], eax
852
	add	edx, 56
852
	add	edx, 56
853
	cmp	edx, [ebx+jpeg.work.cur_components_end]
853
	cmp	edx, [ebx+jpeg.work.cur_components_end]
854
	jnz	@b
854
	jnz	@b
855
	sub	[ebx+jpeg.work.cur_y], 1
855
	sub	[ebx+jpeg.work.cur_y], 1
856
	jnz	.coeff_decode_loop
856
	jnz	.coeff_decode_loop
857
	jmp	.markers_loop
857
	jmp	.markers_loop
858
.norst:
858
.norst:
859
.end4:
859
.end4:
860
	jmp	.end3
860
	jmp	.end3
861
.sos_noprogressive:
861
.sos_noprogressive:
862
; normal mode - unpack JPEG image
862
; normal mode - unpack JPEG image
863
	mov	edi, [ebx+jpeg.work.image]
863
	mov	edi, [ebx+jpeg.work.image]
864
	mov	edi, [edi+Image.Data]
864
	mov	edi, [edi+Image.Data]
865
	mov	[ebx+jpeg.work.cur_out_ptr], edi
865
	mov	[ebx+jpeg.work.cur_out_ptr], edi
866
; N.B. Speed optimization has sense here.
866
; N.B. Speed optimization has sense here.
867
.decode_loop:
867
.decode_loop:
868
	call	decode_MCU
868
	call	decode_MCU
869
	call	next_MCU
869
	call	next_MCU
870
	jc	.end4
870
	jc	.end4
871
	sub	[ebx+jpeg.work.cur_x], 1
871
	sub	[ebx+jpeg.work.cur_x], 1
872
	jnz	.decode_loop
872
	jnz	.decode_loop
873
	call	next_line
873
	call	next_line
874
	sub	[ebx+jpeg.work.cur_y], 1
874
	sub	[ebx+jpeg.work.cur_y], 1
875
	jnz	.decode_loop
875
	jnz	.decode_loop
876
	jmp	.markers_loop
876
	jmp	.markers_loop
877
 
877
 
878
get_marker:
; Fetch the next JPEG marker code from the input stream.
; in:  esi -> current input position
;      ebp = number of input bytes still available at esi
; out: CF=0, al=marker value - ok
;      CF=1 - no marker (input exhausted, or no 0xFF prefix found)
;      esi and ebp are advanced over every byte that was consumed
	sub	ebp, 1
	jc	.ret		; nothing left to read
	lodsb
if 1
	cmp	al, 0xFF
	jae	@f		; al = 0xFF: marker prefix found right away
; Some encoders write a segment length that is two less than the true
; value (in JPEG the length of a marker segment INCLUDES the two bytes
; of the length field itself). To open such files, tolerate two stray
; bytes before the next marker: the byte just read and the one after
; it are dropped, and the third byte must be the 0xFF prefix.
	cmp	ebp, 2
	jb	.ret		; CF=1: fewer than two bytes left
	sub	ebp, 2		; keep the remaining-byte count in step with esi
	lodsb
	lodsb
end if
	cmp	al, 0xFF
	jb	.ret		; CF=1: still no marker prefix
@@:
; A marker may be preceded by any number of 0xFF bytes; the first
; byte that differs from 0xFF is returned as the marker code.
	sub	ebp, 1
	jc	.ret
	lodsb
	cmp	al, 0xFF
	jz	@b
	clc
.ret:
	ret
909
 
909
 
910
align 16
decode_MCU:
; Decode one MCU: for every component of the current scan decode its
; V x H data units and copy each decoded 8x8 block from
; jpeg.work.decoded_data into the output image, replicating samples
; according to the component's sampling factors.
; in:  ebx -> jpeg.work
;      esi, ebp - input stream state; handed through to decode_data_unit
;      and preserved around the copy loops, which reuse them as scratch
; out: jpeg.work.cur_out_ptr advanced by jpeg.work.cur_block_dx
; clobbers eax, ecx, edx, edi
;
; Layout of one 56-byte component record, as filled in by the SOS handler:
;   +0 V, +1 H, +2 VFactor, +3 HFactor (bytes)
;   +4 HIncrement, +8 VIncrement
;   +28 remaining width / HFactor, +32 HFactor+1 - (width % HFactor)
;   +40 remaining height / VFactor, +44 VFactor+1 - (height % VFactor)
;   +52 ComponentOffset
; NOTE(review): movsb below relies on the direction flag being clear - confirm
; against the callers.
	lea	edx, [ebx+jpeg.work.cur_components]
.components_loop:
; decode each component
	push	[ebx+jpeg.work.cur_out_ptr]	; restored when the component is done
	movzx	ecx, byte [edx]			; ecx = V, rows of data units
	push	dword [edx+40]			; remaining height, restored later
; we have H_i * V_i blocks of packed data, decode them
.y_loop_1:
	push	[ebx+jpeg.work.cur_out_ptr]	; start of this row of data units
	push	ecx
	movzx	eax, byte [edx+1]		; eax = H, data units per row
	push	dword [edx+28]			; remaining width, restored per row
.x_loop_1:
	push	eax
	call	decode_data_unit
; blocks lying completely outside the image are decoded but not copied
	cmp	dword [edx+40], 0
	jl	.nocopyloop
	cmp	dword [edx+28], 0
	jl	.nocopyloop
; now we have decoded block 8*8 in decoded_data
; H_i * V_i packed blocks 8*8 make up one block (8*HMax) * (8*VMax)
; so each pixel in packed block corresponds to HFact * VFact pixels
	movzx	ecx, byte [edx+2]		; ecx = VFactor, vertical replication count
	push	esi ebp
	mov	edi, [ebx+jpeg.work.cur_out_ptr]
	add	edi, [edx+52]			; + ComponentOffset
.y_loop_2:
	push	ecx edi
; ecx = number of block rows to copy in this pass = min(8, remaining+1-CF),
; where CF = (pass counter < [edx+44])
	cmp	ecx, [edx+44]
	mov	ecx, [edx+40]
	sbb	ecx, 8-1
	sbb	eax, eax
	and	ecx, eax
	add	ecx, 8
	jz	.skip_x_loop_2			; no rows to copy in this pass
	movzx	eax, byte [edx+3]		; eax = HFactor, horizontal replication count
.x_loop_2:
	push	eax ecx edi
; eax = (columns to copy) - 8, computed the same way as the row count above;
; it lies in -8..0
	cmp	eax, [edx+32]
	mov	eax, [edx+28]
	sbb	eax, 8-1
	sbb	ebp, ebp
	and	eax, ebp
; Enter the unrolled copy below part-way so that only the wanted number
; of samples is copied: every skipped sample moves the entry point by
; 3 bytes, which is the encoded size of one "add edi, eax" + "movsb"
; pair. DO NOT change the instructions between .copyiter_all and
; "pop edi esi" without revisiting this arithmetic.
	mov	ebp, .copyiter_all
	lea	esi, [ebx+jpeg.work.decoded_data]
	sub	ebp, eax
	sub	ebp, eax
	sub	ebp, eax
	mov	eax, [edx+4]
	sub	eax, 1				; movsb itself advances edi by 1
.copyloop:
	push	esi edi
	jmp	ebp
.copyiter_all:
	movsb
repeat 7
	add	edi, eax
	movsb
end repeat
; two bytes of padding: with zero columns to copy the entry point is
; .copyiter_all+24, which must land exactly on the pop below
	nop
	nop
	pop	edi esi
	add	edi, [edx+8]			; next output row for this component
	add	esi, 8				; next row of the decoded 8x8 block
	sub	ecx, 1
	jnz	.copyloop
	pop	edi ecx eax
	add	edi, [ebx+jpeg.work.pixel_size]	; next horizontal replica
	sub	eax, 1
	jnz	.x_loop_2
.skip_x_loop_2:
	pop	edi ecx
	add	edi, [ebx+jpeg.work.line_size]	; next vertical replica
	sub	ecx, 1
	jnz	.y_loop_2
	pop	ebp esi
.nocopyloop:
; step to the next data unit in this row
	mov	eax, [ebx+jpeg.work.delta_x]
	add	[ebx+jpeg.work.cur_out_ptr], eax
	pop	eax
	sub	dword [edx+28], 8
	sub	eax, 1
	jnz	.x_loop_1
; step to the next row of data units
	pop	dword [edx+28]
	pop	ecx
	pop	eax				; cur_out_ptr at the start of the finished row
	sub	dword [edx+40], 8
	add	eax, [ebx+jpeg.work.delta_y]
	mov	[ebx+jpeg.work.cur_out_ptr], eax
	sub	ecx, 1
	jnz	.y_loop_1
; component done: restore the vertical state, consume 8*H columns
	movzx	eax, byte [edx+1]
	pop	dword [edx+40]
	shl	eax, 3
	pop	[ebx+jpeg.work.cur_out_ptr]
	sub	dword [edx+28], eax
	add	edx, 56				; next component record
	cmp	edx, [ebx+jpeg.work.cur_components_end]
	jb	.components_loop
; all components done: move the output pointer to the next MCU
	mov	eax, [ebx+jpeg.work.cur_block_dx]
	add	[ebx+jpeg.work.cur_out_ptr], eax
	ret
1014
 
1014
 
1015
align 16
next_MCU:
; Book-keeping after one MCU: count it and, when a restart interval is
; defined and has just elapsed, consume the expected RSTn marker.
; in:  ebx -> jpeg.work
;      esi -> input, ebp = number of input bytes remaining
; out: CF=0 - ok
;      CF=1 - the restart marker that must follow is missing or wrong
;             (ebp has already been reduced by 2 in that case)
; clobbers eax; edx too when a restart marker is processed
	add	[ebx+jpeg.work.decoded_MCUs], 1
	mov	eax, [ebx+jpeg.work.restart_interval]
	test	eax, eax
	jz	.no_restart		; restart interval 0: no restart handling
	cmp	[ebx+jpeg.work.decoded_MCUs], eax
	jb	.no_restart		; interval not complete yet
	and	[ebx+jpeg.work.decoded_MCUs], 0
	and	[ebx+jpeg.work.huffman_bits], 0
; the callers in this file decrement cur_x/cur_y only after this call,
; so cur_x = cur_y = 1 means the MCU just decoded was the last one of
; the scan; no restart marker is expected then
	cmp	[ebx+jpeg.work.cur_x], 1
	jnz	@f
	cmp	[ebx+jpeg.work.cur_y], 1
	jz	.no_restart
@@:
; restart marker must be present
	sub	ebp, 2
	js	.error
	cmp	byte [esi], 0xFF
	jnz	.error
; expected marker = 0xD0 + ((previous index + 1) mod 8)
	mov	al, [ebx+jpeg.work.cur_rst_marker]
	inc	eax
	and	al, 7
	mov	[ebx+jpeg.work.cur_rst_marker], al
	add	al, 0xD0
	cmp	[esi+1], al
	jnz	.error
	add	esi, 2
; handle restart marker - zero all DC predictions
; (only the low word at +48 is cleared; the byte at +51 is written
; separately by the SOS handler and is left alone)
	lea	edx, [ebx+jpeg.work.cur_components]
@@:
	and	word [edx+48], 0
	add	edx, 56
	cmp	edx, [ebx+jpeg.work.cur_components_end]
	jb	@b
.no_restart:
	clc
	ret
.error:
	stc
	ret
1056
 
1056
 
1057
next_line:
; Move the decoder state from the end of one row of MCUs to the start
; of the next row.
; in:  ebx -> jpeg.work
; out: cur_x reloaded from max_x;
;      cur_out_ptr -= max_x*cur_block_dx - cur_block_dy;
;      every current component: remaining width reloaded, remaining
;      height reduced by 8*V
; clobbers eax, edx
	mov	eax, [ebx+jpeg.work.max_x]
	mov	[ebx+jpeg.work.cur_x], eax
	mul	[ebx+jpeg.work.cur_block_dx]	; edx:eax = max_x * cur_block_dx
	sub	eax, [ebx+jpeg.work.cur_block_dy]
	sub	[ebx+jpeg.work.cur_out_ptr], eax	; back to column 0, one block row down
	lea	edx, [ebx+jpeg.work.cur_components]
@@:
	mov	eax, [edx+24]		; full width / HFactor ...
	mov	[edx+28], eax		; ... becomes the remaining width again
	movzx	eax, byte [edx]		; V = rows of data units per MCU
	shl	eax, 3
	sub	[edx+40], eax		; remaining height -= 8*V
	add	edx, 56			; next component record
	cmp	edx, [ebx+jpeg.work.cur_components_end]
	jb	@b
	ret
1074
 
1074
 
1075
init_limits:
; Set up the MCU grid for the scan that is about to be decoded.
; in:  ebx -> jpeg.work, with cur_components and not_interleaved
;      already filled in for this scan
; out: max_x, max_y, cur_block_dx, cur_block_dy, cur_x, cur_y set
; clobbers eax, ecx, edx in the non-interleaved case only; the copies
; below go through the stack so that no register is touched otherwise
;
; defaults (interleaved scan): take the frame-wide values
	push	[ebx+jpeg.work.x_num_blocks]
	pop	[ebx+jpeg.work.max_x]
	push	[ebx+jpeg.work.y_num_blocks]
	pop	[ebx+jpeg.work.max_y]
	push	[ebx+jpeg.work.block_delta_x]
	pop	[ebx+jpeg.work.cur_block_dx]
	push	[ebx+jpeg.work.block_delta_y]
	pop	[ebx+jpeg.work.cur_block_dy]
	cmp	[ebx+jpeg.work.not_interleaved], 0
	jz	@f
; non-interleaved scan: one data unit per MCU, so the grid is derived
; from the single scan component.
; max_x = ceil(ceil(width / HFactor) / 8): "sbb eax, -8" adds 8 - CF,
; and CF is set exactly when HFactor < [+32], i.e. when the division
; width / HFactor left no remainder
	mov	eax, dword [ebx+jpeg.work.cur_components+28]
	movzx	ecx, byte [ebx+jpeg.work.cur_components+3]	; HFactor
	cmp	cl, [ebx+jpeg.work.cur_components+32]
	sbb	eax, -7-1
	shr	eax, 3
	mov	[ebx+jpeg.work.max_x], eax
; max_y likewise from height / VFactor
	mov	eax, dword [ebx+jpeg.work.cur_components+40]
	movzx	edx, byte [ebx+jpeg.work.cur_components+2]	; VFactor
	cmp	dl, [ebx+jpeg.work.cur_components+44]
	sbb	eax, -7-1
	shr	eax, 3
	mov	[ebx+jpeg.work.max_y], eax
; output step per MCU = sampling factor * step per data unit
	imul	ecx, [ebx+jpeg.work.delta_x]
	mov	[ebx+jpeg.work.cur_block_dx], ecx
	imul	edx, [ebx+jpeg.work.delta_y]
	mov	[ebx+jpeg.work.cur_block_dy], edx
@@:
; start counting down from the full grid size
	push	[ebx+jpeg.work.max_x]
	pop	[ebx+jpeg.work.cur_x]
	push	[ebx+jpeg.work.max_y]
	pop	[ebx+jpeg.work.cur_y]
	ret
1108
 
1108
 
1109
;macro get_bit
1109
;macro get_bit
1110
;{
1110
;{
1111
;local .l1,.l2,.marker
1111
;local .l1,.l2,.marker
1112
;	add	cl, cl
1112
;	add	cl, cl
1113
;	jnz	.l1
1113
;	jnz	.l1
1114
;	sub	ebp, 1
1114
;	sub	ebp, 1
1115
;	js	decode_data_unit.eof
1115
;	js	decode_data_unit.eof
1116
;	mov	cl, [esi]
1116
;	mov	cl, [esi]
1117
;	cmp	cl, 0xFF
1117
;	cmp	cl, 0xFF
1118
;	jnz	.l2
1118
;	jnz	.l2
1119
;.marker:
1119
;.marker:
1120
;	add	esi, 1
1120
;	add	esi, 1
1121
;	sub	ebp, 1
1121
;	sub	ebp, 1
1122
;	js	decode_data_unit.eof
1122
;	js	decode_data_unit.eof
1123
;	cmp	byte [esi], 0xFF
1123
;	cmp	byte [esi], 0xFF
1124
;	jz	.marker
1124
;	jz	.marker
1125
;	cmp	byte [esi], 0
1125
;	cmp	byte [esi], 0
1126
;	jnz	decode_data_unit.eof
1126
;	jnz	decode_data_unit.eof
1127
;.l2:
1127
;.l2:
1128
;	sub	esi, -1
1128
;	sub	esi, -1
1129
;	adc	cl, cl
1129
;	adc	cl, cl
1130
;.l1:
1130
;.l1:
1131
;}
1131
;}
1132
; get_bit: shift the next bit of the entropy-coded stream into CF.
; in:  ch = bit buffer, unread bits at the top
;      cl = number of unread bits in ch
;      esi -> next input byte, ebp = number of input bytes remaining
; out: CF = the bit; ch, cl, esi, ebp updated
; stack_depth selects the failure exit: on end of input, or when a 0xFF
; byte is followed by something other than 0xFF or 0x00, control goes to
; the label .eof_pop<stack_depth>, which the invoking routine must define.
macro get_bit stack_depth
{
local .l1,.l2,.marker
	sub	cl, 1
	jns	.l1		; there was at least one unread bit in ch
; buffer empty: fetch the next byte
	sub	ebp, 1
	js	.eof_pop#stack_depth
	mov	ch, [esi]
	cmp	ch, 0xFF
	jnz	.l2
; 0xFF in the data: skip any further 0xFF bytes, then insist on the
; stuffed zero byte; ch keeps the value 0xFF as the data byte
.marker:
	add	esi, 1
	sub	ebp, 1
	js	.eof_pop#stack_depth
	cmp	byte [esi], 0xFF
	jz	.marker
	cmp	byte [esi], 0
	jnz	.eof_pop#stack_depth
.l2:
	add	esi, 1
	mov	cl, 7		; 8 new bits, one of them is consumed right below
.l1:
	add	ch, ch		; top bit -> CF
}
1156
; get_bits: read bl bits from the entropy-coded stream and convert them
; to a signed coefficient value.
; in:  bl = number of bits to read
;      ch = bit buffer, unread bits at the top; cl = number of unread bits
;      esi -> next input byte, ebp = number of input bytes remaining
; out: eax = v if the first bit read is 1, otherwise v - 2^bl + 1,
;      where v is the bl-bit unsigned value that was read
;      ch, cl, esi, ebp updated; ebx preserved
;      edx clobbered, unless restore_edx is "true": then edx is popped
;      from the stack, so the invoker must have pushed it beforehand
; ebx is kept on the stack while input bytes are fetched, hence the
; failure exits go to .eof_pop<stack_depth_p1> (one more dword to
; discard). The stack_depth argument is not referenced in the body.
macro get_bits stack_depth,stack_depth_p1,restore_edx
{
local .l1,.l2,.l3,.marker2
; eax collects the bits top-aligned; dl = bits available in the current byte
	movzx	eax, ch
	mov	dl, cl
	shl	eax, 24
	neg	cl
	push	ebx
	add	cl, 24		; cl = shift that puts the next byte right below the bits in eax
.l1:
	cmp	bl, dl
	jbe	.l2		; the current byte holds all bits still needed
	sub	bl, dl		; bl = bits still needed from following bytes
	sub	ebp, 1
	js	.eof_pop#stack_depth_p1
	mov	ch, [esi]
	cmp	ch, 0xFF
	jnz	.l3
; 0xFF in the data: skip further 0xFF bytes, require the stuffed zero
.marker2:
	add	esi, 1
	sub	ebp, 1
	js	.eof_pop#stack_depth_p1
	cmp	byte [esi], 0xFF
	jz	.marker2
	cmp	byte [esi], 0
	jnz	.eof_pop#stack_depth_p1
.l3:
	movzx	edx, ch
	add	esi, 1
	shl	edx, cl
	sub	cl, 8
	or	eax, edx	; append the new byte below the bits gathered so far
	mov	dl, 8
	jmp	.l1
.l2:
	mov	cl, bl
	sub	dl, bl		; dl = unread bits left in ch afterwards
	shl	ch, cl		; drop the consumed bits from the buffer
	pop	ebx		; bl = total bit count again
; sign conversion: put the inverted first bit on top, shift down with
; sign extension so that bl+1 bits remain, then fix up with add/adc
	cmp	eax, 80000000h	; CF = 1 if the first bit is 0
	rcr	eax, 1
	mov	cl, 31
	sub	cl, bl
	sar	eax, cl
	mov	cl, dl		; cl = number of unread bits, as on entry
if restore_edx eq true
	pop	edx
end if
; first bit 1: eax = v, the pair adds 2^32, i.e. nothing
; first bit 0: eax = v - 2^bl, the carry of the first add makes the pair add 1
	add	eax, 80000000h
	adc	eax, 80000000h
}
1207
; macro get_huffman_code
1207
; macro get_huffman_code
1208
; {
1208
; {
1209
; local .l1
1209
; local .l1
1210
	; xor	ebx, ebx
1210
	; xor	ebx, ebx
1211
; .l1:
1211
; .l1:
1212
	; get_bit
1212
	; get_bit
1213
	; adc	ebx, ebx
1213
	; adc	ebx, ebx
1214
	; mov	eax, [eax+4*ebx]
1214
	; mov	eax, [eax+4*ebx]
1215
	; xor	ebx, ebx
1215
	; xor	ebx, ebx
1216
	; cmp	eax, -1
1216
	; cmp	eax, -1
1217
	; jz	.eof_pop
1217
	; jz	.eof_pop
1218
	; cmp	eax, 0x1000
1218
	; cmp	eax, 0x1000
1219
	; jae	.l1
1219
	; jae	.l1
1220
	; mov	ebx, eax
1220
	; mov	ebx, eax
1221
; }
1221
; }
1222
macro get_huffman_code stack_depth,stack_depth_p1
1222
macro get_huffman_code stack_depth,stack_depth_p1
1223
{
1223
{
1224
local .l1,.l2,.l3,.l4,.l5,.l6,.nomarker1,.marker1,.nomarker2,.marker2,.nomarker3,.marker3,.done
1224
local .l1,.l2,.l3,.l4,.l5,.l6,.nomarker1,.marker1,.nomarker2,.marker2,.nomarker3,.marker3,.done
1225
; 1. (First level in Huffman table) Does the current Huffman code fit in 8 bits
1225
; 1. (First level in Huffman table) Does the current Huffman code fit in 8 bits
1226
; and have we got enough bits?
1226
; and have we got enough bits?
1227
	movzx	ebx, ch
1227
	movzx	ebx, ch
1228
	cmp	byte [eax+ebx*2], cl
1228
	cmp	byte [eax+ebx*2], cl
1229
	jbe	.l1
1229
	jbe	.l1
1230
; 2a. No; load next byte
1230
; 2a. No; load next byte
1231
	sub	ebp, 1
1231
	sub	ebp, 1
1232
	js	.eof_pop#stack_depth
1232
	js	.eof_pop#stack_depth
1233
	mov	ch, [esi]
1233
	mov	ch, [esi]
1234
	movzx	edx, ch
1234
	movzx	edx, ch
1235
	cmp	ch, 0xFF
1235
	cmp	ch, 0xFF
1236
	jnz	.nomarker1
1236
	jnz	.nomarker1
1237
.marker1:
1237
.marker1:
1238
	add	esi, 1
1238
	add	esi, 1
1239
	sub	ebp, 1
1239
	sub	ebp, 1
1240
	js	.eof_pop#stack_depth
1240
	js	.eof_pop#stack_depth
1241
	cmp	byte [esi], 0xFF
1241
	cmp	byte [esi], 0xFF
1242
	jz	.marker1
1242
	jz	.marker1
1243
	cmp	byte [esi], 0
1243
	cmp	byte [esi], 0
1244
	jnz	.eof_pop#stack_depth
1244
	jnz	.eof_pop#stack_depth
1245
.nomarker1:
1245
.nomarker1:
1246
	shr	edx, cl
1246
	shr	edx, cl
1247
	add	esi, 1
1247
	add	esi, 1
1248
	or	ebx, edx
1248
	or	ebx, edx
1249
; 3a. (First level in Huffman table, >=8 bits known) Does the current Huffman code fit in 8 bits?
1249
; 3a. (First level in Huffman table, >=8 bits known) Does the current Huffman code fit in 8 bits?
1250
	cmp	byte [eax+ebx*2], 8
1250
	cmp	byte [eax+ebx*2], 8
1251
	jbe	.l2
1251
	jbe	.l2
1252
	jl	.eof_pop#stack_depth
1252
	jl	.eof_pop#stack_depth
1253
; 4aa. No; go to next level
1253
; 4aa. No; go to next level
1254
	movzx	ebx, byte [eax+ebx*2+1]
1254
	movzx	ebx, byte [eax+ebx*2+1]
1255
	mov	dl, ch
1255
	mov	dl, ch
1256
	shl	ebx, 5
1256
	shl	ebx, 5
1257
	ror	edx, cl
1257
	ror	edx, cl
1258
	lea	ebx, [eax+ebx+0x200]
1258
	lea	ebx, [eax+ebx+0x200]
1259
	shr	edx, 24
1259
	shr	edx, 24
1260
	push	edx
1260
	push	edx
1261
	shr	edx, 4
1261
	shr	edx, 4
1262
; 5aa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits
1262
; 5aa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits
1263
; and have we got enough bits?
1263
; and have we got enough bits?
1264
	cmp	byte [ebx+edx*2], cl
1264
	cmp	byte [ebx+edx*2], cl
1265
	jbe	.l3
1265
	jbe	.l3
1266
; 6aaa. No; have we got 12 bits?
1266
; 6aaa. No; have we got 12 bits?
1267
	cmp	cl, 4
1267
	cmp	cl, 4
1268
	jae	.l4
1268
	jae	.l4
1269
; 7aaaa. No; load next byte
1269
; 7aaaa. No; load next byte
1270
	pop	edx
1270
	pop	edx
1271
	sub	ebp, 1
1271
	sub	ebp, 1
1272
	js	.eof_pop#stack_depth
1272
	js	.eof_pop#stack_depth
1273
	mov	ch, [esi]
1273
	mov	ch, [esi]
1274
	cmp	ch, 0xFF
1274
	cmp	ch, 0xFF
1275
	jnz	.nomarker2
1275
	jnz	.nomarker2
1276
.marker2:
1276
.marker2:
1277
	add	esi, 1
1277
	add	esi, 1
1278
	sub	ebp, 1
1278
	sub	ebp, 1
1279
	js	.eof_pop#stack_depth
1279
	js	.eof_pop#stack_depth
1280
	cmp	byte [esi], 0xFF
1280
	cmp	byte [esi], 0xFF
1281
	jz	.marker2
1281
	jz	.marker2
1282
	cmp	byte [esi], 0
1282
	cmp	byte [esi], 0
1283
	jnz	.eof_pop#stack_depth
1283
	jnz	.eof_pop#stack_depth
1284
.nomarker2:
1284
.nomarker2:
1285
	push	ecx
1285
	push	ecx
1286
	shr	ch, cl
1286
	shr	ch, cl
1287
	add	esi, 1
1287
	add	esi, 1
1288
	or	dl, ch
1288
	or	dl, ch
1289
	pop	ecx
1289
	pop	ecx
1290
	push	edx
1290
	push	edx
1291
	shr	edx, 4
1291
	shr	edx, 4
1292
; 8aaaa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits?
1292
; 8aaaa. (Second level in Huffman table) Does the current Huffman code fit in 12 bits?
1293
	cmp	byte [ebx+edx*2], 4
1293
	cmp	byte [ebx+edx*2], 4
1294
	jbe	.l5
1294
	jbe	.l5
1295
	jl	.eof_pop#stack_depth_p1
1295
	jl	.eof_pop#stack_depth_p1
1296
; 9aaaaa. No; go to next level
1296
; 9aaaaa. No; go to next level
1297
	movzx	ebx, byte [ebx+edx*2+1]
1297
	movzx	ebx, byte [ebx+edx*2+1]
1298
	pop	edx
1298
	pop	edx
1299
	shl	ebx, 5
1299
	shl	ebx, 5
1300
	and	edx, 0xF
1300
	and	edx, 0xF
1301
	lea	ebx, [eax+ebx+0x200]
1301
	lea	ebx, [eax+ebx+0x200]
1302
; 10aaaaa. Get current code length and value
1302
; 10aaaaa. Get current code length and value
1303
	sub	cl, [ebx+edx*2]
1303
	sub	cl, [ebx+edx*2]
1304
	movzx	eax, byte [ebx+edx*2+1]
1304
	movzx	eax, byte [ebx+edx*2+1]
1305
	neg	cl
1305
	neg	cl
1306
	shl	ch, cl
1306
	shl	ch, cl
1307
	neg	cl
1307
	neg	cl
1308
	add	cl, 8
1308
	add	cl, 8
1309
	jmp	.done
1309
	jmp	.done
1310
.l5:
1310
.l5:
1311
; 9aaaab. Yes; get current code length and value
1311
; 9aaaab. Yes; get current code length and value
1312
	sub	cl, [ebx+edx*2]
1312
	sub	cl, [ebx+edx*2]
1313
	movzx	eax, byte [ebx+edx*2+1]
1313
	movzx	eax, byte [ebx+edx*2+1]
1314
	neg	cl
1314
	neg	cl
1315
	pop	edx
1315
	pop	edx
1316
	shl	ch, cl
1316
	shl	ch, cl
1317
	neg	cl
1317
	neg	cl
1318
	add	cl, 8
1318
	add	cl, 8
1319
	jmp	.done
1319
	jmp	.done
1320
.l4:
1320
.l4:
1321
; 7aaab. Yes; go to next level
1321
; 7aaab. Yes; go to next level
1322
	movzx	ebx, byte [ebx+edx*2+1]
1322
	movzx	ebx, byte [ebx+edx*2+1]
1323
	pop	edx
1323
	pop	edx
1324
	shl	ebx, 5
1324
	shl	ebx, 5
1325
	and	edx, 0xF
1325
	and	edx, 0xF
1326
	lea	ebx, [eax+ebx+0x200]
1326
	lea	ebx, [eax+ebx+0x200]
1327
; 8aaab. (Third level in Huffman table) Have we got enough bits?
1327
; 8aaab. (Third level in Huffman table) Have we got enough bits?
1328
	cmp	[ebx+edx*2], cl
1328
	cmp	[ebx+edx*2], cl
1329
	jbe	.l6
1329
	jbe	.l6
1330
; 9aaaba. No; load next byte
1330
; 9aaaba. No; load next byte
1331
	sub	ebp, 1
1331
	sub	ebp, 1
1332
	js	.eof_pop#stack_depth
1332
	js	.eof_pop#stack_depth
1333
	mov	ch, [esi]
1333
	mov	ch, [esi]
1334
	cmp	ch, 0xFF
1334
	cmp	ch, 0xFF
1335
	jnz	.nomarker3
1335
	jnz	.nomarker3
1336
.marker3:
1336
.marker3:
1337
	add	esi, 1
1337
	add	esi, 1
1338
	sub	ebp, 1
1338
	sub	ebp, 1
1339
	js	.eof_pop#stack_depth
1339
	js	.eof_pop#stack_depth
1340
	cmp	byte [esi], 0xFF
1340
	cmp	byte [esi], 0xFF
1341
	jz	.marker3
1341
	jz	.marker3
1342
	cmp	byte [esi], 0
1342
	cmp	byte [esi], 0
1343
	jnz	.eof_pop#stack_depth
1343
	jnz	.eof_pop#stack_depth
1344
.nomarker3:
1344
.nomarker3:
1345
	push	ecx
1345
	push	ecx
1346
	shr	ch, cl
1346
	shr	ch, cl
1347
	add	esi, 1
1347
	add	esi, 1
1348
	or	dl, ch
1348
	or	dl, ch
1349
	pop	ecx
1349
	pop	ecx
1350
; 10aaaba. Get current code length and value
1350
; 10aaaba. Get current code length and value
1351
	sub	cl, [ebx+edx*2]
1351
	sub	cl, [ebx+edx*2]
1352
	movzx	eax, byte [ebx+edx*2+1]
1352
	movzx	eax, byte [ebx+edx*2+1]
1353
	neg	cl
1353
	neg	cl
1354
	shl	ch, cl
1354
	shl	ch, cl
1355
	neg	cl
1355
	neg	cl
1356
	add	cl, 8
1356
	add	cl, 8
1357
	jmp	.done
1357
	jmp	.done
1358
.l3:
1358
.l3:
1359
; 6aab. Yes; get current code length and value
1359
; 6aab. Yes; get current code length and value
1360
	pop	eax
1360
	pop	eax
1361
.l6:
1361
.l6:
1362
; 9aaabb. Yes; get current code length and value
1362
; 9aaabb. Yes; get current code length and value
1363
	sub	cl, [ebx+edx*2]
1363
	sub	cl, [ebx+edx*2]
1364
	movzx	eax, byte [ebx+edx*2+1]
1364
	movzx	eax, byte [ebx+edx*2+1]
1365
	xor	cl, 7
1365
	xor	cl, 7
1366
	shl	ch, cl
1366
	shl	ch, cl
1367
	xor	cl, 7
1367
	xor	cl, 7
1368
	add	ch, ch
1368
	add	ch, ch
1369
	jmp	.done
1369
	jmp	.done
1370
.l2:
1370
.l2:
1371
; 3ab. Yes; get current code length and value
1371
; 3ab. Yes; get current code length and value
1372
	sub	cl, [eax+ebx*2]
1372
	sub	cl, [eax+ebx*2]
1373
	movzx	eax, byte [eax+ebx*2+1]
1373
	movzx	eax, byte [eax+ebx*2+1]
1374
	neg	cl
1374
	neg	cl
1375
	shl	ch, cl
1375
	shl	ch, cl
1376
	neg	cl
1376
	neg	cl
1377
	add	cl, 8
1377
	add	cl, 8
1378
	jmp	.done
1378
	jmp	.done
1379
.l1:
1379
.l1:
1380
; 3b. Yes; get current code length and value
1380
; 3b. Yes; get current code length and value
1381
	mov	dl, [eax+ebx*2]
1381
	mov	dl, [eax+ebx*2]
1382
	movzx	eax, byte [eax+ebx*2+1]
1382
	movzx	eax, byte [eax+ebx*2+1]
1383
	xchg	cl, dl
1383
	xchg	cl, dl
1384
	sub	dl, cl
1384
	sub	dl, cl
1385
	shl	ch, cl
1385
	shl	ch, cl
1386
	mov	cl, dl
1386
	mov	cl, dl
1387
.done:
1387
.done:
1388
	mov	ebx, eax
1388
	mov	ebx, eax
1389
}
1389
}
; Decode DCT coefficients for one 8*8 block in progressive mode
; from the input stream given by pointer esi and remaining length ebp.
;
; Register usage, as read from the code below:
;   ebx = pointer to the jpeg.work structure
;   edx = pointer to the current component record
;         [edx+16], [edx+20] - table pointers handed to get_huffman_code in eax
;         [edx+48]           - running DC prediction (low word is updated here)
;         [edx+52]           - counter of blocks still covered by an end-of-band run
;   edi = pointer to the 64 word-sized coefficients of the block
;   ecx = bit reader state, loaded from and stored back to jpeg.work.huffman_bits
;
; get_bit, get_bits and get_huffman_code are macros defined elsewhere in this file.
; get_bit returns the bit in CF (it is consumed with adc / jnc below).
; NOTE(review): the numeric macro arguments look like the number of dwords that are
; on the stack at that point, used to pick the matching .eof_popN label when the
; input runs out - confirm against the macro definitions.
; N.B. Speed optimization makes sense here.
align 16
decode_progressive_coeff:
	mov	ecx, [ebx+jpeg.work.huffman_bits]
	cmp	[ebx+jpeg.work.ScanStart], 0
	jnz	.ac
; ---- DC coefficient (ScanStart = 0) ----
	cmp	[ebx+jpeg.work.ApproxPosHigh], 0
	jz	.dc_first
; DC coefficient, subsequent passes: read one bit and OR it into the coefficient
; at bit position ApproxPosLow
	xor	eax, eax
	get_bit 0
	adc	eax, eax		; eax = the bit just read
	mov	[ebx+jpeg.work.huffman_bits], ecx
	mov	cl, [ebx+jpeg.work.ApproxPosLow]
	shl	eax, cl
	or	[edi], ax
	ret
.dc_first:
; DC coefficient, first pass: Huffman-coded difference added to the prediction,
; then stored shifted left by ApproxPosLow
	mov	eax, [edx+16]
	push	ebx
	push	edx
	get_huffman_code 2,3
	get_bits 2,3,true	; NOTE(review): presumably restores edx from the stack - only ebx is popped below
	pop	ebx
	add	eax, [edx+48]
	mov	[edx+48], ax		; new DC prediction
	mov	[ebx+jpeg.work.huffman_bits], ecx
	mov	cl, [ebx+jpeg.work.ApproxPosLow]
	shl	eax, cl
	mov	[edi], ax
	ret
.ac:
; ---- AC coefficients, indices ScanStart..ScanEnd in zigzag order ----
	movzx	eax, [ebx+jpeg.work.ScanStart]	; eax = current zigzag index
	cmp	al, [ebx+jpeg.work.ScanEnd]
	ja	.ret
	cmp	dword [edx+52], 0	; still inside an end-of-band run?
	jnz	.was_eob
	push	ebx			; stack: work
.acloop:
	push	edx			; stack: work, component
	push	eax
	mov	eax, [edx+20]
	get_huffman_code 3,4		; result is taken from ebx below
	pop	eax
	test	ebx, 15			; low nibble = size of the new coefficient
	jz	.band
	push	eax
	push	ebx
	and	ebx, 15
	get_bits 4,5,false
	pop	ebx
	xchg	eax, [esp]		; eax = zigzag index, [esp] = new coefficient value
	shr	ebx, 4			; ebx = number of zero coefficients to skip first
	mov	edx, [esp+8]		; edx = work pointer (saved ebx)
.zeroloop1:
; walk forward: already-nonzero coefficients get a correction bit,
; zero coefficients are counted against the run in ebx
	push	eax
	push	ebx
	movzx	eax, byte [zigzag+eax]	; eax = offset of the coefficient inside the block
	xor	ebx, ebx
	cmp	word [edi+eax], bx
	jz	.zeroloop2
	get_bit 5
	jnc	@f
; correction bit set: add +(1 shl ApproxPosLow) to a non-negative coefficient,
; -(1 shl ApproxPosLow) to a negative one
	push	ecx
	mov	cl, [edx+jpeg.work.ApproxPosLow]
	xor	ebx, ebx
	cmp	byte [edi+eax+1], 80h	; CF = 1 when the high byte is below 80h (sign bit clear)
	adc	ebx, 0
	add	ebx, ebx
	sub	ebx, 1			; ebx = +1 or -1
	shl	ebx, cl
	pop	ecx
	add	[edi+eax], bx
@@:
	pop	ebx
	pop	eax
@@:
	add	eax, 1
	cmp	al, [edx+jpeg.work.ScanEnd]
	ja	decode_data_unit.eof_pop3	; ran past the band: treated as bad input
	jmp	.zeroloop1
.zeroloop2:
	pop	ebx
	pop	eax
	sub	ebx, 1			; one more zero of the run consumed
	jns	@b
.nozero1:
; run exhausted: eax indexes the zero coefficient that receives the new value
	pop	ebx			; ebx = new coefficient value (0 when coming from the 16-zero run)
	test	ebx, ebx
	jz	@f
	push	eax
	movzx	eax, byte [zigzag+eax]
	push	ecx
	mov	cl, [edx+jpeg.work.ApproxPosLow]
	shl	ebx, cl
	pop	ecx
	mov	[edi+eax], bx
	pop	eax
@@:
	add	eax, 1
	cmp	al, [edx+jpeg.work.ScanEnd]
	pop	edx			; pop does not touch the flags tested next
	jbe	.acloop
	pop	ebx
	mov	[ebx+jpeg.work.huffman_bits], ecx
.ret:
	ret
; EOF unwinding: drop N dwords from the stack, then continue in decode_data_unit
.eof_pop5:
	pop	ebx
.eof_pop4:
	pop	ebx
.eof_pop3:
	pop	ebx
.eof_pop2:
	pop	ebx
.eof_pop1:
	pop	ebx
.eof_pop0:
	jmp	decode_data_unit.eof_pop0
.band:
; size nibble is 0: high nibble 15 means "skip 16 zero coefficients",
; anything else starts an end-of-band run
	shr	ebx, 4
	cmp	ebx, 15
	jnz	.eob
	mov	edx, [esp+4]		; edx = work pointer (saved ebx)
	push	0			; no new coefficient value
	jmp	.zeroloop1
.eob:
	pop	edx			; edx = component record again
	push	eax
	mov	eax, 1
	test	ebx, ebx
	jz	.eob0
@@:
; run length = (1 shl ebx) + next ebx bits of the stream
	get_bit 2
	adc	eax, eax
	sub	ebx, 1
	jnz	@b
.eob0:
	mov	[edx+52], eax
	pop	eax
	pop	ebx
.was_eob:
; this block is covered by an end-of-band run: only correction bits
; for coefficients that are already nonzero are read
	sub	dword [edx+52], 1
	cmp	al, [ebx+jpeg.work.ScanEnd]
	ja	.ret2
	push	edx
.zeroloop3:
	push	eax
	movzx	eax, byte [zigzag+eax]
	xor	edx, edx
	cmp	word [edi+eax], dx
	jz	@f
	get_bit 2
	jnc	@f
	push	ecx
	mov	cl, [ebx+jpeg.work.ApproxPosLow]
	xor	edx, edx
	cmp	byte [edi+eax+1], 80h	; same sign-dependent +-1 as in .zeroloop1
	adc	edx, 0
	add	edx, edx
	sub	edx, 1
	shl	edx, cl
	pop	ecx
	add	[edi+eax], dx
@@:
	pop	eax
	add	eax, 1
	cmp	al, [ebx+jpeg.work.ScanEnd]
	jbe	.zeroloop3
	pop	edx
.ret2:
	mov	[ebx+jpeg.work.huffman_bits], ecx
	ret
1564
 
1564
 
; handle_progressive - final pass for progressive images.
; Does nothing when jpeg.work.dct_buffer is 0. Otherwise, for each of the
; jpeg.work.pixel_size components, it builds one 56-byte component record in
; jpeg.work.cur_components, runs decode_MCU over the whole image for that
; component alone (not_interleaved = 1), and finally frees dct_buffer.
; In:  ebx = pointer to jpeg.work
; The same free-and-return path is taken when a component refers to a
; quantization table that was never defined.
; Uses eax, ecx, edx, esi, edi, ebp without saving them.
handle_progressive:
	cmp	[ebx+jpeg.work.dct_buffer], 0
	jnz	@f
	ret
@@:
; information for all components
	lea	esi, [ebx+jpeg.work.components]
	xor	ebp, ebp		; ebp = index of the current component
	mov	ecx, [ebx+jpeg.work.pixel_size]	; ecx = components left
.next_component:
	lea	edi, [ebx+jpeg.work.cur_components]
	lodsb	; ComponentID
	lodsd
	mov	ax, 0x0101
	stosd	; db V, db H, db VFactor, db HFactor
	xor	eax, eax
	mov	al, byte [edi-1]	; get HFactor
	mul	byte [ebx+jpeg.work.pixel_size]	; number of components
	stosd			; HIncrement_i = HFactor_i * sizeof(pixel)
	movzx	eax, byte [edi-4-2]	; get VFactor
	mul	[ebx+jpeg.work.line_size]	; number of components * image width
	stosd			; VIncrement_i = VFactor_i * sizeof(row)
	lodsb
	and	eax, 3			; quantization table index, 0..3
	cmp	[ebx+jpeg.work.quant_tables_defined+eax], 0
	jz	.error			; stack is balanced here: nothing pushed yet in this iteration
	shl	eax, 8			; tables are 256 bytes apart
	lea	eax, [ebx+jpeg.work.quant_tables+eax]
	stosd		; dd QuantizationTable
	stosd		; dd DCTable - ignored
	mov	eax, ebp
	mul	[ebx+jpeg.work.dct_buffer_size]
	add	eax, [ebx+jpeg.work.dct_buffer]
	stosd		; instead of dd ACTable - pointer to current DCT coefficients
	push	ecx
	mov	eax, [ebx+jpeg.work.image]
	mov	eax, [eax+Image.Width]
	movzx	ecx, byte [edi-21]	; get HFactor
	xor	edx, edx	; unsigned dividend is edx:eax; do not rely on edx left over from the mul above
	div	ecx
	stosd		; dd width / HFactor_i
	stosd
	xchg	eax, ecx
	inc	eax
	sub	eax, edx
	stosd		; dd HFactor_i+1 - (width % HFactor_i)
	mov	eax, [ebx+jpeg.work.image]
	mov	eax, [eax+Image.Height]
	movzx	ecx, byte [edi-34]	; get VFactor
	xor	edx, edx	; zero-extend for the unsigned div (cdq would sign-extend)
	div	ecx
	stosd		; dd height / VFactor_i
	stosd
	xchg	eax, ecx
	inc	eax
	sub	eax, edx
	stosd		; dd VFactor_i+1 - (height % VFactor_i)
	pop	ecx
; DCPrediction = 80000000h when the low byte of ebp has an odd number of set bits
; (component index 1 or 2 for indices 0..3), 0 otherwise
	xor	eax, eax
	test	ebp, ebp
	setnp	al
	ror	eax, 1
	stosd		; dd DCPrediction
	mov	eax, ebp
	stosd		; dd ComponentOffset
	inc	ebp
	push	ecx
	mov	[ebx+jpeg.work.cur_components_end], edi
	lea	edx, [edi-56]		; edx = start of the record just built (14 dwords)
; do IDCT and unpack
	mov	edi, [ebx+jpeg.work.image]
	mov	edi, [edi+Image.Data]
	mov	[ebx+jpeg.work.cur_out_ptr], edi
	mov	[ebx+jpeg.work.not_interleaved], 1
	call	init_limits
.decode_loop:
	call	decode_MCU
	sub	[ebx+jpeg.work.cur_x], 1
	jnz	.decode_loop
	call	next_line
	sub	[ebx+jpeg.work.cur_y], 1
	jnz	.decode_loop
	pop	ecx
	dec	ecx
	jnz	.next_component
; image unpacked, return
.error:
	push	[ebx+jpeg.work.dct_buffer]
	call	[mem.free]
	ret
1655
 
1655
 
; Support for YCbCr -> RGB conversion
; R = Y                          + 1.402 * (Cr - 128)
; G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128)
; B = Y +   1.772 * (Cb - 128)
; When converting YCbCr -> RGB, we need to do some multiplications;
; to be faster, we precalculate the tables for all 256 possible values.
; Also we approximate fractions with N/65536, this gives sufficient precision.
;
; img.initialize.jpeg fills 4*256 dwords starting at color_table_1 with one
; continuous run of stosd.
; NOTE(review): this relies on color_table_1..color_table_4 being laid out back
; to back and on DF being clear - neither is visible here, confirm.
; Every table is written in two passes of 128 entries: the first pass covers
; chroma offsets 0..127, then the accumulator is negated and the second pass
; covers -128..-1, i.e. the tables are indexed by the chroma offset taken as a
; signed byte.
;   color_table_1[v] = round(1.772 * v)          (integer)
;   color_table_2[v] = -0.34414 * v * 65536 + 8000h  (16.16 fixed point, rounding term included)
;   color_table_3[v] = -0.71414 * v * 65536          (16.16 fixed point)
;   color_table_4[v] = round(1.402 * v)          (integer)
; All registers are preserved (pushad/popad).
img.initialize.jpeg:
;initialize_color_table:
; 1.402 = 1 + 26345/65536, -0.71414 = -46802/65536
; -0.34414 = -22554/65536, 1.772 = 1 + 50594/65536
	pushad
	mov	edi, color_table_1
	mov	ecx, 128
; 1. Cb -> 1.772*Cb
; eax:dx is a fixed-point accumulator (dx = fraction), started at 0.5 for rounding
	xor	eax, eax
	mov	dx, 8000h
.l1:
	push	ecx
@@:
	stosd
	add	dx, 50594
	adc	eax, 1
	loop	@b
; x -> 1 - x: changes the sign of the multiple while keeping the +0.5 rounding term.
; After the second pass this yields eax = 0, dx = 8000h again and ends the loop.
	neg	dx
	adc	eax, -1
	neg	eax
	pop	ecx			; pop leaves the flags of neg eax intact
	jnz	.l1
; 2. Cb -> -0.34414*Cb
	mov	ax, dx			; eax = 8000h: rounding term for the later sar 16
.l2:
	push	ecx
@@:
	stosd
	sub	eax, 22554
	loop	@b
	neg	eax
; A plain negation also negates the 8000h rounding term, which made G one too
; small for every negative Cb. Adding 10000h turns -8000h back into +8000h;
; ax is unaffected, so the termination test below still works.
	add	eax, 10000h
	pop	ecx
	cmp	ax, dx			; back to 8000h only after the second pass
	jnz	.l2
	xor	eax, eax
; 3. Cr -> -0.71414*Cr
; no rounding term here: table 2 already carries it for the sum table2 + table3
.l3:
	push	ecx
@@:
	stosd
	sub	eax, 46802
	loop	@b
	neg	eax
	pop	ecx
	jnz	.l3
; 4. Cr -> 1.402*Cr
; starts from eax = 0, dx = 8000h left by the previous steps; same scheme as step 1
.l4:
	push	ecx
@@:
	stosd
	add	dx, 26345
	adc	eax, 1
	loop	@b
	neg	dx
	adc	eax, -1
	neg	eax
	pop	ecx
	jnz	.l4
	popad
	ret
1723
 
1723
 
; convert_to_rgb - this function is called at the end of image loading.
; Converts the decoded pixels in place, using the tables built by img.initialize.jpeg:
;   pixel_size = 3: bytes (Y, Cb, Cr) become (B, G, R)
;   pixel_size = 4: bytes (Y, Cb, Cr, K) become (B', G', R', K), where each colour
;                   byte is (255 - colour) scaled by K/255; pixels stay 4 bytes wide
;   anything else, or no image: nothing is done
; An image with Width*Height = 0 is left untouched.
; In:  ebx = pointer to jpeg.work
; Out: ebx preserved; eax, ecx, edx, esi, edi, ebp are clobbered on the conversion paths
convert_to_rgb:
; some checks
	mov	eax, [ebx+jpeg.work.image]
	test	eax, eax	; image exists?
	jz	.ret
	cmp	byte [ebx+jpeg.work.pixel_size], 3	; full-color image?
	jz	.ycc2rgb
	cmp	byte [ebx+jpeg.work.pixel_size], 4
	jz	.ycck2rgb
.ret:
	ret
.ycc2rgb:
; conversion is needed
	mov	esi, [eax+Image.Width]
	imul	esi, [eax+Image.Height]	; esi = number of pixels
	test	esi, esi		; the loop below is do-while: a zero count would wrap around
	jz	.ret
	mov	edi, [eax+Image.Data]
	push	ebx
; N.B. Speed optimization makes sense here.
align 16
.loop:
; disabled variant: one dword load, split with shifts
;	mov	ebx, [edi]
;	mov	edx, ebx
;	mov	ecx, ebx
;	movzx	ebx, bl		; ebx = Y
;	shr	edx, 16
;	mov	eax, ebx
;	movzx	edx, dl		; edx = Cr
;	movzx	ecx, ch		; ecx = Cb
	movzx	ebx, byte [edi]		; ebx = Y
	movzx	ecx, byte [edi+1]	; ecx = Cb
	mov	eax, ebx
	movzx	edx, byte [edi+2]	; edx = Cr
; B = Y + color_table_1[Cb]
	add	eax, [color_table_1+ecx*4]
	mov	ebp, [color_table_2+ecx*4]
; clamp eax to 0..255 without branches:
; negative -> 0 (mask is 0), above 255 -> all ones (al = FFh)
	cmp	eax, 80000000h
	sbb	ecx, ecx		; ecx = -1 if eax is non-negative, else 0
	and	eax, ecx
	add	ebp, [color_table_3+edx*4]
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx			; ecx = -1 if eax >= 256, else 0
	sar	ebp, 16			; ebp = integer green offset from the 16.16 sum
	or	eax, ecx
	mov	[edi], al
; G = Y + color_table_2[Cb] + color_table_3[Cr]
	lea	eax, [ebx+ebp]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	mov	[edi+1], al
; R = Y + color_table_4[Cr]
	mov	eax, ebx
	add	eax, [color_table_4+edx*4]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	mov	[edi+2], al
	add	edi, 3
	sub	esi, 1
	jnz	.loop
	pop	ebx
	ret
.ycck2rgb:
; conversion is needed
	mov	esi, [eax+Image.Width]
	imul	esi, [eax+Image.Height]	; esi = number of pixels
	test	esi, esi		; same do-while guard as above
	jz	.ret
	push	ebx
	push	esi			; [esp] = pixels left
	mov	edi, [eax+Image.Data]
	mov	esi, edi		; source and destination are the same buffer
; N.B. Speed optimization makes sense here.
align 16
.kloop:
; disabled variant: one dword load, split with shifts
;	mov	ebx, [esi]
;	mov	edx, ebx
;	mov	ecx, ebx
;	movzx	ebx, bl		; ebx = Y
;	shr	edx, 16
;	mov	eax, ebx
;	movzx	edx, dl		; edx = Cr
;	movzx	ecx, ch		; ecx = Cb
	movzx	ebx, byte [esi]		; ebx = Y
	movzx	ecx, byte [esi+1]	; ecx = Cb
	mov	eax, ebx
	movzx	edx, byte [esi+2]	; edx = Cr
; B = Y + color_table_1[Cb]
	add	eax, [color_table_1+ecx*4]
	mov	ebp, [color_table_2+ecx*4]
	cmp	eax, 80000000h		; branch-free clamp to 0..255, as in .loop
	sbb	ecx, ecx
	and	eax, ecx
	add	ebp, [color_table_3+edx*4]
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	sar	ebp, 16
	or	eax, ecx
; result = ((255 - al) * K) / 255, computed as (x + (x shr 8) + 80h) shr 8
	xor	al, 0xFF
	mul	byte [esi+3]		; ax = al * K; edx is not touched by the 8-bit mul
	add	al, ah
	adc	ah, 0
	add	al, 80h
	adc	ah, 0
	mov	byte [edi], ah
; G = Y + color_table_2[Cb] + color_table_3[Cr]
	lea	eax, [ebx+ebp]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	xor	al, 0xFF
	mul	byte [esi+3]
	add	al, ah
	adc	ah, 0
	add	al, 80h
	adc	ah, 0
	mov	byte [edi+1], ah
; R = Y + color_table_4[Cr]
	mov	eax, ebx
	add	eax, [color_table_4+edx*4]
	cmp	eax, 80000000h
	sbb	ecx, ecx
	and	eax, ecx
	cmp	eax, 0x100
	sbb	ecx, ecx
	not	ecx
	or	eax, ecx
	xor	al, 0xFF
	mul	byte [esi+3]
	add	al, ah
	adc	ah, 0
	add	al, 80h
	adc	ah, 0
	mov	byte [edi+2], ah
	add	esi, 4
	add	edi, 4 ;3
	sub	dword [esp], 1
	jnz	.kloop
	pop	eax
	pop	ebx
; release some memory - must succeed because we decrease size
; (disabled: the output keeps 4 bytes per pixel, see "add edi, 4" above)
;	add	ecx, 44+1
;	mov	edx, ebx
;	push	68
;	pop	eax
;	push	20
;	pop	ebx
;	int	0x40
;	mov	ebx, eax
	ret
1887
 
1887
 
1888
; decode_data_unit
; Decodes one data unit (one 8*8 block) and turns it into 64 output samples.
; The input stream is given by pointer esi and length ebp.
; N.B. Speed optimization makes sense here.
;
; in:  ebx -> jpeg.work structure
;      edx -> component data; fields touched by this routine:
;	[edx+12]	-> 8*8 table of dword floats the coefficients are multiplied by
;	[edx+16]	loaded into eax for get_huffman_code when reading the DC coefficient
;	[edx+20]	sequential mode: loaded into eax for get_huffman_code (AC coefficients);
;			progressive mode: pointer to 64 word coefficients, advanced by 64*2 here
;	word [edx+48]	running DC value, updated by this routine
;	byte [edx+51]	subtracted from every output sample
; out: 64 bytes stored starting at [ebx+jpeg.work.decoded_data];
;      ebx has its entry value again on the normal return path;
;      eax, ecx, edi are overwritten
; On the .eof_pop* paths esp is reloaded from [ebx+jpeg.work._esp] and control
; goes to img.decode.jpg.end instead of returning.
; NOTE(review): get_huffman_code / get_bits are macros defined elsewhere; their
; register contract (presumably symbol in eax/ebx, value in eax, "true" = pop edx)
; is inferred from the surrounding code only - confirm against the macro bodies.
align 16
decode_data_unit:
	cmp	[ebx+jpeg.work.progressive], 0
	jz	.sequential
; progressive mode: no entropy decoding here, just take the next block of
; 64 word coefficients at [edx+20] and step the pointer past it
	mov	edi, [edx+20]
	add	dword [edx+20], 64*2
	jmp	.coeff_decoded

.sequential:
; clear the 64-word coefficient buffer
	xor	eax, eax
	mov	ecx, 64*2/4
	lea	edi, [ebx+jpeg.work.dct_coeff]
	rep	stosd
	mov	ecx, [ebx+jpeg.work.huffman_bits]
	mov	edi, zigzag+1		; edi walks the zigzag table, starting at the first AC entry

; --- DC coefficient ---
	push	ebx
	mov	eax, [edx+16]
	push	edx
	get_huffman_code 2,3
	get_bits 2,3,true
	pop	ebx
	add	eax, [edx+48]		; decoded value is relative to the running DC value
	mov	[edx+48], ax
	mov	[ebx+jpeg.work.dct_coeff], ax

; --- AC coefficients ---
	push	ebx			; [esp] = jpeg.work pointer while ebx is used as scratch
.next_ac:
	mov	eax, [edx+20]
	push	edx
	get_huffman_code 2,3
	shr	eax, 4			; high nibble
	and	ebx, 15			; low nibble
	jz	.band			; low nibble = 0 is handled separately
	add	edi, eax		; skip (high nibble) zigzag positions
	cmp	edi, zigzag+64
	jae	.eof_pop2		; ran past the table: bad data
	get_bits 2,3,true
	movzx	ebx, byte [edi]		; byte offset of this coefficient inside the block
	add	ebx, [esp]
	mov	[jpeg.work.dct_coeff+ebx], ax
	add	edi, 1
	cmp	edi, zigzag+64
	jb	.next_ac
	jmp	.do_idct

.band:
	pop	edx
	cmp	al, 15
	jnz	.do_idct		; anything but 15 ends the block
	add	edi, 16			; 15: skip 16 zigzag positions and go on
	cmp	edi, zigzag+64
	jb	.next_ac
; running past the end here is tolerated: fall through to the IDCT
; (a jump to .eof_pop1 is disabled at this point)

.do_idct:
	pop	ebx
	lea	edi, [ebx+jpeg.work.dct_coeff]
	mov	[ebx+jpeg.work.huffman_bits], ecx

; coefficients loaded, now IDCT
; here: edi -> 64 word coefficients, ebx -> jpeg.work, edx -> component data
.coeff_decoded:
	mov	eax, [edx+12]
	add	ebx, jpeg.work.idct_tmp_area
	push	8			; [esp] = loop counter of the first pass

; ---- pass 1: one iteration per word position inside a 16-byte row;
; ---- reads 8 words with stride 16 from edi, scales them by the dword floats
; ---- with stride 32 at eax, writes 8 floats with stride 32 at ebx
.idct_loop1:
; if the words at strides 1..7 are all zero, the result is a constant
	mov	cx, word [edi+1*16]
	or	cx, word [edi+2*16]
	or	cx, word [edi+3*16]
	or	cx, word [edi+4*16]
	or	cx, word [edi+5*16]
	or	cx, word [edi+6*16]
	or	cx, word [edi+7*16]
	jnz	.real_transform
	fild	word [edi]
	fmul	dword [eax]
	fstp	dword [ebx]
	mov	ecx, [ebx]		; replicate the single float into the other 7 slots
	mov	[ebx+1*32], ecx
	mov	[ebx+2*32], ecx
	mov	[ebx+3*32], ecx
	mov	[ebx+4*32], ecx
	mov	[ebx+5*32], ecx
	mov	[ebx+6*32], ecx
	mov	[ebx+7*32], ecx
	jmp	.idct_next1

.real_transform:
; S0,...,S7 - transformed values, s0,...,s7 - sought-for values
; S0,...,S7 are dequantized;
; dequantization table elements were multiplied by [idct_pre_table],
; so S0,S1,... later denote S0/2\sqrt{2},S1*\cos{\pi/16}/2,...
; 	sqrt2 = \sqrt{2}, cos = 2\cos{\pi/8},
; 	cos_sum = -2(\cos{\pi/8}+\cos{3\pi/8}), cos_diff = 2(\cos{\pi/8}-\cos{3\pi/8})
; Now formulas:
; s0 = ((S0+S4)+(S2+S6)) + ((S1+S7)+(S3+S5))
; s7 = ((S0+S4)+(S2+S6)) - ((S1+S7)+(S3+S5))
; val0 = ((cos-1)S1-(cos+cos_sum+1)S3+(cos+cos_sum-1)S5-(cos+1)S7)
; s1 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) + val0
; s6 = ((S0-S4)+((sqrt2-1)S2-(sqrt2+1)S6)) - val0
; val1 = (S1+S7-S3-S5)sqrt2 - val0
; s2 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) + val1
; s5 = ((S0-S4)-((sqrt2-1)S2-(sqrt2+1)S6)) - val1
; val2 = (S1-S7)cos_diff - (S1-S3+S5-S7)cos + val1
; s3 = ((S0+S4)-(S2+S6)) - val2
; s4 = ((S0+S4)-(S2+S6)) + val2

; odd part: S1,S3,S5,S7 -> val0,val1,val2 and S1+S3+S5+S7
	fild	word [edi+3*16]
	fmul	dword [eax+3*32]
	fild	word [edi+5*16]
	fmul	dword [eax+5*32]	; st0=S5,st1=S3
	fadd	st1, st0
	fadd	st0, st0
	fsub	st0, st1		; st0=S5-S3,st1=S5+S3
	fild	word [edi+1*16]
	fmul	dword [eax+1*32]
	fild	word [edi+7*16]
	fmul	dword [eax+7*32]	; st0=S7,st1=S1
	fsub	st1, st0
	fadd	st0, st0
	fadd	st0, st1		; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
	fadd	st3, st0
	fadd	st0, st0
	fsub	st0, st3		; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
	fmul	[idct_sqrt2]
	fld	st2
	fadd	st0, st2
	fmul	[idct_cos]		; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
					; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
	fxch	st2
	fmul	[idct_cos_diff]
	fsub	st0, st2		; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
	fxch	st3
	fmul	[idct_cos_sum]
	fadd	st0, st2		; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
	fsub	st0, st4		; st0=val0
	fsub	st1, st0		; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
					; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
	fxch	st2
	fstp	st0
	fadd	st2, st0		; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7

; even part: S0,S2,S4,S6
	fild	word [edi+0*16]
	fmul	dword [eax+0*32]
	fild	word [edi+4*16]
	fmul	dword [eax+4*32]	; st0=S4,st1=S0
	fsub	st1, st0
	fadd	st0, st0
	fadd	st0, st1		; st0=S0+S4,st1=S0-S4
	fild	word [edi+6*16]
	fmul	dword [eax+6*32]
	fild	word [edi+2*16]
	fmul	dword [eax+2*32]	; st0=S2,st1=S6
	fadd	st1, st0
	fadd	st0, st0
	fsub	st0, st1		; st0=S2-S6,st1=S2+S6
	fmul	[idct_sqrt2]
	fsub	st0, st1
	fsub	st3, st0
	fadd	st0, st0
	fadd	st0, st3		; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
					; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
	fxch	st1
	fsub	st2, st0
	fadd	st0, st0
	fadd	st0, st2		; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
					; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
					; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7

; final butterflies; all 8 FPU registers are in use and get stored/popped here
	fsubr	st7, st0
	fadd	st0, st0
	fsub	st0, st7
	fstp	dword [ebx+0*32]
	fsubr	st4, st0
	fadd	st0, st0
	fsub	st0, st4
	fstp	dword [ebx+1*32]
	fadd	st4, st0
	fadd	st0, st0
	fsub	st0, st4
	fstp	dword [ebx+3*32]
	fsubr	st1, st0
	fadd	st0, st0
	fsub	st0, st1
	fstp	dword [ebx+2*32]
	fstp	dword [ebx+5*32]
	fstp	dword [ebx+6*32]
	fstp	dword [ebx+4*32]
	fstp	dword [ebx+7*32]

.idct_next1:
	add	edi, 2			; next word of the input row
	add	eax, 4			; next float of the scale table
	add	ebx, 4			; next float of the temporary area
	sub	dword [esp], 1
	jnz	.idct_loop1
	pop	ecx			; drop the exhausted counter
	sub	ebx, 8*4		; back to the start of the temporary area
	mov	ecx, 8

; ---- pass 2: one iteration per group of 8 consecutive floats at ebx;
; ---- same arithmetic as pass 1 without scaling, results are stored
; ---- in place as 32-bit integers (fistp)
.idct_loop2:
	fld	dword [ebx+3*4]
	fld	dword [ebx+5*4]
	fadd	st1, st0
	fadd	st0, st0
	fsub	st0, st1		; st0=S5-S3,st1=S5+S3
	fld	dword [ebx+1*4]
	fld	dword [ebx+7*4]
	fsub	st1, st0
	fadd	st0, st0
	fadd	st0, st1		; st0=S1+S7,st1=S1-S7,st2=S5-S3,st3=S5+S3
	fadd	st3, st0
	fadd	st0, st0
	fsub	st0, st3		; st0=S1-S3-S5+S7,st1=S1-S7,st2=S5-S3,st3=S1+S3+S5+S7
	fmul	[idct_sqrt2]
	fld	st2
	fadd	st0, st2
	fmul	[idct_cos]		; st0=(S1-S3+S5-S7)cos,st1=(S1-S3-S5+S7)sqrt2,
					; st2=S1-S7,st3=S5-S3,st4=S1+S3+S5+S7
	fxch	st2
	fmul	[idct_cos_diff]
	fsub	st0, st2		; st0=(S1-S7)cos_diff - (S1-S3+S5-S7)cos
	fxch	st3
	fmul	[idct_cos_sum]
	fadd	st0, st2		; st0=(S5-S3)cos_sum+(S1-S3+S5-S7)cos
	fsub	st0, st4		; st0=val0
	fsub	st1, st0		; st0=val0,st1=val1,st2=(S1-S3+S5-S7)cos,
					; st3=(S1-S7)cos_diff-(S1-S3+S5-S7)cos,st4=S1+S3+S5+S7
	fxch	st2
	fstp	st0
	fadd	st2, st0		; st0=val1,st1=val0,st2=val2,st3=S1+S3+S5+S7

	fld	dword [ebx+0*4]
	fld	dword [ebx+4*4]
	fsub	st1, st0
	fadd	st0, st0
	fadd	st0, st1		; st0=S0+S4,st1=S0-S4
	fld	dword [ebx+6*4]
	fld	dword [ebx+2*4]
	fadd	st1, st0
	fadd	st0, st0
	fsub	st0, st1		; st0=S2-S6,st1=S2+S6
	fmul	[idct_sqrt2]
	fsub	st0, st1
	fsub	st3, st0
	fadd	st0, st0
	fadd	st0, st3		; st0=(S0-S4)+((S2-S6)sqrt2-(S2+S6))
					; st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
	fxch	st1
	fsub	st2, st0
	fadd	st0, st0
	fadd	st0, st2		; st0=(S0+S4)+(S2+S6),st1=(S0-S4)+((S2-S6)sqrt2-(S2+S6)),
					; st2=(S0+S4)-(S2+S6),st3=(S0-S4)-((S2-S6)sqrt2-(S2+S6))
					; st4=val1,st5=val0,st6=val2,st7=S1+S3+S5+S7
	fsubr	st7, st0
	fadd	st0, st0
	fsub	st0, st7
	fistp	dword [ebx+0*4]
	fsubr	st4, st0
	fadd	st0, st0
	fsub	st0, st4
	fistp	dword [ebx+1*4]
	fadd	st4, st0
	fadd	st0, st0
	fsub	st0, st4
	fistp	dword [ebx+3*4]
	fsubr	st1, st0
	fadd	st0, st0
	fsub	st0, st1
	fistp	dword [ebx+2*4]
	fistp	dword [ebx+5*4]
	fistp	dword [ebx+6*4]
	fistp	dword [ebx+4*4]
	fistp	dword [ebx+7*4]

	add	ebx, 32
	sub	ecx, 1
	jnz	.idct_loop2

; ---- pass 3: 64 integers -> 64 bytes:
; ---- add 80h, clamp to 0..0FFh, subtract byte [edx+51], store
	sub	ebx, 32*8		; back to the start of the temporary area
	push	esi			; esi is used as a mask register below
	lea	edi, [ebx - jpeg.work.idct_tmp_area + jpeg.work.decoded_data - 1]
	mov	ecx, 64
.idct_loop3:
	add	edi, 1			; edi starts one byte before the output buffer
	mov	eax, [ebx]
	add	ebx, 4
	add	eax, 80h
	cmp	eax, 80000000h
	sbb	esi, esi		; esi = -1 if eax is non-negative, 0 otherwise
	and	eax, esi		; negative -> 0
	cmp	eax, 100h
	sbb	esi, esi
	not	esi			; esi = -1 if eax >= 100h, 0 otherwise
	or	eax, esi		; too large -> al = 0FFh
	sub	al, [edx+51]
	sub	ecx, 1
	mov	[edi], al		; mov keeps the flags of the preceding sub
	jnz	.idct_loop3
	pop	esi
	sub	ebx, 64*4 + jpeg.work.idct_tmp_area	; ebx -> jpeg.work again
; done
	ret

; Error exits. .eof_popN first discards N dwords from the stack (into ebx),
; then all of them unwind to the saved stack pointer.
.eof_pop3:
	pop	ebx
.eof_pop2:
	pop	ebx
.eof_pop1:
	pop	ebx
.eof_pop0:
; EOF or incorrect data during scanning
	mov	esp, [ebx + jpeg.work._esp]
	jmp	img.decode.jpg.end
2190
 
2190
 
2191
; img.encode.jpg
; Stub: does no work, returns eax = 0 and removes 8 bytes of arguments
; from the stack on return.
; NOTE(review): presumably eax = 0 tells the caller that JPEG encoding is
; not available - confirm against the code that dispatches to encoders.
img.encode.jpg:
	xor	eax, eax		; result = 0
	retn	8			; near return, callee pops 8 bytes
2194
 
2194
 
2195
; Zigzag scan order of an 8*8 block, one byte per scan position.
; Entry k is the byte offset 2*(x+y*8) of the k-th coefficient inside a
; block of 64 words, (x,y) being the position the scan visits at step k.
; 8 entries per line below; 64 entries in total.
zigzag:
	db	  0,   2,  16,  32,  18,   4,   6,  20
	db	 34,  48,  64,  50,  36,  22,   8,  10
	db	 24,  38,  52,  66,  80,  96,  82,  68
	db	 54,  40,  26,  12,  14,  28,  42,  56
	db	 70,  84,  98, 112, 114, 100,  86,  72
	db	 58,  44,  30,  46,  60,  74,  88, 102
	db	116, 118, 104,  90,  76,  62,  78,  92
	db	106, 120, 122, 108,  94, 110, 124, 126
2221
 
2221
 
2222
align 4
; Scale factors c_0..c_7 for the IDCT:
;	c_0 = 1/(2\sqrt{2}), c_i = cos(i*\pi/16)/2 for i = 1..7
idct_pre_table:
	dd	0.35355339	; c_0
	dd	0.49039264	; c_1
	dd	0.461939766	; c_2
	dd	0.41573481	; c_3
	dd	0.35355339	; c_4
	dd	0.27778512	; c_5
	dd	0.19134172	; c_6
	dd	0.09754516	; c_7

; Single-precision multipliers used by the IDCT butterflies.
; They are declared as "name dd value" (no colon) so that each label carries
; the dword size; the code uses them as  fmul [name]  without a size override.
idct_sqrt2	dd	1.41421356	; \sqrt{2}
idct_cos	dd	1.847759065	; 2\cos{\pi/8}
idct_cos_sum	dd	-2.61312593	; -2(\cos{\pi/8} + \cos{3\pi/8})
idct_cos_diff	dd	1.08239220	; 2(\cos{\pi/8} - \cos{3\pi/8})
2231
;---------------------------------------------------------------------
2231
;---------------------------------------------------------------------