Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
 
2
3
 
4
	.register %g3, #scratch
5
6
 
7
8
 
9
#define STACK_VAR_OFF	(2047 + (8 * 16))
10
#else
11
#define STACK_VAR_OFF	(4 * 16)
12
#endif
13
14
 
15
	 * (and less accurate) than direct fsqrts/fdivs.
16
	 */
17
#define ONE_DOT_ZERO	0x3f800000
18
19
 
20
/*
 * _mesa_sparc_transform_normalize_normals
 *
 * Register roles on entry are given by the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * For every input vector (the source pointer %o5 advances by in->stride)
 * the visible code forms tx/ty/tz in %f3/%f5/%f7 from ux/uy/uz and the
 * M0..M10 operands, then stores three floats at the output pointer %g3,
 * which advances by 0x10 per vector.
 *
 * Two loop bodies are visible:
 *   - the first computes 1/sqrt(tx*tx + ty*ty + tz*tz) with fsqrts/fdivs
 *     and multiplies by it;
 *   - the second is preceded by a block that multiplies M0..M10 by the
 *     scale in %f15, and reads a per-vector factor from the array at %o3
 *     (advanced by 4 bytes per vector).
 * The `cmp %o3, 0` / `bne 4f` pair presumably selects between them
 * (label 4 is not visible here - TODO confirm).
 *
 * M0..M10, LDPTR and the V4F_* offsets are defined outside this listing;
 * presumably FP-register aliases for matrix elements, a pointer-sized
 * load, and struct field offsets - TODO confirm.
 *
 * NOTE(review): this listing looks incomplete.  Not visible here: the
 * numeric labels targeted by 7f/4f/1b/5b, the compares that would set
 * the condition codes for the `bl` instructions, any instruction that
 * puts the 1.0f bit pattern into %g2 before it is stored, any load of
 * the matrix into M0..M10, any multiply of %f3 by the normalisation
 * factor before it is stored, and any return instruction.  Verify
 * against the upstream file before relying on this text.
 */
_mesa_sparc_transform_normalize_normals:
21
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
22
23
 
24
	/* Move %g2 and %o1 into FP registers by bouncing them through a scratch stack slot. */
	sub	%sp, 16, %sp
25
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
26
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
27
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
28
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
29
	add	%sp, 16, %sp
30
31
 
32
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
33
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
34
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
35
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
36
37
 
38
39
 
40
	/* dest->count = in->count (%g1 was loaded from in->count above) */
	st	%g1, [%o4 + V4F_COUNT]
41
42
 
43
	bl	7f
44
	 cmp	%o3, 0
45
	bne	4f
46
	 clr	%o4				! 'i' for STRIDE_LOOP
47
48
 
49
	ld	[%o5 + 0x00], %f0		! ux = from[0]
50
	ld	[%o5 + 0x04], %f1		! uy = from[1]
51
	ld	[%o5 + 0x08], %f2		! uz = from[2]
52
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
53
	add	%o4, 1, %o4			! i++
54
55
 
56
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
57
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
58
	 */
59
	fmuls	%f0, M0, %f3			! FGM	Group
60
	fmuls	%f1, M1, %f4			! FGM	Group
61
	fmuls	%f0, M4, %f5			! FGM	Group
62
	fmuls	%f1, M5, %f6			! FGM	Group
63
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
64
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
65
	fadds	%f3, %f4, %f3			! FGA
66
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
67
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
68
	fadds	%f5, %f6, %f5			! FGA
69
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
70
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
71
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
72
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
73
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
74
75
 
76
77
 
78
	/* f6 = f3*f3 + f5*f5 + f7*f7 (squared length of the transformed vector) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
79
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
80
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
81
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
82
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available
83
84
 
85
	/* f6 = f12 / sqrt(f6); %f12 is documented above as 1.0f */
	fsqrts	%f6, %f6			! FDIV  20 cycles
86
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles
87
88
 
89
	/* NOTE(review): %f5 and %f7 are multiplied by %f6 below; no such multiply of %f3 is visible - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
90
	fmuls	%f5, %f6, %f5
91
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
92
	fmuls	%f7, %f6, %f7
93
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
94
95
 
96
	bl	1b
97
	 add	%g3, 0x10, %g3			! advance out vector pointer
98
99
 
100
	 nop
101
102
 
103
	/* Multiply each of M0..M10 in place by the scale factor held in %f15. */
	fmuls	M0, %f15, M0
104
	fmuls	M1, %f15, M1
105
	fmuls	M2, %f15, M2
106
	fmuls	M4, %f15, M4
107
	fmuls	M5, %f15, M5
108
	fmuls	M6, %f15, M6
109
	fmuls	M8, %f15, M8
110
	fmuls	M9, %f15, M9
111
	fmuls	M10, %f15, M10
112
113
 
114
	ld	[%o5 + 0x00], %f0		! ux = from[0]
115
	ld	[%o5 + 0x04], %f1		! uy = from[1]
116
	ld	[%o5 + 0x08], %f2		! uz = from[2]
117
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
118
	add	%o4, 1, %o4			! i++
119
120
 
121
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
122
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
123
	 */
124
	fmuls	%f0, M0, %f3			! FGM	Group
125
	fmuls	%f1, M1, %f4			! FGM	Group
126
	fmuls	%f0, M4, %f5			! FGM	Group
127
	fmuls	%f1, M5, %f6			! FGM	Group
128
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
129
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
130
	fadds	%f3, %f4, %f3			! FGA
131
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
132
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
133
	fadds	%f5, %f6, %f5			! FGA
134
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
135
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
136
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
137
	/* f13 = next entry of the array at %o3; the pointer is advanced by 4 just below */
	ld	[%o3], %f13			! LSU
138
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
139
	add	%o3, 4, %o3			! IEU0
140
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
141
142
 
143
144
 
145
	/* NOTE(review): as in the first loop, no multiply of %f3 by %f13 is visible before this store - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
146
	fmuls	%f5, %f13, %f5
147
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
148
	fmuls	%f7, %f13, %f7
149
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len
150
151
 
152
	bl	5b
153
	 add	%g3, 0x10, %g3			! advance out vector pointer
154
155
 
156
	 nop
157
158
 
159
/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Same register roles as the comment under the label states:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * Variant that only uses the M0, M5 and M10 operands: each component is
 * a single product (tx = ux*M0, ty = uy*M5, tz = uz*M10).  Two loop
 * bodies are visible: one derives 1/sqrt(tx*tx + ty*ty + tz*tz) with
 * fsqrts/fdivs; the other is preceded by a block that multiplies
 * M0/M5/M10 by the scale in %f15 and then reads a per-vector factor
 * from the array at %o3.
 *
 * M0/M5/M10, LDPTR and the V4F_* offsets are defined outside this
 * listing.
 *
 * NOTE(review): this listing looks incomplete.  The labels targeted by
 * 7f/4f/1b/5b, the compares feeding the `bl` instructions, the setup of
 * %g2 with the 1.0f bit pattern, the load of the matrix operands, the
 * multiply of %f3 before it is stored, and the return instruction are
 * not visible - verify against the upstream file.
 */
_mesa_sparc_transform_normalize_normals_no_rot:
160
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
161
162
 
163
	/* Move %g2 and %o1 into %f12/%f15 via a scratch stack slot. */
	sub	%sp, 16, %sp
164
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
165
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
166
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
167
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
168
	add	%sp, 16, %sp
169
170
 
171
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
172
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
173
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
174
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
175
176
 
177
178
 
179
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
180
181
 
182
	bl	7f
183
	 cmp	%o3, 0
184
	bne	4f
185
	 clr	%o4				! 'i' for STRIDE_LOOP
186
187
 
188
	ld	[%o5 + 0x00], %f0		! ux = from[0]
189
	ld	[%o5 + 0x04], %f1		! uy = from[1]
190
	ld	[%o5 + 0x08], %f2		! uz = from[2]
191
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
192
	add	%o4, 1, %o4			! i++
193
194
 
195
	 * ty (f5) = (uy * m5)
196
	 * tz (f7) = (uz * m10)
197
	 */
198
	fmuls	%f0, M0, %f3			! FGM	Group
199
	fmuls	%f1, M5, %f5			! FGM	Group
200
	fmuls	%f2, M10, %f7			! FGM	Group
201
202
 
203
204
 
205
	/* f6 = f3*f3 + f5*f5 + f7*f7 */
	fmuls	%f3, %f3, %f6			! FGM	Group	stall, f3 available
206
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
207
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
208
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
209
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available
210
211
 
212
	/* f6 = f12 / sqrt(f6); %f12 is documented above as 1.0f */
	fsqrts	%f6, %f6			! FDIV  20 cycles
213
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles
214
215
 
216
	/* NOTE(review): no multiply of %f3 by %f6 is visible before this store - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
217
	fmuls	%f5, %f6, %f5
218
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
219
	fmuls	%f7, %f6, %f7
220
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
221
222
 
223
	bl	1b
224
	 add	%g3, 0x10, %g3			! advance out vector pointer
225
226
 
227
	 nop
228
229
 
230
	/* Multiply M0, M5 and M10 in place by the scale factor held in %f15. */
	fmuls	M0, %f15, M0
231
	fmuls	M5, %f15, M5
232
	fmuls	M10, %f15, M10
233
234
 
235
	ld	[%o5 + 0x00], %f0		! ux = from[0]
236
	ld	[%o5 + 0x04], %f1		! uy = from[1]
237
	ld	[%o5 + 0x08], %f2		! uz = from[2]
238
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
239
	add	%o4, 1, %o4			! i++
240
241
 
242
	 * ty (f5) = (uy * m5)
243
	 * tz (f7) = (uz * m10)
244
	 */
245
	fmuls	%f0, M0, %f3			! FGM	Group
246
	/* f13 = next entry of the array at %o3; the pointer is advanced by 4 below */
	ld	[%o3], %f13			! LSU
247
	fmuls	%f1, M5, %f5			! FGM	Group
248
	add	%o3, 4, %o3			! IEU0
249
	fmuls	%f2, M10, %f7			! FGM	Group
250
251
 
252
253
 
254
	/* NOTE(review): no multiply of %f3 by %f13 is visible before this store - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
255
	fmuls	%f5, %f13, %f5
256
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
257
	fmuls	%f7, %f13, %f7
258
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len
259
260
 
261
	bl	5b
262
	 add	%g3, 0x10, %g3			! advance out vector pointer
263
264
 
265
	 nop
266
267
 
268
/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Register roles per the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * The scale is moved into %f15 through a stack slot, matrix operands are
 * multiplied by it once up front, and the loop then writes
 * (f0*M0, f1*M5, f2*M10) to the output pointer for each input vector.
 * No per-vector normalisation is performed in the visible code.
 *
 * M0/M5/M10, LDPTR and the V4F_* offsets are defined outside this
 * listing.
 *
 * NOTE(review): this listing looks incomplete.  Only M5 and M10 are
 * visibly pre-scaled (M0 is not), %f0 is used without a visible load of
 * from[0], and the labels for 7f/1b, the compares feeding `bl`, the
 * matrix load and the return instruction are not visible - verify
 * against the upstream file.
 */
_mesa_sparc_transform_rescale_normals_no_rot:
269
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
270
	/* Move the scale in %o1 into %f15 via a scratch stack slot. */
	sub	%sp, 16, %sp
271
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
272
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
273
	add	%sp, 16, %sp
274
275
 
276
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
277
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
278
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
279
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
280
281
 
282
283
 
284
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
285
286
 
287
	bl	7f
288
	 clr	%o4				! 'i' for STRIDE_LOOP
289
290
 
291
	/* Fold the scale factor into the matrix operands before the loop. */
	fmuls	M5, %f15, M5
292
	fmuls	M10, %f15, M10
293
294
 
295
	ld	[%o5 + 0x04], %f1		! uy = from[1]
296
	ld	[%o5 + 0x08], %f2		! uz = from[2]
297
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
298
	add	%o4, 1, %o4			! i++
299
300
 
301
	 * ty (f5) = (uy * m5)
302
	 * tz (f7) = (uz * m10)
303
	 */
304
	/* Each product is stored as soon as it is issued: out[0..2] = f3, f5, f7. */
	fmuls	%f0, M0, %f3			! FGM	Group
305
	st	%f3, [%g3 + 0x00]		! LSU
306
	fmuls	%f1, M5, %f5			! FGM	Group
307
	st	%f5, [%g3 + 0x04]		! LSU
308
	fmuls	%f2, M10, %f7			! FGM	Group
309
	st	%f7, [%g3 + 0x08]		! LSU
310
311
 
312
	bl	1b
313
	 add	%g3, 0x10, %g3			! advance out vector pointer
314
315
 
316
	 nop
317
318
 
319
/*
 * _mesa_sparc_transform_rescale_normals
 *
 * Register roles per the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * The scale is moved into %f15 through a stack slot and multiplied into
 * the matrix operands once; the loop then accumulates three-term sums
 * into %f3/%f5/%f7 from the input components and M0..M10 and stores them
 * at the output pointer, which advances by 0x10 per vector.
 *
 * M0..M10, LDPTR and the V4F_* offsets are defined outside this listing.
 *
 * NOTE(review): this listing looks incomplete.  M0 is not visibly
 * pre-scaled (M1..M10 are), %f0 is used without a visible load of
 * from[0], %f3 is accumulated into without a visible initial product,
 * and the labels for 7f/1b, the compares feeding `bl`, the matrix load
 * and the return instruction are not visible - verify against the
 * upstream file.
 */
_mesa_sparc_transform_rescale_normals:
320
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
321
	/* Move the scale in %o1 into %f15 via a scratch stack slot. */
	sub	%sp, 16, %sp
322
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
323
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
324
	add	%sp, 16, %sp
325
326
 
327
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
328
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
329
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
330
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
331
332
 
333
334
 
335
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
336
337
 
338
	bl	7f
339
	 clr	%o4				! 'i' for STRIDE_LOOP
340
341
 
342
	/* Fold the scale factor into the matrix operands before the loop. */
	fmuls	M1, %f15, M1
343
	fmuls	M2, %f15, M2
344
	fmuls	M4, %f15, M4
345
	fmuls	M5, %f15, M5
346
	fmuls	M6, %f15, M6
347
	fmuls	M8, %f15, M8
348
	fmuls	M9, %f15, M9
349
	fmuls	M10, %f15, M10
350
351
 
352
	ld	[%o5 + 0x04], %f1		! uy = from[1]
353
	ld	[%o5 + 0x08], %f2		! uz = from[2]
354
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
355
	add	%o4, 1, %o4			! i++
356
357
 
358
	/* Products and partial sums are interleaved; results end up in %f3, %f5, %f7. */
	fmuls	%f1, M1, %f4			! FGM	Group
359
	fmuls	%f0, M4, %f5			! FGM	Group
360
	fmuls	%f1, M5, %f6			! FGM	Group
361
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
362
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
363
	fadds	%f3, %f4, %f3			! FGA
364
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
365
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
366
	fadds	%f5, %f6, %f5			! FGA
367
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
368
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
369
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
370
	st	%f3, [%g3 + 0x00]		! LSU
371
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
372
	st	%f5, [%g3 + 0x04]		! LSU
373
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
374
	st	%f7, [%g3 + 0x08]		! LSU
375
376
 
377
	bl	1b
378
	 add	%g3, 0x10, %g3			! advance out vector pointer
379
380
 
381
	 nop
382
383
 
384
/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * Register roles per the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * Replaces %o0 with mat->inv, copies in->count to dest->count, and for
 * each input vector stores (f0*M0, f1*M5, f2*M10) at the output pointer.
 * Neither the scale (%o1) nor the lengths array (%o3) is referenced by
 * the visible code.
 *
 * M0/M5/M10, MAT_INV, LDPTR and the V4F_* offsets are defined outside
 * this listing.
 *
 * NOTE(review): this listing looks incomplete.  %f0 is used without a
 * visible load of from[0], and the labels for 7f/1b, the compare feeding
 * `bl`, the load of the matrix operands from %o0 and the return
 * instruction are not visible - verify against the upstream file.
 */
_mesa_sparc_transform_normals_no_rot:
385
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
386
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
387
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
388
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
389
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
390
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
391
392
 
393
394
 
395
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
396
397
 
398
	bl	7f
399
	 clr	%o4				! 'i' for STRIDE_LOOP
400
401
 
402
	ld	[%o5 + 0x04], %f1		! uy = from[1]
403
	ld	[%o5 + 0x08], %f2		! uz = from[2]
404
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
405
	add	%o4, 1, %o4			! i++
406
407
 
408
	 * ty (f5) = (uy * m5)
409
	 * tz (f7) = (uz * m10)
410
	 */
411
	/* Each product is stored as soon as it is issued: out[0..2] = f3, f5, f7. */
	fmuls	%f0, M0, %f3			! FGM	Group
412
	st	%f3, [%g3 + 0x00]		! LSU
413
	fmuls	%f1, M5, %f5			! FGM	Group
414
	st	%f5, [%g3 + 0x04]		! LSU
415
	fmuls	%f2, M10, %f7			! FGM	Group
416
	st	%f7, [%g3 + 0x08]		! LSU
417
418
 
419
	bl	1b
420
	 add	%g3, 0x10, %g3			! advance out vector pointer
421
422
 
423
	 nop
424
425
 
426
/*
 * _mesa_sparc_transform_normals
 *
 * Register roles per the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * Replaces %o0 with mat->inv, copies in->count to dest->count, and for
 * each input vector accumulates three-term sums into %f3/%f5/%f7 from
 * the input components and M0..M10, storing them at the output pointer
 * (advanced by 0x10 per vector).  Neither the scale (%o1) nor the
 * lengths array (%o3) is referenced by the visible code.
 *
 * M0..M10, MAT_INV, LDPTR and the V4F_* offsets are defined outside this
 * listing.
 *
 * NOTE(review): this listing looks incomplete.  %f0 is used without a
 * visible load of from[0], %f3 is accumulated into without a visible
 * initial product, and the labels for 7f/1b, the compare feeding `bl`,
 * the load of the matrix operands from %o0 and the return instruction
 * are not visible - verify against the upstream file.
 */
_mesa_sparc_transform_normals:
427
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
428
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
429
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
430
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
431
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
432
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
433
434
 
435
436
 
437
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
438
439
 
440
	bl	7f
441
	 clr	%o4				! 'i' for STRIDE_LOOP
442
443
 
444
	ld	[%o5 + 0x04], %f1		! uy = from[1]
445
	ld	[%o5 + 0x08], %f2		! uz = from[2]
446
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
447
	add	%o4, 1, %o4			! i++
448
449
 
450
	/* Products and partial sums are interleaved; results end up in %f3, %f5, %f7. */
	fmuls	%f1, M1, %f4			! FGM	Group
451
	fmuls	%f0, M4, %f5			! FGM	Group
452
	fmuls	%f1, M5, %f6			! FGM	Group
453
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
454
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
455
	fadds	%f3, %f4, %f3			! FGA
456
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
457
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
458
	fadds	%f5, %f6, %f5			! FGA
459
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
460
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
461
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
462
	st	%f3, [%g3 + 0x00]		! LSU
463
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
464
	st	%f5, [%g3 + 0x04]		! LSU
465
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
466
	st	%f7, [%g3 + 0x08]		! LSU
467
468
 
469
	bl	1b
470
	 add	%g3, 0x10, %g3			! advance out vector pointer
471
472
 
473
	 nop
474
475
 
476
/*
 * _mesa_sparc_normalize_normals
 *
 * Register roles per the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * No matrix operands are used: the input components are loaded straight
 * into %f3/%f5/%f7.  Two loop bodies are visible: one derives
 * 1/sqrt(f3*f3 + f5*f5 + f7*f7) with fsqrts/fdivs and multiplies by it;
 * the other multiplies by %f13 and advances %o3 by 4 bytes per vector.
 * Results are stored at the output pointer %g3, advanced by 0x10 per
 * vector.
 *
 * LDPTR and the V4F_* offsets are defined outside this listing.
 *
 * NOTE(review): this listing looks incomplete.  Not visible here: the
 * load of in->start into %o5 (which the loops read through), the load
 * that fills %f13 in the second loop, the setup of %g2 with the 1.0f
 * bit pattern, any multiply of %f3 before it is stored, the labels for
 * 7f/4f/1b/5b, the compares feeding `bl`, and the return instruction -
 * verify against the upstream file.
 */
_mesa_sparc_normalize_normals:
477
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
478
479
 
480
	/* Move %g2 into %f12 via a scratch stack slot. */
	sub	%sp, 16, %sp
481
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
482
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
483
	add	%sp, 16, %sp
484
485
 
486
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
487
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
488
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
489
490
 
491
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
492
493
 
494
	bl	7f
495
	 cmp	%o3, 0
496
	bne	4f
497
	 clr	%o4				! 'i' for STRIDE_LOOP
498
499
 
500
	ld	[%o5 + 0x00], %f3		! ux = from[0]
501
	ld	[%o5 + 0x04], %f5		! uy = from[1]
502
	ld	[%o5 + 0x08], %f7		! uz = from[2]
503
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
504
	add	%o4, 1, %o4			! i++
505
506
 
507
508
 
509
	/* f6 = f3*f3 + f5*f5 + f7*f7 */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
510
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
511
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
512
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
513
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available
514
515
 
516
	/* f6 = f12 / sqrt(f6); %f12 is documented above as 1.0f */
	fsqrts	%f6, %f6			! FDIV  20 cycles
517
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles
518
519
 
520
	/* NOTE(review): no multiply of %f3 by %f6 is visible before this store - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
521
	fmuls	%f5, %f6, %f5
522
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
523
	fmuls	%f7, %f6, %f7
524
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
525
526
 
527
	bl	1b
528
	 add	%g3, 0x10, %g3			! advance out vector pointer
529
530
 
531
	 nop
532
533
 
534
535
 
536
	ld	[%o5 + 0x00], %f3		! ux = from[0]
537
	ld	[%o5 + 0x04], %f5		! uy = from[1]
538
	ld	[%o5 + 0x08], %f7		! uz = from[2]
539
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
540
	add	%o4, 1, %o4			! i++
541
542
 
543
	/* Step the %o3 pointer to the next 4-byte entry. */
	add	%o3, 4, %o3			! IEU0
544
545
 
546
547
 
548
	/* NOTE(review): %f13 is used below but no load into it is visible in this function - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
549
	fmuls	%f5, %f13, %f5
550
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
551
	fmuls	%f7, %f13, %f7
552
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len
553
554
 
555
	bl	5b
556
	 add	%g3, 0x10, %g3			! advance out vector pointer
557
558
 
559
	 nop
560
561
 
562
/*
 * _mesa_sparc_rescale_normals
 *
 * Register roles per the comment under the label:
 * o0=mat, o1=scale, o2=in, o3=lengths, o4=dest.
 *
 * Moves the scale from %o1 into %f15 through a stack slot, copies
 * in->count to dest->count, and for each input vector loads the three
 * components into %f3/%f5/%f7, multiplies by %f15 and stores them at the
 * output pointer %g3 (advanced by 0x10 per vector).  No matrix operands
 * and no lengths array are referenced by the visible code.
 *
 * LDPTR and the V4F_* offsets are defined outside this listing.
 *
 * NOTE(review): this listing looks incomplete.  Not visible here: the
 * load of in->start into %o5 (which the loop reads through), any
 * multiply of %f3 by %f15 before it is stored, the labels for 7f/1b,
 * the compares feeding `bl`, and the return instruction - verify
 * against the upstream file.
 */
_mesa_sparc_rescale_normals:
563
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
564
565
 
566
	/* Move the scale in %o1 into %f15 via a scratch stack slot. */
	sub	%sp, 16, %sp
567
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
568
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
569
	add	%sp, 16, %sp
570
571
 
572
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
573
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
574
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start
575
576
 
577
	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]
578
579
 
580
	bl	7f
581
	 clr	%o4				! 'i' for STRIDE_LOOP
582
583
 
584
	ld	[%o5 + 0x00], %f3		! ux = from[0]
585
	ld	[%o5 + 0x04], %f5		! uy = from[1]
586
	ld	[%o5 + 0x08], %f7		! uz = from[2]
587
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
588
	add	%o4, 1, %o4			! i++
589
590
 
591
592
 
593
	/* NOTE(review): no multiply of %f3 by %f15 is visible before this store - confirm. */
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
594
	fmuls	%f5, %f15, %f5
595
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
596
	fmuls	%f7, %f15, %f7
597
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale
598
599
 
600
	bl	1b
601
	 add	%g3, 0x10, %g3			! advance out vector pointer
602
603
 
604
	 nop
605