Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Clip testing in SPARC assembly |
||
3 | */ |
||
4 | |||
/*
 * Pointer-load mnemonic and byte offsets used when addressing the
 * GLvector4f arguments of the clip-test routines.
 *
 * The layout implied by the offsets is two pointer-sized slots (DATA,
 * START) followed by four 32-bit slots (COUNT, STRIDE, SIZE, FLAGS), so
 * every offset after DATA depends on the pointer width.  LDPTR is the
 * load instruction used for the pointer-sized slots.
 */
#if __arch64__
#define LDPTR		ldx		/* pointers are 8 bytes */
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else	/* !__arch64__ */
#define LDPTR		ld		/* pointers are 4 bytes */
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif	/* __arch64__ */

/*
 * Values OR'ed into the word at V4F_FLAGS.  Each one has the N lowest
 * bits set; presumably one bit per valid vector component - confirm
 * against the C definition of the flags.
 */
#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15
27 | |||
/* %g2 and %g3 are used as plain scratch registers by the code below. */
	.register	%g2, #scratch
	.register	%g3, #scratch

	.text
	.align		64

/*
 * Single-precision 1.0.  The clip-test routines locate this word
 * PC-relatively and then add 4 to reach clip_table, so clip_table must
 * stay immediately behind it.
 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

/*
 * Clip mask lookup table.  The idea is borrowed from the x86 Mesa
 * assembly; it works here as well because SPARC has the required
 * add-with-carry instructions.
 *
 * The routines build a 7-bit index by shifting carry bits into a
 * register with addx.  From most to least significant bit:
 *
 *	bit 6	sign bit of w
 *	bit 5	sign bit of z
 *	bit 4	(w << 1) < (z << 1), unsigned compare of the raw bits
 *	bit 3	sign bit of y
 *	bit 2	(w << 1) < (y << 1)
 *	bit 1	sign bit of x
 *	bit 0	(w << 1) < (x << 1)
 *
 * The byte found at that index is the clip mask of the vertex; zero
 * means the vertex is not clipped.  128 entries, 8 per row.
 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* 0x00 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* 0x08 */
	.byte	32, 33, 32, 34, 36, 37, 36, 38	/* 0x10 */
	.byte	32, 33, 32, 34, 40, 41, 40, 42	/* 0x18 */
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* 0x20 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* 0x28 */
	.byte	16, 17, 16, 18, 20, 21, 20, 22	/* 0x30 */
	.byte	16, 17, 16, 18, 24, 25, 24, 26	/* 0x38 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* 0x40 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* 0x48 */
	.byte	47, 45, 47, 46, 39, 37, 39, 38	/* 0x50 */
	.byte	47, 45, 47, 46, 43, 41, 43, 42	/* 0x58 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* 0x60 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* 0x68 */
	.byte	31, 29, 31, 30, 23, 21, 23, 22	/* 0x70 */
	.byte	31, 29, 31, 30, 27, 25, 27, 26	/* 0x78 */
59 | |||
/*
 * Arguments taken by the clip-test entry points in this file, in order:
 *
 *	GLvector4f *clip_vec, GLvector4f *proj_vec,
 *	GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
 *	GLboolean viewport_z_enable
 */

	.align		64

/*
 * Empty leaf routine: returns to its caller straight away.
 *
 * It exists only so that a "call __pc_tramp" leaves the address of the
 * call instruction in %o7.  The callers use the delay slot of that call
 * to turn %o7 into the run-time address of one_dot_zero.
 */
__pc_tramp:
	retl
	 nop
68 | |||
/*
 * _mesa_sparc_cliptest_points4(clip_vec, proj_vec, clipMask,
 *                              orMask, andMask, viewport_z_enable)
 *
 * For each of clip_vec's count vertices (x, y, z, w):
 *   - look up its clip mask in clip_table and store it in clipMask[i];
 *   - if the mask is zero, store (x/w, y/w, z/w, 1/w) into proj_vec;
 *   - otherwise store (0, 0, 0, 1.0) into proj_vec, OR the mask into the
 *     running OR mask and AND it into the running AND mask.
 *
 * In:   %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *       %i3 = orMask, %i4 = andMask.  The sixth argument
 *       (viewport_z_enable) is never read; %i5 is reused as the
 *       output pointer.
 * Out:  returns proj_vec.  *orMask receives the accumulated OR mask.
 *       *andMask receives the accumulated AND mask when every vertex
 *       had a non-zero clip mask, otherwise 0.  proj_vec flags, size
 *       and count are updated.
 *
 * Fixes relative to the previous version:
 *   - count == 0 no longer enters the loop.  The loop tests i != count
 *     only after processing a vertex, so an empty vector used to read
 *     and write vertex 0 and then fail to terminate.
 *   - The 64-bit build now allocates a 176-byte frame.  A register
 *     window spill stores 16 x 8 = 128 bytes at the stack pointer; the
 *     old 64-byte frame let that spill overlap the save area of the
 *     caller.
 *
 * NOTE(review): size is stored as 3 while VEC_SIZE_4 is OR'ed into the
 * flags - confirm against the C reference implementation.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
#if __arch64__
	save		%sp, -176, %sp
#else
	save		%sp, -64, %sp
#endif

	/* %o7 = address of the call; the delay slot converts it into
	 * the address of one_dot_zero.  clip_table follows that word.
	 */
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1
	ld		[%g1 + 0x0], %f4		! f4 = 1.0f
	add		%g1, 0x4, %g1			! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5

	/* g2 = (*andMask << 8) | *orMask */
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2

	/* proj_vec: flags |= VEC_SIZE_4, size = 3, count = input count */
	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%l3, [%i1 + V4F_COUNT]

	clr		%l2				! c = 0
	cmp		%l3, 0
	be		4f				! nothing to do for count == 0
	 clr		%l0				! delay slot: i = 0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c, the number of vertices with a non-zero clip mask
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 *
	 * Each addcc doubles a coordinate, which moves its sign bit
	 * into the carry; each subcc compares doubled w against the
	 * doubled coordinate, setting carry when w is the smaller one.
	 * The addx after each of them shifts that carry into g3, which
	 * ends up as the 7-bit index into clip_table.
	 */

1:	ld		[%i0 + 0x0c], %f3		! LSU	Group
	ld		[%i0 + 0x0c], %g5		! LSU	Group
	ld		[%i0 + 0x08], %g4		! LSU	Group
	fdivs		%f4, %f3, %f8			! FGM
	addcc		%g5, %g5, %g5			! IEU1	Group
	addx		%g0, 0x0, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x04], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x00], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	ldub		[%g1 + %g3], %g3		! LSU	Group
	cmp		%g3, 0				! IEU1	Group, stall
	be		2f				! CTI
	 stb		%g3, [%i2]			! LSU

	/* Clipped vertex: bump c, fold the mask into both running
	 * masks and emit (0, 0, 0, 1.0).  g4 = (mask << 8) | 0xff keeps
	 * the OR byte intact while AND-ing the mask into the high byte.
	 */
	sll		%g3, 8, %g4			! IEU1	Group
	add		%l2, 1, %l2			! IEU0
	st		%g0, [%i5 + 0x00]		! LSU
	or		%g4, 0xff, %g4			! IEU0	Group
	or		%g2, %g3, %g2			! IEU1
	st		%g0, [%i5 + 0x04]		! LSU
	and		%g2, %g4, %g2			! IEU0	Group
	st		%g0, [%i5 + 0x08]		! LSU
	b		3f				! CTI
	 st		%f4, [%i5 + 0x0c]		! LSU	Group

	/* Unclipped vertex: emit (x/w, y/w, z/w, 1/w); f8 = 1/w. */
2:	ld		[%i0 + 0x00], %f0		! LSU	Group
	ld		[%i0 + 0x04], %f1		! LSU	Group
	ld		[%i0 + 0x08], %f2		! LSU	Group
	fmuls		%f0, %f8, %f0			! FGM
	st		%f0, [%i5 + 0x00]		! LSU	Group
	fmuls		%f1, %f8, %f1			! FGM
	st		%f1, [%i5 + 0x04]		! LSU	Group
	fmuls		%f2, %f8, %f2			! FGM
	st		%f2, [%i5 + 0x08]		! LSU	Group
	st		%f8, [%i5 + 0x0c]		! LSU	Group

	/* Advance the output, index, clipMask and input pointers. */
3:	add		%i5, 0x10, %i5			! IEU1
	add		%l0, 1, %l0			! IEU0	Group
	add		%i2, 1, %i2			! IEU0	Group
	cmp		%l0, %l3			! IEU1	Group
	bne		1b				! CTI
	 add		%i0, %l1, %i0			! IEU0	Group

	/* Write back the masks; the AND mask is forced to zero when at
	 * least one vertex had a zero clip mask (c < count).  The clr
	 * sits in an annulled delay slot, so it runs only if taken.
	 */
4:	stb		%g2, [%i3]			! LSU
	srl		%g2, 8, %g3			! IEU0	Group
	cmp		%l2, %l3			! IEU1	Group
	bl,a		1f				! CTI
	 clr		%g3				! IEU0
1:	stb		%g3, [%i4]			! LSU	Group
	ret						! CTI	Group
	 restore	%i1, 0x0, %o0			! return proj_vec
164 | |||
/*
 * _mesa_sparc_cliptest_points4_np(clip_vec, proj_vec, clipMask,
 *                                 orMask, andMask, viewport_z_enable)
 *
 * Variant of the points4 clip test that writes no projected vertices.
 * For each of clip_vec's count vertices (x, y, z, w) it looks up the
 * clip mask in clip_table, stores it in clipMask[i] and, when the mask
 * is non-zero, ORs it into the running OR mask and ANDs it into the
 * running AND mask.
 *
 * In:   %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *       %i3 = orMask, %i4 = andMask.  proj_vec is only passed through
 *       as the return value; the sixth argument is never read.
 * Out:  returns proj_vec.  *orMask receives the accumulated OR mask.
 *       *andMask receives the accumulated AND mask when every vertex
 *       had a non-zero clip mask, otherwise 0.
 *
 * Fixes relative to the previous version:
 *   - count == 0 no longer enters the loop.  The loop tests i != count
 *     only after processing a vertex, so an empty vector used to read
 *     vertex 0, write clipMask[0] and then fail to terminate.
 *   - The 64-bit build now allocates a 176-byte frame.  A register
 *     window spill stores 16 x 8 = 128 bytes at the stack pointer; the
 *     old 64-byte frame let that spill overlap the save area of the
 *     caller.
 *   - Stray characters fused onto the final restore were removed.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
#if __arch64__
	save		%sp, -176, %sp
#else
	save		%sp, -64, %sp
#endif

	/* %o7 = address of the call; the delay slot converts it into
	 * the address of one_dot_zero.  clip_table follows that word.
	 */
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1
	add		%g1, 0x4, %g1			! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0

	/* g2 = (*andMask << 8) | *orMask */
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2

	clr		%l2				! c = 0
	cmp		%l3, 0
	be		3f				! nothing to do for count == 0
	 clr		%l0				! delay slot: i = 0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c, the number of vertices with a non-zero clip mask
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 *
	 * Each addcc doubles a coordinate, which moves its sign bit
	 * into the carry; each subcc compares doubled w against the
	 * doubled coordinate, setting carry when w is the smaller one.
	 * The addx after each of them shifts that carry into g3, which
	 * ends up as the 7-bit index into clip_table.
	 */

1:	ld		[%i0 + 0x0c], %g5		! LSU	Group
	ld		[%i0 + 0x08], %g4		! LSU	Group
	addcc		%g5, %g5, %g5			! IEU1	Group
	addx		%g0, 0x0, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x04], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	ld		[%i0 + 0x00], %g4		! LSU	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	addcc		%g4, %g4, %g4			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	subcc		%g5, %g4, %g0			! IEU1	Group
	addx		%g3, %g3, %g3			! IEU1	Group
	ldub		[%g1 + %g3], %g3		! LSU	Group
	cmp		%g3, 0				! IEU1	Group, stall
	be		2f				! CTI
	 stb		%g3, [%i2]			! LSU

	/* Clipped vertex: bump c and fold the mask into both running
	 * masks.  g4 = (mask << 8) | 0xff keeps the OR byte intact
	 * while AND-ing the mask into the high byte.
	 */
	sll		%g3, 8, %g4			! IEU1	Group
	add		%l2, 1, %l2			! IEU0
	or		%g4, 0xff, %g4			! IEU0	Group
	or		%g2, %g3, %g2			! IEU1
	and		%g2, %g4, %g2			! IEU0	Group

	/* Advance the index, clipMask and input pointers. */
2:	add		%l0, 1, %l0			! IEU0	Group
	add		%i2, 1, %i2			! IEU0	Group
	cmp		%l0, %l3			! IEU1	Group
	bne		1b				! CTI
	 add		%i0, %l1, %i0			! IEU0	Group

	/* Write back the masks; the AND mask is forced to zero when at
	 * least one vertex had a zero clip mask (c < count).  The clr
	 * sits in an annulled delay slot, so it runs only if taken.
	 */
3:	stb		%g2, [%i3]			! LSU
	srl		%g2, 8, %g3			! IEU0	Group
	cmp		%l2, %l3			! IEU1	Group
	bl,a		1f				! CTI
	 clr		%g3				! IEU0
1:	stb		%g3, [%i4]			! LSU	Group
	ret						! CTI	Group
	 restore	%i1, 0x0, %o0			! return proj_vec