Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5563 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2010 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
18 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
19 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
20 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
21 | * |
||
22 | * The above copyright notice and this permission notice (including the |
||
23 | * next paragraph) shall be included in all copies or substantial portions |
||
24 | * of the Software. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | |||
29 | #include "lp_bld_type.h" |
||
30 | #include "lp_bld_arit.h" |
||
31 | #include "lp_bld_const.h" |
||
32 | #include "lp_bld_swizzle.h" |
||
33 | #include "lp_bld_quad.h" |
||
34 | #include "lp_bld_pack.h" |
||
35 | |||
36 | |||
37 | static const unsigned char |
||
38 | swizzle_left[4] = { |
||
39 | LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT, |
||
40 | LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_LEFT |
||
41 | }; |
||
42 | |||
43 | static const unsigned char |
||
44 | swizzle_right[4] = { |
||
45 | LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_TOP_RIGHT, |
||
46 | LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT |
||
47 | }; |
||
48 | |||
49 | static const unsigned char |
||
50 | swizzle_top[4] = { |
||
51 | LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT, |
||
52 | LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT |
||
53 | }; |
||
54 | |||
55 | static const unsigned char |
||
56 | swizzle_bottom[4] = { |
||
57 | LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT, |
||
58 | LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT |
||
59 | }; |
||
60 | |||
61 | |||
62 | LLVMValueRef |
||
63 | lp_build_ddx(struct lp_build_context *bld, |
||
64 | LLVMValueRef a) |
||
65 | { |
||
66 | LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left); |
||
67 | LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right); |
||
68 | return lp_build_sub(bld, a_right, a_left); |
||
69 | } |
||
70 | |||
71 | |||
72 | LLVMValueRef |
||
73 | lp_build_ddy(struct lp_build_context *bld, |
||
74 | LLVMValueRef a) |
||
75 | { |
||
76 | LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top); |
||
77 | LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom); |
||
78 | return lp_build_sub(bld, a_bottom, a_top); |
||
79 | } |
||
80 | |||
81 | /* |
||
82 | * Helper for building packed ddx/ddy vector for one coord (scalar per quad |
||
83 | * values). The vector will look like this (8-wide): |
||
84 | * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____ |
||
85 | * This only requires one shuffle instead of two for more straightforward packing. |
||
86 | */ |
||
87 | LLVMValueRef |
||
88 | lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, |
||
89 | LLVMValueRef a) |
||
90 | { |
||
91 | struct gallivm_state *gallivm = bld->gallivm; |
||
92 | LLVMBuilderRef builder = gallivm->builder; |
||
93 | LLVMValueRef vec1, vec2; |
||
94 | |||
95 | /* use aos swizzle helper */ |
||
96 | |||
97 | static const unsigned char swizzle1[] = { /* no-op swizzle */ |
||
98 | LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, |
||
99 | LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE |
||
100 | }; |
||
101 | static const unsigned char swizzle2[] = { |
||
102 | LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE, |
||
103 | LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE |
||
104 | }; |
||
105 | |||
106 | vec1 = lp_build_swizzle_aos(bld, a, swizzle1); |
||
107 | vec2 = lp_build_swizzle_aos(bld, a, swizzle2); |
||
108 | |||
109 | if (bld->type.floating) |
||
110 | return LLVMBuildFSub(builder, vec2, vec1, "ddxddy"); |
||
111 | else |
||
112 | return LLVMBuildSub(builder, vec2, vec1, "ddxddy"); |
||
113 | } |
||
114 | |||
115 | |||
116 | /* |
||
117 | * Helper for building packed ddx/ddy vector for one coord (scalar per quad |
||
118 | * values). The vector will look like this (8-wide): |
||
119 | * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy |
||
120 | * This only needs 2 (v)shufps. |
||
121 | */ |
||
122 | LLVMValueRef |
||
123 | lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, |
||
124 | LLVMValueRef a, LLVMValueRef b) |
||
125 | { |
||
126 | struct gallivm_state *gallivm = bld->gallivm; |
||
127 | LLVMBuilderRef builder = gallivm->builder; |
||
128 | LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4]; |
||
129 | LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4]; |
||
130 | LLVMValueRef vec1, vec2; |
||
131 | unsigned length, num_quads, i; |
||
132 | |||
133 | /* XXX: do hsub version */ |
||
134 | length = bld->type.length; |
||
135 | num_quads = length / 4; |
||
136 | for (i = 0; i < num_quads; i++) { |
||
137 | unsigned s1 = 4 * i; |
||
138 | unsigned s2 = 4 * i + length; |
||
139 | shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); |
||
140 | shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1); |
||
141 | shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); |
||
142 | shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2); |
||
143 | shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1); |
||
144 | shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1); |
||
145 | shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2); |
||
146 | shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2); |
||
147 | } |
||
148 | vec1 = LLVMBuildShuffleVector(builder, a, b, |
||
149 | LLVMConstVector(shuffles1, length), ""); |
||
150 | vec2 = LLVMBuildShuffleVector(builder, a, b, |
||
151 | LLVMConstVector(shuffles2, length), ""); |
||
152 | if (bld->type.floating) |
||
153 | return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy"); |
||
154 | else |
||
155 | return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy"); |
||
156 | } |
||
157 | |||
158 | |||
159 | /** |
||
160 | * Twiddle from quad format to row format |
||
161 | * |
||
162 | * src0 src1 |
||
163 | * ######### ######### ################# |
||
164 | * # 0 | 1 # # 4 | 5 # # 0 | 1 | 4 | 5 # src0 |
||
165 | * #---+---# #---+---# -> ################# |
||
166 | * # 2 | 3 # # 6 | 7 # # 2 | 3 | 6 | 7 # src1 |
||
167 | * ######### ######### ################# |
||
168 | * |
||
169 | */ |
||
170 | void |
||
171 | lp_bld_quad_twiddle(struct gallivm_state *gallivm, |
||
172 | struct lp_type lp_dst_type, |
||
173 | const LLVMValueRef* src, |
||
174 | unsigned src_count, |
||
175 | LLVMValueRef* dst) |
||
176 | { |
||
177 | LLVMBuilderRef builder = gallivm->builder; |
||
178 | LLVMTypeRef dst_type_ref; |
||
179 | LLVMTypeRef type2_ref; |
||
180 | struct lp_type type2; |
||
181 | unsigned i; |
||
182 | |||
183 | assert((src_count % 2) == 0); |
||
184 | |||
185 | /* Create a type with only 2 elements */ |
||
186 | type2 = lp_dst_type; |
||
187 | type2.width = (lp_dst_type.width * lp_dst_type.length) / 2; |
||
188 | type2.length = 2; |
||
189 | type2.floating = 0; |
||
190 | |||
191 | type2_ref = lp_build_vec_type(gallivm, type2); |
||
192 | dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type); |
||
193 | |||
194 | for (i = 0; i < src_count; i += 2) { |
||
195 | LLVMValueRef src0, src1; |
||
196 | |||
197 | src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, ""); |
||
198 | src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, ""); |
||
199 | |||
200 | dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0); |
||
201 | dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1); |
||
202 | |||
203 | dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, ""); |
||
204 | dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, ""); |
||
205 | } |
||
206 | }>> |