Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2012 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | #include <stdbool.h> |
||
25 | #include "program/program.h" |
||
26 | #include "brw_state.h" |
||
27 | #include "brw_defines.h" |
||
28 | #include "intel_batchbuffer.h" |
||
29 | |||
30 | void |
||
31 | gen8_upload_ps_extra(struct brw_context *brw, |
||
32 | const struct gl_fragment_program *fp, |
||
33 | const struct brw_wm_prog_data *prog_data, |
||
34 | bool multisampled_fbo) |
||
35 | { |
||
36 | struct gl_context *ctx = &brw->ctx; |
||
37 | uint32_t dw1 = 0; |
||
38 | |||
39 | dw1 |= GEN8_PSX_PIXEL_SHADER_VALID; |
||
40 | dw1 |= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT; |
||
41 | |||
42 | if (prog_data->uses_kill) |
||
43 | dw1 |= GEN8_PSX_KILL_ENABLE; |
||
44 | |||
45 | if (prog_data->num_varying_inputs != 0) |
||
46 | dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE; |
||
47 | |||
48 | if (fp->Base.InputsRead & VARYING_BIT_POS) |
||
49 | dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W; |
||
50 | |||
51 | if (multisampled_fbo && |
||
52 | _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1) |
||
53 | dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; |
||
54 | |||
55 | if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) |
||
56 | dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK; |
||
57 | |||
58 | if (prog_data->uses_omask) |
||
59 | dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; |
||
60 | |||
61 | BEGIN_BATCH(2); |
||
62 | OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); |
||
63 | OUT_BATCH(dw1); |
||
64 | ADVANCE_BATCH(); |
||
65 | } |
||
66 | |||
67 | static void |
||
68 | upload_ps_extra(struct brw_context *brw) |
||
69 | { |
||
70 | /* BRW_NEW_FRAGMENT_PROGRAM */ |
||
71 | const struct brw_fragment_program *fp = |
||
72 | brw_fragment_program_const(brw->fragment_program); |
||
73 | /* BRW_NEW_FS_PROG_DATA */ |
||
74 | const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; |
||
75 | /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */ |
||
76 | const bool multisampled_fbo = brw->num_samples > 1; |
||
77 | |||
78 | gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo); |
||
79 | } |
||
80 | |||
81 | const struct brw_tracked_state gen8_ps_extra = { |
||
82 | .dirty = { |
||
83 | .mesa = _NEW_MULTISAMPLE, |
||
84 | .brw = BRW_NEW_CONTEXT | |
||
85 | BRW_NEW_FRAGMENT_PROGRAM | |
||
86 | BRW_NEW_FS_PROG_DATA | |
||
87 | BRW_NEW_NUM_SAMPLES, |
||
88 | }, |
||
89 | .emit = upload_ps_extra, |
||
90 | }; |
||
91 | |||
92 | static void |
||
93 | upload_wm_state(struct brw_context *brw) |
||
94 | { |
||
95 | struct gl_context *ctx = &brw->ctx; |
||
96 | uint32_t dw1 = 0; |
||
97 | |||
98 | dw1 |= GEN7_WM_STATISTICS_ENABLE; |
||
99 | dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; |
||
100 | dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; |
||
101 | dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT; |
||
102 | |||
103 | /* _NEW_LINE */ |
||
104 | if (ctx->Line.StippleFlag) |
||
105 | dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; |
||
106 | |||
107 | /* _NEW_POLYGON */ |
||
108 | if (ctx->Polygon.StippleFlag) |
||
109 | dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; |
||
110 | |||
111 | /* BRW_NEW_FS_PROG_DATA */ |
||
112 | dw1 |= brw->wm.prog_data->barycentric_interp_modes << |
||
113 | GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; |
||
114 | |||
115 | BEGIN_BATCH(2); |
||
116 | OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); |
||
117 | OUT_BATCH(dw1); |
||
118 | ADVANCE_BATCH(); |
||
119 | } |
||
120 | |||
121 | const struct brw_tracked_state gen8_wm_state = { |
||
122 | .dirty = { |
||
123 | .mesa = _NEW_LINE | |
||
124 | _NEW_POLYGON, |
||
125 | .brw = BRW_NEW_CONTEXT | |
||
126 | BRW_NEW_FS_PROG_DATA, |
||
127 | }, |
||
128 | .emit = upload_wm_state, |
||
129 | }; |
||
130 | |||
131 | void |
||
132 | gen8_upload_ps_state(struct brw_context *brw, |
||
133 | const struct gl_fragment_program *fp, |
||
134 | const struct brw_stage_state *stage_state, |
||
135 | const struct brw_wm_prog_data *prog_data, |
||
136 | uint32_t fast_clear_op) |
||
137 | { |
||
138 | struct gl_context *ctx = &brw->ctx; |
||
139 | uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0; |
||
140 | |||
141 | /* Initialize the execution mask with VMask. Otherwise, derivatives are |
||
142 | * incorrect for subspans where some of the pixels are unlit. We believe |
||
143 | * the bit just didn't take effect in previous generations. |
||
144 | */ |
||
145 | dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; |
||
146 | |||
147 | const unsigned sampler_count = |
||
148 | DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); |
||
149 | dw3 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT); |
||
150 | |||
151 | /* BRW_NEW_FS_PROG_DATA */ |
||
152 | dw3 |= |
||
153 | ((prog_data->base.binding_table.size_bytes / 4) << |
||
154 | GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); |
||
155 | |||
156 | if (prog_data->base.use_alt_mode) |
||
157 | dw3 |= GEN7_PS_FLOATING_POINT_MODE_ALT; |
||
158 | |||
159 | /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; |
||
160 | * it implicitly scales for different GT levels (which have some # of PSDs). |
||
161 | * |
||
162 | * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. |
||
163 | */ |
||
164 | if (brw->gen >= 9) |
||
165 | dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT; |
||
166 | else |
||
167 | dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT; |
||
168 | |||
169 | if (prog_data->base.nr_params > 0) |
||
170 | dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE; |
||
171 | |||
172 | /* From the documentation for this packet: |
||
173 | * "If the PS kernel does not need the Position XY Offsets to |
||
174 | * compute a Position Value, then this field should be programmed |
||
175 | * to POSOFFSET_NONE." |
||
176 | * |
||
177 | * "SW Recommendation: If the PS kernel needs the Position Offsets |
||
178 | * to compute a Position XY value, this field should match Position |
||
179 | * ZW Interpolation Mode to ensure a consistent position.xyzw |
||
180 | * computation." |
||
181 | * |
||
182 | * We only require XY sample offsets. So, this recommendation doesn't |
||
183 | * look useful at the moment. We might need this in future. |
||
184 | */ |
||
185 | if (prog_data->uses_pos_offset) |
||
186 | dw6 |= GEN7_PS_POSOFFSET_SAMPLE; |
||
187 | else |
||
188 | dw6 |= GEN7_PS_POSOFFSET_NONE; |
||
189 | |||
190 | dw6 |= fast_clear_op; |
||
191 | |||
192 | /* _NEW_MULTISAMPLE |
||
193 | * In case of non 1x per sample shading, only one of SIMD8 and SIMD16 |
||
194 | * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader |
||
195 | * is successfully compiled. In majority of the cases that bring us |
||
196 | * better performance than 'SIMD8 only' dispatch. |
||
197 | */ |
||
198 | int min_invocations_per_fragment = |
||
199 | _mesa_get_min_invocations_per_fragment(ctx, fp, false); |
||
200 | assert(min_invocations_per_fragment >= 1); |
||
201 | |||
202 | if (prog_data->prog_offset_16 || prog_data->no_8) { |
||
203 | dw6 |= GEN7_PS_16_DISPATCH_ENABLE; |
||
204 | if (!prog_data->no_8 && min_invocations_per_fragment == 1) { |
||
205 | dw6 |= GEN7_PS_8_DISPATCH_ENABLE; |
||
206 | dw7 |= (prog_data->base.dispatch_grf_start_reg << |
||
207 | GEN7_PS_DISPATCH_START_GRF_SHIFT_0); |
||
208 | dw7 |= (prog_data->dispatch_grf_start_reg_16 << |
||
209 | GEN7_PS_DISPATCH_START_GRF_SHIFT_2); |
||
210 | ksp0 = stage_state->prog_offset; |
||
211 | ksp2 = stage_state->prog_offset + prog_data->prog_offset_16; |
||
212 | } else { |
||
213 | dw7 |= (prog_data->dispatch_grf_start_reg_16 << |
||
214 | GEN7_PS_DISPATCH_START_GRF_SHIFT_0); |
||
215 | |||
216 | ksp0 = stage_state->prog_offset + prog_data->prog_offset_16; |
||
217 | } |
||
218 | } else { |
||
219 | dw6 |= GEN7_PS_8_DISPATCH_ENABLE; |
||
220 | dw7 |= (prog_data->base.dispatch_grf_start_reg << |
||
221 | GEN7_PS_DISPATCH_START_GRF_SHIFT_0); |
||
222 | ksp0 = stage_state->prog_offset; |
||
223 | } |
||
224 | |||
225 | BEGIN_BATCH(12); |
||
226 | OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); |
||
227 | OUT_BATCH(ksp0); |
||
228 | OUT_BATCH(0); |
||
229 | OUT_BATCH(dw3); |
||
230 | if (prog_data->base.total_scratch) { |
||
231 | OUT_RELOC64(stage_state->scratch_bo, |
||
232 | I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, |
||
233 | ffs(prog_data->base.total_scratch) - 11); |
||
234 | } else { |
||
235 | OUT_BATCH(0); |
||
236 | OUT_BATCH(0); |
||
237 | } |
||
238 | OUT_BATCH(dw6); |
||
239 | OUT_BATCH(dw7); |
||
240 | OUT_BATCH(0); /* kernel 1 pointer */ |
||
241 | OUT_BATCH(0); |
||
242 | OUT_BATCH(ksp2); |
||
243 | OUT_BATCH(0); |
||
244 | ADVANCE_BATCH(); |
||
245 | } |
||
246 | |||
247 | static void |
||
248 | upload_ps_state(struct brw_context *brw) |
||
249 | { |
||
250 | /* BRW_NEW_FS_PROG_DATA */ |
||
251 | const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; |
||
252 | gen8_upload_ps_state(brw, brw->fragment_program, &brw->wm.base, prog_data, |
||
253 | brw->wm.fast_clear_op); |
||
254 | } |
||
255 | |||
256 | const struct brw_tracked_state gen8_ps_state = { |
||
257 | .dirty = { |
||
258 | .mesa = _NEW_MULTISAMPLE, |
||
259 | .brw = BRW_NEW_BATCH | |
||
260 | BRW_NEW_FRAGMENT_PROGRAM | |
||
261 | BRW_NEW_FS_PROG_DATA, |
||
262 | }, |
||
263 | .emit = upload_ps_state, |
||
264 | };><> |