Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2013 Nouveau Project |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | * |
||
22 | * Authors: Christoph Bumiller, Samuel Pitoiset |
||
23 | */ |
||
24 | |||
25 | #include "nvc0/nvc0_context.h" |
||
26 | #include "nvc0/nvc0_compute.h" |
||
27 | |||
28 | int |
||
29 | nvc0_screen_compute_setup(struct nvc0_screen *screen, |
||
30 | struct nouveau_pushbuf *push) |
||
31 | { |
||
32 | struct nouveau_object *chan = screen->base.channel; |
||
33 | struct nouveau_device *dev = screen->base.device; |
||
34 | uint32_t obj_class; |
||
35 | int ret; |
||
36 | int i; |
||
37 | |||
38 | switch (dev->chipset & ~0xf) { |
||
39 | case 0xc0: |
||
40 | if (dev->chipset == 0xc8) |
||
41 | obj_class = NVC8_COMPUTE_CLASS; |
||
42 | else |
||
43 | obj_class = NVC0_COMPUTE_CLASS; |
||
44 | break; |
||
45 | case 0xd0: |
||
46 | obj_class = NVC0_COMPUTE_CLASS; |
||
47 | break; |
||
48 | default: |
||
49 | NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); |
||
50 | return -1; |
||
51 | } |
||
52 | |||
53 | ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, |
||
54 | &screen->compute); |
||
55 | if (ret) { |
||
56 | NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); |
||
57 | return ret; |
||
58 | } |
||
59 | |||
60 | ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL, |
||
61 | &screen->parm); |
||
62 | if (ret) |
||
63 | return ret; |
||
64 | |||
65 | BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); |
||
66 | PUSH_DATA (push, screen->compute->oclass); |
||
67 | |||
68 | /* hardware limit */ |
||
69 | BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); |
||
70 | PUSH_DATA (push, screen->mp_count); |
||
71 | BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); |
||
72 | PUSH_DATA (push, 0xf); |
||
73 | |||
74 | BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); |
||
75 | PUSH_DATA (push, 0x8000); |
||
76 | |||
77 | /* global memory setup */ |
||
78 | BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); |
||
79 | PUSH_DATA (push, 0); |
||
80 | BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); |
||
81 | for (i = 0; i <= 0xff; i++) |
||
82 | PUSH_DATA (push, (0xc << 28) | (i << 16) | i); |
||
83 | BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); |
||
84 | PUSH_DATA (push, 1); |
||
85 | |||
86 | /* local memory and cstack setup */ |
||
87 | BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); |
||
88 | PUSH_DATAh(push, screen->tls->offset); |
||
89 | PUSH_DATA (push, screen->tls->offset); |
||
90 | BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); |
||
91 | PUSH_DATAh(push, screen->tls->size); |
||
92 | PUSH_DATA (push, screen->tls->size); |
||
93 | BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); |
||
94 | PUSH_DATA (push, 0); |
||
95 | BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); |
||
96 | PUSH_DATA (push, 1 << 24); |
||
97 | |||
98 | /* shared memory setup */ |
||
99 | BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); |
||
100 | PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); |
||
101 | BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); |
||
102 | PUSH_DATA (push, 2 << 24); |
||
103 | BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); |
||
104 | PUSH_DATA (push, 0); |
||
105 | |||
106 | /* code segment setup */ |
||
107 | BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); |
||
108 | PUSH_DATAh(push, screen->text->offset); |
||
109 | PUSH_DATA (push, screen->text->offset); |
||
110 | |||
111 | /* bind parameters buffer */ |
||
112 | BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); |
||
113 | PUSH_DATA (push, screen->parm->size); |
||
114 | PUSH_DATAh(push, screen->parm->offset); |
||
115 | PUSH_DATA (push, screen->parm->offset); |
||
116 | BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); |
||
117 | PUSH_DATA (push, (0 << 8) | 1); |
||
118 | |||
119 | /* TODO: textures & samplers */ |
||
120 | |||
121 | return 0; |
||
122 | } |
||
123 | |||
124 | boolean |
||
125 | nvc0_compute_validate_program(struct nvc0_context *nvc0) |
||
126 | { |
||
127 | struct nvc0_program *prog = nvc0->compprog; |
||
128 | |||
129 | if (prog->mem) |
||
130 | return TRUE; |
||
131 | |||
132 | if (!prog->translated) { |
||
133 | prog->translated = nvc0_program_translate( |
||
134 | prog, nvc0->screen->base.device->chipset); |
||
135 | if (!prog->translated) |
||
136 | return FALSE; |
||
137 | } |
||
138 | if (unlikely(!prog->code_size)) |
||
139 | return FALSE; |
||
140 | |||
141 | if (likely(prog->code_size)) { |
||
142 | if (nvc0_program_upload_code(nvc0, prog)) { |
||
143 | struct nouveau_pushbuf *push = nvc0->base.pushbuf; |
||
144 | BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); |
||
145 | PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); |
||
146 | return TRUE; |
||
147 | } |
||
148 | } |
||
149 | return FALSE; |
||
150 | } |
||
151 | |||
152 | static boolean |
||
153 | nvc0_compute_state_validate(struct nvc0_context *nvc0) |
||
154 | { |
||
155 | if (!nvc0_compute_validate_program(nvc0)) |
||
156 | return FALSE; |
||
157 | |||
158 | /* TODO: textures, samplers, surfaces, global memory buffers */ |
||
159 | |||
160 | nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE); |
||
161 | |||
162 | nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp); |
||
163 | if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf))) |
||
164 | return FALSE; |
||
165 | if (unlikely(nvc0->state.flushed)) |
||
166 | nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE); |
||
167 | |||
168 | return TRUE; |
||
169 | |||
170 | } |
||
171 | |||
172 | static void |
||
173 | nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) |
||
174 | { |
||
175 | struct nouveau_pushbuf *push = nvc0->base.pushbuf; |
||
176 | struct nvc0_screen *screen = nvc0->screen; |
||
177 | struct nvc0_program *cp = nvc0->compprog; |
||
178 | |||
179 | if (cp->parm_size) { |
||
180 | BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); |
||
181 | PUSH_DATA (push, align(cp->parm_size, 0x100)); |
||
182 | PUSH_DATAh(push, screen->parm->offset); |
||
183 | PUSH_DATA (push, screen->parm->offset); |
||
184 | BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); |
||
185 | PUSH_DATA (push, (0 << 8) | 1); |
||
186 | /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ |
||
187 | BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); |
||
188 | PUSH_DATA (push, 0); |
||
189 | PUSH_DATAp(push, input, cp->parm_size / 4); |
||
190 | |||
191 | BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); |
||
192 | PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); |
||
193 | } |
||
194 | } |
||
195 | |||
196 | void |
||
197 | nvc0_launch_grid(struct pipe_context *pipe, |
||
198 | const uint *block_layout, const uint *grid_layout, |
||
199 | uint32_t label, |
||
200 | const void *input) |
||
201 | { |
||
202 | struct nvc0_context *nvc0 = nvc0_context(pipe); |
||
203 | struct nouveau_pushbuf *push = nvc0->base.pushbuf; |
||
204 | struct nvc0_program *cp = nvc0->compprog; |
||
205 | unsigned s, i; |
||
206 | int ret; |
||
207 | |||
208 | ret = !nvc0_compute_state_validate(nvc0); |
||
209 | if (ret) |
||
210 | goto out; |
||
211 | |||
212 | nvc0_compute_upload_input(nvc0, input); |
||
213 | |||
214 | BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); |
||
215 | PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); |
||
216 | |||
217 | BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); |
||
218 | PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); |
||
219 | PUSH_DATA (push, 0); |
||
220 | PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ |
||
221 | |||
222 | BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); |
||
223 | PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); |
||
224 | PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); |
||
225 | PUSH_DATA (push, cp->num_barriers); |
||
226 | BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); |
||
227 | PUSH_DATA (push, cp->num_gprs); |
||
228 | |||
229 | /* grid/block setup */ |
||
230 | BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); |
||
231 | PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); |
||
232 | PUSH_DATA (push, grid_layout[2]); |
||
233 | BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); |
||
234 | PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); |
||
235 | PUSH_DATA (push, block_layout[2]); |
||
236 | |||
237 | /* launch preliminary setup */ |
||
238 | BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); |
||
239 | PUSH_DATA (push, 0x1); |
||
240 | BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); |
||
241 | PUSH_DATA (push, 0); |
||
242 | BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); |
||
243 | PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); |
||
244 | |||
245 | /* kernel launching */ |
||
246 | BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); |
||
247 | PUSH_DATA (push, 0); |
||
248 | BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); |
||
249 | PUSH_DATA (push, 0); |
||
250 | BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); |
||
251 | PUSH_DATA (push, 0x1000); |
||
252 | BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); |
||
253 | PUSH_DATA (push, 0); |
||
254 | BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); |
||
255 | PUSH_DATA (push, 0x1); |
||
256 | |||
257 | /* rebind all the 3D constant buffers |
||
258 | * (looks like binding a CB on COMPUTE clobbers 3D state) */ |
||
259 | nvc0->dirty |= NVC0_NEW_CONSTBUF; |
||
260 | for (s = 0; s < 6; s++) { |
||
261 | for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) |
||
262 | if (nvc0->constbuf[s][i].u.buf) |
||
263 | nvc0->constbuf_dirty[s] |= 1 << i; |
||
264 | } |
||
265 | memset(nvc0->state.uniform_buffer_bound, 0, |
||
266 | sizeof(nvc0->state.uniform_buffer_bound)); |
||
267 | |||
268 | out: |
||
269 | if (ret) |
||
270 | NOUVEAU_ERR("Failed to launch grid !\n"); |
||
271 | }><>>>><>><>>><>><>><>><>><>><>=>><> |