Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
 * Copyright 2013 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller, Samuel Pitoiset
 */
24
 
25
#include "nvc0/nvc0_context.h"
26
#include "nvc0/nvc0_compute.h"
27
 
28
int
29
nvc0_screen_compute_setup(struct nvc0_screen *screen,
30
                          struct nouveau_pushbuf *push)
31
{
32
   struct nouveau_object *chan = screen->base.channel;
33
   struct nouveau_device *dev = screen->base.device;
34
   uint32_t obj_class;
35
   int ret;
36
   int i;
37
 
38
   switch (dev->chipset & ~0xf) {
39
   case 0xc0:
40
      if (dev->chipset == 0xc8)
41
         obj_class = NVC8_COMPUTE_CLASS;
42
      else
43
         obj_class = NVC0_COMPUTE_CLASS;
44
      break;
45
   case 0xd0:
46
      obj_class = NVC0_COMPUTE_CLASS;
47
      break;
48
   default:
49
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
50
      return -1;
51
   }
52
 
53
   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
54
                            &screen->compute);
55
   if (ret) {
56
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
57
      return ret;
58
   }
59
 
60
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
61
                        &screen->parm);
62
   if (ret)
63
      return ret;
64
 
65
   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
66
   PUSH_DATA (push, screen->compute->oclass);
67
 
68
   /* hardware limit */
69
   BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
70
   PUSH_DATA (push, screen->mp_count);
71
   BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
72
   PUSH_DATA (push, 0xf);
73
 
74
   BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
75
   PUSH_DATA (push, 0x8000);
76
 
77
   /* global memory setup */
78
   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
79
   PUSH_DATA (push, 0);
80
   BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
81
   for (i = 0; i <= 0xff; i++)
82
      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
83
   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
84
   PUSH_DATA (push, 1);
85
 
86
   /* local memory and cstack setup */
87
   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
88
   PUSH_DATAh(push, screen->tls->offset);
89
   PUSH_DATA (push, screen->tls->offset);
90
   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
91
   PUSH_DATAh(push, screen->tls->size);
92
   PUSH_DATA (push, screen->tls->size);
93
   BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
94
   PUSH_DATA (push, 0);
95
   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
96
   PUSH_DATA (push, 1 << 24);
97
 
98
   /* shared memory setup */
99
   BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
100
   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
101
   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
102
   PUSH_DATA (push, 2 << 24);
103
   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
104
   PUSH_DATA (push, 0);
105
 
106
   /* code segment setup */
107
   BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
108
   PUSH_DATAh(push, screen->text->offset);
109
   PUSH_DATA (push, screen->text->offset);
110
 
111
   /* bind parameters buffer */
112
   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
113
   PUSH_DATA (push, screen->parm->size);
114
   PUSH_DATAh(push, screen->parm->offset);
115
   PUSH_DATA (push, screen->parm->offset);
116
   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
117
   PUSH_DATA (push, (0 << 8) | 1);
118
 
119
   /* TODO: textures & samplers */
120
 
121
   return 0;
122
}
123
 
124
boolean
125
nvc0_compute_validate_program(struct nvc0_context *nvc0)
126
{
127
   struct nvc0_program *prog = nvc0->compprog;
128
 
129
   if (prog->mem)
130
      return TRUE;
131
 
132
   if (!prog->translated) {
133
      prog->translated = nvc0_program_translate(
134
         prog, nvc0->screen->base.device->chipset);
135
      if (!prog->translated)
136
         return FALSE;
137
   }
138
   if (unlikely(!prog->code_size))
139
      return FALSE;
140
 
141
   if (likely(prog->code_size)) {
142
      if (nvc0_program_upload_code(nvc0, prog)) {
143
         struct nouveau_pushbuf *push = nvc0->base.pushbuf;
144
         BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
145
         PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
146
         return TRUE;
147
      }
148
   }
149
   return FALSE;
150
}
151
 
152
static boolean
153
nvc0_compute_state_validate(struct nvc0_context *nvc0)
154
{
155
   if (!nvc0_compute_validate_program(nvc0))
156
      return FALSE;
157
 
158
   /* TODO: textures, samplers, surfaces, global memory buffers */
159
 
160
   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
161
 
162
   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
163
   if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
164
      return FALSE;
165
   if (unlikely(nvc0->state.flushed))
166
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
167
 
168
   return TRUE;
169
 
170
}
171
 
172
static void
173
nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
174
{
175
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
176
   struct nvc0_screen *screen = nvc0->screen;
177
   struct nvc0_program *cp = nvc0->compprog;
178
 
179
   if (cp->parm_size) {
180
      BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
181
      PUSH_DATA (push, align(cp->parm_size, 0x100));
182
      PUSH_DATAh(push, screen->parm->offset);
183
      PUSH_DATA (push, screen->parm->offset);
184
      BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
185
      PUSH_DATA (push, (0 << 8) | 1);
186
      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
187
      BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
188
      PUSH_DATA (push, 0);
189
      PUSH_DATAp(push, input, cp->parm_size / 4);
190
 
191
      BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
192
      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
193
   }
194
}
195
 
196
void
197
nvc0_launch_grid(struct pipe_context *pipe,
198
                 const uint *block_layout, const uint *grid_layout,
199
                 uint32_t label,
200
                 const void *input)
201
{
202
   struct nvc0_context *nvc0 = nvc0_context(pipe);
203
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
204
   struct nvc0_program *cp = nvc0->compprog;
205
   unsigned s, i;
206
   int ret;
207
 
208
   ret = !nvc0_compute_state_validate(nvc0);
209
   if (ret)
210
      goto out;
211
 
212
   nvc0_compute_upload_input(nvc0, input);
213
 
214
   BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
215
   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
216
 
217
   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
218
   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
219
   PUSH_DATA (push, 0);
220
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
221
 
222
   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
223
   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
224
   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
225
   PUSH_DATA (push, cp->num_barriers);
226
   BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
227
   PUSH_DATA (push, cp->num_gprs);
228
 
229
   /* grid/block setup */
230
   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
231
   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
232
   PUSH_DATA (push, grid_layout[2]);
233
   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
234
   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
235
   PUSH_DATA (push, block_layout[2]);
236
 
237
   /* launch preliminary setup */
238
   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
239
   PUSH_DATA (push, 0x1);
240
   BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
241
   PUSH_DATA (push, 0);
242
   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
243
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
244
 
245
   /* kernel launching */
246
   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
247
   PUSH_DATA (push, 0);
248
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
249
   PUSH_DATA (push, 0);
250
   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
251
   PUSH_DATA (push, 0x1000);
252
   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
253
   PUSH_DATA (push, 0);
254
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
255
   PUSH_DATA (push, 0x1);
256
 
257
   /* rebind all the 3D constant buffers
258
    * (looks like binding a CB on COMPUTE clobbers 3D state) */
259
   nvc0->dirty |= NVC0_NEW_CONSTBUF;
260
   for (s = 0; s < 6; s++) {
261
      for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
262
         if (nvc0->constbuf[s][i].u.buf)
263
            nvc0->constbuf_dirty[s] |= 1 << i;
264
   }
265
   memset(nvc0->state.uniform_buffer_bound, 0,
266
          sizeof(nvc0->state.uniform_buffer_bound));
267
 
268
out:
269
   if (ret)
270
      NOUVEAU_ERR("Failed to launch grid !\n");
271
}