Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6146 serge 1
/*
2
 * Copyright © 2014 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *    Keith Packard 
26
 *    Xiang Haihao 
27
 *    Zhao Yakui 
28
 *
29
 */
30
 
31
/*
32
 * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33
 */
34
 
35
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <va/va_backend.h>
42
 
43
#include "intel_batchbuffer.h"
44
#include "intel_driver.h"
45
#include "i965_defines.h"
46
#include "i965_drv_video.h"
47
#include "i965_structs.h"
48
#include "i965_yuv_coefs.h"
49
 
50
#include "i965_render.h"
51
 
52
/* Programs for Gen9 */
53
/* SF kernel: intentionally empty -- no strips-and-fans program is loaded. */
static const uint32_t sf_kernel_static_gen9[][4] ={

};
56
/* Pixel shader for planar YUV sources: affine coordinate setup, planar
 * sampling, optional color balance, YUV->RGB conversion, then write. */
static const uint32_t ps_kernel_static_gen9[][4] = {
#include "shaders/render/exa_wm_src_affine.g9b"
#include "shaders/render/exa_wm_src_sample_planar.g9b"
#include "shaders/render/exa_wm_yuv_color_balance.g9b"
#include "shaders/render/exa_wm_yuv_rgb.g9b"
#include "shaders/render/exa_wm_write.g9b"
};
63
 
64
/* Pixel shader for ARGB subpictures: affine setup, ARGB sampling, write. */
static const uint32_t ps_subpic_kernel_static_gen9[][4] = {
#include "shaders/render/exa_wm_src_affine.g9b"
#include "shaders/render/exa_wm_src_sample_argb.g9b"
#include "shaders/render/exa_wm_write.g9b"
};
69
 
70
 
71
/* Surface states are padded to the Gen8 slot size; the binding table is
 * placed right after the last surface-state slot in the same bo. */
#define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

/* Indices into render_kernels_gen9[]. */
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
81
 
82
/* Kernel descriptors: {name, id, binary, size in bytes, bo (initially NULL)}. */
static struct i965_kernel render_kernels_gen9[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen9,
        sizeof(sf_kernel_static_gen9),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen9,
        sizeof(ps_kernel_static_gen9),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen9,
        sizeof(ps_subpic_kernel_static_gen9),
        NULL
    }
};
106
 
107
/* URB allocation (entry count / entry size) per fixed-function stage. */
#define URB_VS_ENTRIES	      8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES	      0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES	      1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES	      4
#define URB_CS_ENTRY_SIZE     4
121
 
122
static void
123
gen9_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
124
{
125
   switch (tiling) {
126
   case I915_TILING_NONE:
127
      ss->ss0.tiled_surface = 0;
128
      ss->ss0.tile_walk = 0;
129
      break;
130
   case I915_TILING_X:
131
      ss->ss0.tiled_surface = 1;
132
      ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
133
      break;
134
   case I915_TILING_Y:
135
      ss->ss0.tiled_surface = 1;
136
      ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
137
      break;
138
   }
139
}
140
 
141
/* Set "Shader Channel Select" for GEN9+ */
142
static void
143
gen9_render_set_surface_scs(struct gen8_surface_state *ss)
144
{
145
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
146
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
147
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
148
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
149
}
150
 
151
static void
152
gen9_render_set_surface_state(
153
    struct gen8_surface_state *ss,
154
    dri_bo                    *bo,
155
    unsigned long              offset,
156
    int                        width,
157
    int                        height,
158
    int                        pitch,
159
    int                        format,
160
    unsigned int               flags
161
)
162
{
163
    unsigned int tiling;
164
    unsigned int swizzle;
165
 
166
    memset(ss, 0, sizeof(*ss));
167
 
168
    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
169
    case VA_BOTTOM_FIELD:
170
        ss->ss0.vert_line_stride_ofs = 1;
171
        /* fall-through */
172
    case VA_TOP_FIELD:
173
        ss->ss0.vert_line_stride = 1;
174
        height /= 2;
175
        break;
176
    }
177
 
178
    ss->ss0.surface_type = I965_SURFACE_2D;
179
    ss->ss0.surface_format = format;
180
 
181
    ss->ss8.base_addr = bo->offset + offset;
182
 
183
    ss->ss2.width = width - 1;
184
    ss->ss2.height = height - 1;
185
 
186
    ss->ss3.pitch = pitch - 1;
187
 
188
    /* Always set 1(align 4 mode) per B-spec */
189
    ss->ss0.vertical_alignment = 1;
190
    ss->ss0.horizontal_alignment = 1;
191
 
192
    dri_bo_get_tiling(bo, &tiling, &swizzle);
193
    gen9_render_set_surface_tiling(ss, tiling);
194
}
195
 
196
/*
 * Fill one SURFACE_STATE slot for a source (sampled) plane and hook it
 * into the binding table.
 *
 * index  - binding-table slot, must be < MAX_RENDER_SURFACES
 * region - buffer object holding the plane data
 * offset - byte offset of the plane inside 'region'
 * w/h/pitch/format - plane geometry and surface format
 * flags  - VA_TOP_FIELD/VA_BOTTOM_FIELD, passed through
 *
 * Side effect: bumps render_state->wm.sampler_count by one.
 */
static void
gen9_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen9_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen9_render_set_surface_scs(ss);
    /* Relocate the surface base address field (ss8) against 'region'. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* The binding table lives after all surface states in the same bo. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
235
 
236
static void
237
gen9_render_src_surfaces_state(
238
    VADriverContextP ctx,
239
    struct object_surface *obj_surface,
240
    unsigned int     flags
241
)
242
{
243
    int region_pitch;
244
    int rw, rh;
245
    dri_bo *region;
246
 
247
    region_pitch = obj_surface->width;
248
    rw = obj_surface->orig_width;
249
    rh = obj_surface->orig_height;
250
    region = obj_surface->bo;
251
 
252
    if (obj_surface->fourcc == VA_FOURCC('P', '0', '1', '0')) {
253
        gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R16_UNORM, flags);     /* Y */
254
        gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R16_UNORM, flags);
255
        gen9_render_src_surface_state(ctx, 3, region,
256
                                      region_pitch * obj_surface->y_cb_offset,
257
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
258
                                      I965_SURFACEFORMAT_R16G16_UNORM, flags); /* UV */
259
        gen9_render_src_surface_state(ctx, 4, region,
260
                                      region_pitch * obj_surface->y_cb_offset,
261
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
262
                                      I965_SURFACEFORMAT_R16G16_UNORM, flags);
263
    } else {
264
        gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
265
        gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
266
 
267
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
268
            gen9_render_src_surface_state(ctx, 3, region,
269
                                          region_pitch * obj_surface->y_cb_offset,
270
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
271
                                          I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
272
            gen9_render_src_surface_state(ctx, 4, region,
273
                                          region_pitch * obj_surface->y_cb_offset,
274
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
275
                                          I965_SURFACEFORMAT_R8G8_UNORM, flags);
276
        } else {
277
            gen9_render_src_surface_state(ctx, 3, region,
278
                                          region_pitch * obj_surface->y_cb_offset,
279
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
280
                                          I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
281
            gen9_render_src_surface_state(ctx, 4, region,
282
                                          region_pitch * obj_surface->y_cb_offset,
283
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
284
                                          I965_SURFACEFORMAT_R8_UNORM, flags);
285
            gen9_render_src_surface_state(ctx, 5, region,
286
                                          region_pitch * obj_surface->y_cr_offset,
287
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
288
                                          I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
289
            gen9_render_src_surface_state(ctx, 6, region,
290
                                          region_pitch * obj_surface->y_cr_offset,
291
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
292
                                          I965_SURFACEFORMAT_R8_UNORM, flags);
293
        }
294
    }
295
}
296
 
297
static void
298
gen9_subpic_render_src_surfaces_state(VADriverContextP ctx,
299
                                      struct object_surface *obj_surface)
300
{
301
    dri_bo *subpic_region;
302
    unsigned int index = obj_surface->subpic_render_idx;
303
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
304
    struct object_image *obj_image = obj_subpic->obj_image;
305
 
306
    assert(obj_surface);
307
    assert(obj_surface->bo);
308
    subpic_region = obj_image->bo;
309
    /*subpicture surface*/
310
    gen9_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
311
    gen9_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
312
}
313
 
314
/*
 * Fill SURFACE_STATE slot 'index' for the render target (the drawable
 * region) and hook it into the binding table.  The surface format is
 * chosen from the drawable depth: 16bpp -> B5G6R5, otherwise B8G8R8A8.
 */
static void
gen9_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
	format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
	format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen9_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen9_render_set_surface_scs(ss);
    /* Relocate the surface base address (ss8); the RT is read+written. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
349
 
350
/*
 * Upload the three vertices describing the textured rectangle.  Each
 * vertex is (u, v, x, y): a texture coordinate followed by a screen
 * coordinate.  Vertices are emitted in the order bottom-right,
 * bottom-left, top-left; rotation is applied by permuting which texture
 * corners pair with which screen corners.
 */
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    /* Per rotation: tex-coord component indices for the three emitted
     * vertices, two indices per vertex. */
    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    /* NOTE(review): the attribute value indexes the table unchecked;
     * assumes it is one of the four VA_ROTATION_* values -- confirm. */
    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}
389
 
390
static void
391
i965_subpic_render_upload_vertex(VADriverContextP ctx,
392
                                 struct object_surface *obj_surface,
393
                                 const VARectangle *output_rect)
394
{
395
    unsigned int index = obj_surface->subpic_render_idx;
396
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
397
    float tex_coords[4], vid_coords[4];
398
    VARectangle dst_rect;
399
 
400
    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
401
        dst_rect = obj_subpic->dst_rect;
402
    else {
403
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
404
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
405
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
406
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
407
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
408
        dst_rect.height = sy * obj_subpic->dst_rect.height;
409
    }
410
 
411
    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
412
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
413
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
414
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
415
 
416
    vid_coords[0] = dst_rect.x;
417
    vid_coords[1] = dst_rect.y;
418
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
419
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
420
 
421
    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
422
}
423
 
424
static void
425
i965_render_upload_vertex(
426
    VADriverContextP   ctx,
427
    struct object_surface *obj_surface,
428
    const VARectangle *src_rect,
429
    const VARectangle *dst_rect
430
)
431
{
432
    struct i965_driver_data *i965 = i965_driver_data(ctx);
433
    struct i965_render_state *render_state = &i965->render_state;
434
    struct intel_region *dest_region = render_state->draw_region;
435
    float tex_coords[4], vid_coords[4];
436
    int width, height;
437
 
438
    width  = obj_surface->orig_width;
439
    height = obj_surface->orig_height;
440
 
441
    tex_coords[0] = (float)src_rect->x / width;
442
    tex_coords[1] = (float)src_rect->y / height;
443
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
444
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
445
 
446
    vid_coords[0] = dest_region->x + dst_rect->x;
447
    vid_coords[1] = dest_region->y + dst_rect->y;
448
    vid_coords[2] = vid_coords[0] + dst_rect->width;
449
    vid_coords[3] = vid_coords[1] + dst_rect->height;
450
 
451
    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
452
}
453
 
454
/*
 * Emit the drawing-rectangle command covering the full destination
 * region: origin (0,0), clipped max (width-1, height-1).
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000); /* ymin:xmin = 0 */
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16); /* ymax:xmax */
    OUT_BATCH(batch, 0x00000000); /* rectangle origin */
    ADVANCE_BATCH(batch);
}
469
 
470
static void
471
i965_render_upload_image_palette(
472
    VADriverContextP ctx,
473
    struct object_image *obj_image,
474
    unsigned int     alpha
475
)
476
{
477
    struct i965_driver_data *i965 = i965_driver_data(ctx);
478
    struct intel_batchbuffer *batch = i965->batch;
479
    unsigned int i;
480
 
481
    assert(obj_image);
482
 
483
    if (!obj_image)
484
        return;
485
 
486
    if (obj_image->image.num_palette_entries == 0)
487
        return;
488
 
489
    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
490
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
491
    /*fill palette*/
492
    //int32_t out[16]; //0-23:color 23-31:alpha
493
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
494
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
495
    ADVANCE_BATCH(batch);
496
}
497
 
498
/*
 * Clear the destination drawable to black with a blitter solid fill
 * (XY_COLOR_BLT) before the video frame is composited onto it.
 */
static void
gen9_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster operation in bits 23:16 */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destination: pitch is programmed in dwords -- TODO confirm
     * against the blitter documentation. */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* fill color: black */
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
543
 
544
 
545
/*
 * Dynamic-state layout (ported from the GEN8 path, used here for GEN9).
 * Every sub-state carved out of the dynamic-state bo is aligned to this.
 */
#define ALIGNMENT       64
549
 
550
static void
551
gen9_render_initialize(VADriverContextP ctx)
552
{
553
    struct i965_driver_data *i965 = i965_driver_data(ctx);
554
    struct i965_render_state *render_state = &i965->render_state;
555
    dri_bo *bo;
556
    int size;
557
    unsigned int end_offset;
558
 
559
    /* VERTEX BUFFER */
560
    dri_bo_unreference(render_state->vb.vertex_buffer);
561
    bo = dri_bo_alloc(i965->intel.bufmgr,
562
                      "vertex buffer",
563
                      4096,
564
                      4096);
565
    assert(bo);
566
    render_state->vb.vertex_buffer = bo;
567
 
568
    /* WM */
569
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
570
    bo = dri_bo_alloc(i965->intel.bufmgr,
571
                      "surface state & binding table",
572
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
573
                      4096);
574
    assert(bo);
575
    render_state->wm.surface_state_binding_table_bo = bo;
576
 
577
    render_state->curbe_size = 256;
578
 
579
    render_state->wm.sampler_count = 0;
580
 
581
    render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
582
 
583
    render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
584
 
585
    render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
586
 
587
    render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
588
			16 * sizeof(struct gen8_blend_state_rt);
589
 
590
    render_state->sf_clip_size = 1024;
591
 
592
    render_state->scissor_size = 1024;
593
 
594
    size = ALIGN(render_state->curbe_size, ALIGNMENT) +
595
        ALIGN(render_state->sampler_size, ALIGNMENT) +
596
        ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
597
        ALIGN(render_state->cc_state_size, ALIGNMENT) +
598
        ALIGN(render_state->blend_state_size, ALIGNMENT) +
599
        ALIGN(render_state->sf_clip_size, ALIGNMENT) +
600
        ALIGN(render_state->scissor_size, ALIGNMENT);
601
 
602
    dri_bo_unreference(render_state->dynamic_state.bo);
603
    bo = dri_bo_alloc(i965->intel.bufmgr,
604
                      "dynamic_state",
605
                      size,
606
                      4096);
607
 
608
    render_state->dynamic_state.bo = bo;
609
 
610
    end_offset = 0;
611
    render_state->dynamic_state.end_offset = 0;
612
 
613
    /* Constant buffer offset */
614
    render_state->curbe_offset = end_offset;
615
    end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
616
 
617
    /* Sampler_state  */
618
    render_state->sampler_offset = end_offset;
619
    end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
620
 
621
    /* CC_VIEWPORT_state  */
622
    render_state->cc_viewport_offset = end_offset;
623
    end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
624
 
625
    /* CC_STATE_state  */
626
    render_state->cc_state_offset = end_offset;
627
    end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
628
 
629
    /* Blend_state  */
630
    render_state->blend_state_offset = end_offset;
631
    end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
632
 
633
    /* SF_CLIP_state  */
634
    render_state->sf_clip_offset = end_offset;
635
    end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
636
 
637
    /* SCISSOR_state  */
638
    render_state->scissor_offset = end_offset;
639
    end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
640
 
641
    /* update the end offset of dynamic_state */
642
    render_state->dynamic_state.end_offset = end_offset;
643
 
644
}
645
 
646
static void
647
gen9_render_sampler(VADriverContextP ctx)
648
{
649
    struct i965_driver_data *i965 = i965_driver_data(ctx);
650
    struct i965_render_state *render_state = &i965->render_state;
651
    struct gen8_sampler_state *sampler_state;
652
    int i;
653
    unsigned char *cc_ptr;
654
 
655
    assert(render_state->wm.sampler_count > 0);
656
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
657
 
658
    dri_bo_map(render_state->dynamic_state.bo, 1);
659
    assert(render_state->dynamic_state.bo->virtual);
660
 
661
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
662
			render_state->sampler_offset;
663
 
664
    sampler_state = (struct gen8_sampler_state *) cc_ptr;
665
 
666
    for (i = 0; i < render_state->wm.sampler_count; i++) {
667
        memset(sampler_state, 0, sizeof(*sampler_state));
668
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
669
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
670
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
671
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
672
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
673
        sampler_state++;
674
    }
675
 
676
    dri_bo_unmap(render_state->dynamic_state.bo);
677
}
678
 
679
static void
680
gen9_render_blend_state(VADriverContextP ctx)
681
{
682
    struct i965_driver_data *i965 = i965_driver_data(ctx);
683
    struct i965_render_state *render_state = &i965->render_state;
684
    struct gen8_global_blend_state *global_blend_state;
685
    struct gen8_blend_state_rt *blend_state;
686
    unsigned char *cc_ptr;
687
 
688
    dri_bo_map(render_state->dynamic_state.bo, 1);
689
    assert(render_state->dynamic_state.bo->virtual);
690
 
691
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
692
			render_state->blend_state_offset;
693
 
694
    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
695
 
696
    memset(global_blend_state, 0, render_state->blend_state_size);
697
    /* Global blend state + blend_state for Render Target */
698
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
699
    blend_state->blend1.logic_op_enable = 1;
700
    blend_state->blend1.logic_op_func = 0xc;
701
    blend_state->blend1.pre_blend_clamp_enable = 1;
702
 
703
    dri_bo_unmap(render_state->dynamic_state.bo);
704
}
705
 
706
 
707
static void
708
gen9_render_cc_viewport(VADriverContextP ctx)
709
{
710
    struct i965_driver_data *i965 = i965_driver_data(ctx);
711
    struct i965_render_state *render_state = &i965->render_state;
712
    struct i965_cc_viewport *cc_viewport;
713
    unsigned char *cc_ptr;
714
 
715
    dri_bo_map(render_state->dynamic_state.bo, 1);
716
    assert(render_state->dynamic_state.bo->virtual);
717
 
718
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
719
			render_state->cc_viewport_offset;
720
 
721
    cc_viewport = (struct i965_cc_viewport *) cc_ptr;
722
 
723
    memset(cc_viewport, 0, sizeof(*cc_viewport));
724
 
725
    cc_viewport->min_depth = -1.e35;
726
    cc_viewport->max_depth = 1.e35;
727
 
728
    dri_bo_unmap(render_state->dynamic_state.bo);
729
}
730
 
731
static void
732
gen9_render_color_calc_state(VADriverContextP ctx)
733
{
734
    struct i965_driver_data *i965 = i965_driver_data(ctx);
735
    struct i965_render_state *render_state = &i965->render_state;
736
    struct gen6_color_calc_state *color_calc_state;
737
    unsigned char *cc_ptr;
738
 
739
    dri_bo_map(render_state->dynamic_state.bo, 1);
740
    assert(render_state->dynamic_state.bo->virtual);
741
 
742
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
743
			render_state->cc_state_offset;
744
 
745
    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
746
 
747
    memset(color_calc_state, 0, sizeof(*color_calc_state));
748
    color_calc_state->constant_r = 1.0;
749
    color_calc_state->constant_g = 0.0;
750
    color_calc_state->constant_b = 1.0;
751
    color_calc_state->constant_a = 1.0;
752
    dri_bo_unmap(render_state->dynamic_state.bo);
753
}
754
 
755
#define PI  3.1415926  /* NOTE(review): low precision; M_PI from <math.h> would be exact */
756
 
757
/*
 * Fill the shader constant buffer (CURBE) inside the dynamic-state bo.
 *
 * Layout written here:
 *   ushort[0]  : plane selector -- 2 = Y800 (luma only),
 *                1 = NV12/P010 (interleaved UV), 0 = separate U/V planes
 *   ushort[1]  : 1 = skip the color-balance transform (all attributes at
 *                their defaults), 0 = apply it
 *   float[4-7] : contrast, brightness, cos(hue)*contrast*saturation,
 *                sin(hue)*contrast*saturation
 *   float[8..] : YUV->RGB coefficients for the color standard selected
 *                by the VA_SRC_COLOR_MASK bits of 'flags'
 */
static void
gen9_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;
    const float* yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2') ||
            obj_surface->fourcc == VA_FOURCC('P', '0', '1', '0'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;

    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(color_flag),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->dynamic_state.bo);
}
819
 
820
/* Prepare all indirect state and vertex data for a normal (PS_KERNEL)
 * rendering pass: destination and source surface states, sampler,
 * CC viewport, color-calc and blend states, the push constants (CURBE)
 * and the vertex buffer.  Must run before gen9_render_emit_states().
 */
static void
gen9_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen9_render_dest_surface_state(ctx, 0);
    gen9_render_src_surfaces_state(ctx, obj_surface, flags);
    gen9_render_sampler(ctx);
    gen9_render_cc_viewport(ctx);
    gen9_render_color_calc_state(ctx);
    gen9_render_blend_state(ctx);
    gen9_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
838
 
839
/* Emit STATE_BASE_ADDRESS (19 dwords): point the GPU at the
 * surface-state/binding-table BO, the dynamic-state BO and the
 * instruction (kernel) BO.  Bases are 64-bit on this generation, hence
 * each OUT_RELOC is followed by a zero dword for the upper 32 bits.
 */
static void
gen9_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 19);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4. Surface state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW6. Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
              0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW8. Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW10. Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW12. Upper bounds for each state area */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */

    /* DW16-18. The bindless surface state address (unused: zero base) */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFF0000);
    ADVANCE_BATCH(batch);
}
882
 
883
/* Emit pointers to the color-calc and blend states previously written
 * into the dynamic-state buffer.  The "+ 1" sets bit 0 of each pointer
 * dword (presumably the "pointer valid" bit — confirm against the PRM).
 */
static void
gen9_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
901
 
902
/* Bind vertex buffer 0 (4 floats = 16 bytes per vertex, 3 vertices) and
 * issue the 3DPRIMITIVE that draws the destination rectangle as a
 * RECTLIST.  The VF/VF_INSTANCING/VF_SGVS packets in the middle are
 * emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing — they rely on
 * the atomic batch reservation made by the caller
 * (gen9_render_emit_states), same as the other gen9_emit_* helpers.
 */
static void
gen9_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0); /* upper 32 bits of the buffer address */
    OUT_BATCH(batch, 12 * 4); /* buffer size: 3 vertices * 4 floats */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overridden by the VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    OUT_BATCH(batch, GEN7_3DSTATE_VF | (2 - 2));
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
    OUT_BATCH(batch, 0);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
949
 
950
/* Describe how the vertex fetcher unpacks each 16-byte vertex from
 * buffer 0 into the VUE, and disable instancing on all three elements.
 */
static void
gen9_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int i;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0, 0)
     * dword 4-7: position (x, y, 1.0, 1.0),
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* Disable instancing for all vertex elements. */
    for (i = 0; i < 3; i++) {
        OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
        OUT_BATCH(batch, i);
        OUT_BATCH(batch, 0);
    }
}
1007
 
1008
/* Disable the vertex shader stage: no VS constant buffer, no VS kernel
 * (pass-through), and null binding-table/sampler pointers for the VS.
 */
static void
gen9_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1058
 
1059
/*
 * URB layout on GEN8/GEN9
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
1065
/* Partition the URB: 8KB of PS push constants at offset 0, 64 VS
 * entries after them, and empty GS/HS/DS allocations placed at
 * increasing start addresses so the ranges do not overlap.
 */
static void
gen9_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* The minimum urb entries is 64 */

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS push constants: size is 8KB and base address is 0KB */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
              (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    /* GS/HS/DS get zero-sized allocations at distinct start addresses */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}
1132
 
1133
/* Disable every geometry stage this renderer does not use: GS, HS,
 * tessellator (TE), DS and stream-out.  Each stage gets a zeroed
 * CONSTANT packet, a zeroed stage-state packet and null binding-table
 * and sampler pointers.
 */
static void
gen9_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /* DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /* DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (11 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);

    /* Dual-patch kernel start pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1282
 
1283
/* Emit the states that never change for this renderer: select the 3D
 * pipeline, force single-sample rendering (1 sample/pixel), zero the
 * sample pattern, set the sample mask to 0x1, and clear the system
 * instruction pointer.  (Function name keeps the historical
 * "invarient" spelling used by its callers.)
 */
static void
gen9_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D |
                     GEN9_PIPELINE_SELECTION_MASK);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1326
 
1327
/* Disable the clip stage (all-zero 3DSTATE_CLIP = pass-through).
 * Emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing — relies on the
 * caller's atomic batch reservation, like the other gen9_emit_* helpers.
 */
static void
gen9_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
1338
 
1339
/* Set up rasterizer/setup state: no culling, SBE configured to forward
 * one attribute (the texture coordinate, URB read offset 1) to the PS,
 * a zeroed SBE swizzle table, and default 3DSTATE_SF.
 */
static void
gen9_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (6 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN9_SBE_ACTIVE_COMPONENT_XYZW);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1390
 
1391
/* Program the pixel-shader stage for the given kernel index:
 * PS-extra/PS-blend (alpha blending only for the subpicture kernel),
 * WM barycentric mode, the PS push-constant buffer pointing at the
 * CURBE in the dynamic-state buffer, the PS kernel itself (SIMD16,
 * push constants enabled) and the PS binding table.
 */
static void
gen9_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    /* reserve two threads, matching the per-device WM thread limit use */
    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
        /* plain copy: no blending */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        /* subpicture: source-alpha blending onto the render target */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                   GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /* DW3-4. Constant buffer 0: CURBE offset in the dynamic state */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /* DW5-10. Constant buffer 1-3 (unused) */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag. Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
              GEN7_PS_VECTOR_MASK_ENABLE);
    /* DW4-5. Scratch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1483
 
1484
/* Declare that no depth, hierarchical-depth or stencil buffer is bound
 * (NULL depth surface, zeroed HiZ/stencil packets, zero clear params).
 */
static void
gen9_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1529
 
1530
/* Disable depth and stencil testing (all-zero WM_DEPTH_STENCIL). */
static void
gen9_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1542
 
1543
/* Emit an all-zero 3DSTATE_WM_HZ_OP (no HiZ operation requested). */
static void
gen9_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1557
 
1558
/* Point the hardware at the CC viewport written into the dynamic-state
 * buffer; the SF/CLIP viewport pointer is left at zero (unused).
 */
static void
gen9_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1575
 
1576
/* Point the PS stage at the sampler state in the dynamic-state buffer. */
static void
gen9_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1588
 
1589
 
1590
/* Emit the drawing rectangle; the generic i965 helper works unchanged
 * on this generation.
 */
static void
gen9_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1595
 
1596
/* Build the complete 3D-pipeline command sequence for one draw with the
 * given PS kernel index.  The whole sequence is emitted atomically so
 * it is never split across batch buffers; the emission order below is
 * the hardware-required programming order — do not reorder.
 */
static void
gen9_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen9_emit_invarient_states(ctx);
    gen9_emit_state_base_address(ctx);
    gen9_emit_viewport_state_pointers(ctx);
    gen9_emit_urb(ctx);
    gen9_emit_cc_state_pointers(ctx);
    gen9_emit_sampler_state_pointers(ctx);
    gen9_emit_wm_hz_op(ctx);
    gen9_emit_bypass_state(ctx);
    gen9_emit_vs_state(ctx);
    gen9_emit_clip_state(ctx);
    gen9_emit_sf_state(ctx);
    gen9_emit_depth_stencil_state(ctx);
    gen9_emit_wm_state(ctx, kernel);
    gen9_emit_depth_buffer_state(ctx);
    gen9_emit_drawing_rectangle(ctx);
    gen9_emit_vertex_element_state(ctx);
    gen9_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1623
 
1624
/* Render src_rect of obj_surface into dst_rect of the destination:
 * initialize render state, set up all indirect states, clear the
 * destination region, emit the pipeline commands with the normal PS
 * kernel and flush the batch to the kernel driver.
 */
static void
gen9_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen9_render_initialize(ctx);
    gen9_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen9_clear_dest_region(ctx);
    gen9_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
1642
 
1643
static void
1644
gen9_subpicture_render_blend_state(VADriverContextP ctx)
1645
{
1646
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1647
    struct i965_render_state *render_state = &i965->render_state;
1648
    struct gen8_global_blend_state *global_blend_state;
1649
    struct gen8_blend_state_rt *blend_state;
1650
    unsigned char *cc_ptr;
1651
 
1652
    dri_bo_map(render_state->dynamic_state.bo, 1);
1653
    assert(render_state->dynamic_state.bo->virtual);
1654
 
1655
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1656
			render_state->blend_state_offset;
1657
 
1658
    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
1659
 
1660
    memset(global_blend_state, 0, render_state->blend_state_size);
1661
    /* Global blend state + blend_state for Render Target */
1662
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
1663
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
1664
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1665
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1666
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
1667
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1668
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1669
    blend_state->blend0.colorbuf_blend = 1;
1670
    blend_state->blend1.post_blend_clamp_enable = 1;
1671
    blend_state->blend1.pre_blend_clamp_enable = 1;
1672
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
1673
 
1674
    dri_bo_unmap(render_state->dynamic_state.bo);
1675
}
1676
 
1677
static void
1678
gen9_subpic_render_upload_constants(VADriverContextP ctx,
1679
                                    struct object_surface *obj_surface)
1680
{
1681
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1682
    struct i965_render_state *render_state = &i965->render_state;
1683
    float *constant_buffer;
1684
    float global_alpha = 1.0;
1685
    unsigned int index = obj_surface->subpic_render_idx;
1686
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1687
    unsigned char *cc_ptr;
1688
 
1689
    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1690
        global_alpha = obj_subpic->global_alpha;
1691
    }
1692
 
1693
 
1694
    dri_bo_map(render_state->dynamic_state.bo, 1);
1695
    assert(render_state->dynamic_state.bo->virtual);
1696
 
1697
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1698
				render_state->curbe_offset;
1699
 
1700
    constant_buffer = (float *) cc_ptr;
1701
    *constant_buffer = global_alpha;
1702
 
1703
    dri_bo_unmap(render_state->dynamic_state.bo);
1704
}
1705
 
1706
/*
 * Prepare all GPU state needed to composite one subpicture layer:
 * destination/source surface states, sampler, CC viewport, color calc
 * state, blend state, shader constants, and the vertex data.
 *
 * NOTE(review): src_rect is accepted but not referenced in this body;
 * the source coordinates presumably travel via the uploaded vertex
 * data — confirm against i965_subpic_render_upload_vertex().
 */
static void
gen9_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /* Destination surface occupies binding-table slot 0. */
    gen9_render_dest_surface_state(ctx, 0);
    gen9_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen9_render_sampler(ctx);
    gen9_render_cc_viewport(ctx);
    gen9_render_color_calc_state(ctx);
    gen9_subpicture_render_blend_state(ctx);
    gen9_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1723
 
1724
static void
1725
gen9_render_put_subpicture(
1726
    VADriverContextP   ctx,
1727
    struct object_surface *obj_surface,
1728
    const VARectangle *src_rect,
1729
    const VARectangle *dst_rect
1730
)
1731
{
1732
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1733
    struct intel_batchbuffer *batch = i965->batch;
1734
    unsigned int index = obj_surface->subpic_render_idx;
1735
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1736
 
1737
    assert(obj_subpic);
1738
    gen9_render_initialize(ctx);
1739
    gen9_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
1740
    gen9_render_emit_states(ctx, PS_SUBPIC_KERNEL);
1741
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1742
    intel_batchbuffer_flush(batch);
1743
}
1744
 
1745
static void
1746
gen9_render_terminate(VADriverContextP ctx)
1747
{
1748
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1749
    struct i965_render_state *render_state = &i965->render_state;
1750
 
1751
    dri_bo_unreference(render_state->vb.vertex_buffer);
1752
    render_state->vb.vertex_buffer = NULL;
1753
 
1754
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1755
    render_state->wm.surface_state_binding_table_bo = NULL;
1756
 
1757
    if (render_state->instruction_state.bo) {
1758
        dri_bo_unreference(render_state->instruction_state.bo);
1759
        render_state->instruction_state.bo = NULL;
1760
    }
1761
 
1762
    if (render_state->dynamic_state.bo) {
1763
        dri_bo_unreference(render_state->dynamic_state.bo);
1764
        render_state->dynamic_state.bo = NULL;
1765
    }
1766
 
1767
    if (render_state->indirect_state.bo) {
1768
        dri_bo_unreference(render_state->indirect_state.bo);
1769
        render_state->indirect_state.bo = NULL;
1770
    }
1771
 
1772
    if (render_state->draw_region) {
1773
        dri_bo_unreference(render_state->draw_region->bo);
1774
        free(render_state->draw_region);
1775
        render_state->draw_region = NULL;
1776
    }
1777
}
1778
 
1779
bool
1780
gen9_render_init(VADriverContextP ctx)
1781
{
1782
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1783
    struct i965_render_state *render_state = &i965->render_state;
1784
    int i, kernel_size;
1785
    unsigned int kernel_offset, end_offset;
1786
    unsigned char *kernel_ptr;
1787
    struct i965_kernel *kernel;
1788
 
1789
    render_state->render_put_surface = gen9_render_put_surface;
1790
    render_state->render_put_subpicture = gen9_render_put_subpicture;
1791
    render_state->render_terminate = gen9_render_terminate;
1792
 
1793
    memcpy(render_state->render_kernels, render_kernels_gen9,
1794
			sizeof(render_state->render_kernels));
1795
 
1796
    kernel_size = 4096;
1797
 
1798
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1799
        kernel = &render_state->render_kernels[i];
1800
 
1801
        if (!kernel->size)
1802
            continue;
1803
 
1804
        kernel_size += ALIGN(kernel->size, ALIGNMENT);
1805
    }
1806
 
1807
    render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1808
                                  "kernel shader",
1809
                                  kernel_size,
1810
                                  0x1000);
1811
    if (render_state->instruction_state.bo == NULL) {
1812
        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1813
        return false;
1814
    }
1815
 
1816
    assert(render_state->instruction_state.bo);
1817
 
1818
    render_state->instruction_state.bo_size = kernel_size;
1819
    render_state->instruction_state.end_offset = 0;
1820
    end_offset = 0;
1821
 
1822
    dri_bo_map(render_state->instruction_state.bo, 1);
1823
    kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1824
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1825
        kernel = &render_state->render_kernels[i];
1826
        kernel_offset = end_offset;
1827
        kernel->kernel_offset = kernel_offset;
1828
 
1829
        if (!kernel->size)
1830
            continue;
1831
 
1832
        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1833
 
1834
        end_offset += ALIGN(kernel->size, ALIGNMENT);
1835
    }
1836
 
1837
    render_state->instruction_state.end_offset = end_offset;
1838
 
1839
    dri_bo_unmap(render_state->instruction_state.bo);
1840
 
1841
    return true;
1842
}