Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5361 serge 1
/*
2
 * Copyright © 2014 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *    Keith Packard 
26
 *    Xiang Haihao 
27
 *    Zhao Yakui 
28
 *
29
 */
30
 
31
/*
32
 * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33
 */
34
 
35
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_backend.h>
42
 
43
#include "intel_batchbuffer.h"
44
#include "intel_driver.h"
45
#include "i965_defines.h"
46
#include "i965_drv_video.h"
47
#include "i965_structs.h"
48
 
49
#include "i965_render.h"
50
 
51
#define SF_KERNEL_NUM_GRF       16
52
#define SF_MAX_THREADS          1
53
 
54
#define PS_KERNEL_NUM_GRF       48
55
#define PS_MAX_THREADS          32
56
 
57
/* Programs for Gen8 */
58
/* SF (strip/fan) kernel payload for Gen8: intentionally empty — the SF
 * kernel descriptor below still references it, but carries a zero size. */
static const uint32_t sf_kernel_static_gen8[][4] ={

};
61
/* Pixel-shader kernel for video rendering, assembled from precompiled
 * shader fragments: affine source coords -> planar YUV sampling ->
 * color-balance adjustment -> YUV-to-RGB conversion -> RT write. */
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};
68
 
69
/* Pixel-shader kernel for subpicture blending: affine source coords ->
 * ARGB sampling -> RT write (no YUV conversion needed). */
static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};
74
 
75
 
76
#define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8
77
 
78
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
79
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
80
 
81
/* Indices into render_kernels_gen8[] below. */
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
86
 
87
/* Kernel descriptor table for Gen8 rendering; entries are ordered to match
 * the SF_KERNEL/PS_KERNEL/PS_SUBPIC_KERNEL enum above.  Fields appear to be
 * {name, id, binary blob, blob size, bo} with the bo filled in at kernel
 * upload time — NOTE(review): field meaning assumed from usage, confirm
 * against the struct i965_kernel declaration. */
static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};
111
 
112
#define URB_VS_ENTRIES	      8
113
#define URB_VS_ENTRY_SIZE     1
114
 
115
#define URB_GS_ENTRIES	      0
116
#define URB_GS_ENTRY_SIZE     0
117
 
118
#define URB_CLIP_ENTRIES      0
119
#define URB_CLIP_ENTRY_SIZE   0
120
 
121
#define URB_SF_ENTRIES	      1
122
#define URB_SF_ENTRY_SIZE     2
123
 
124
#define URB_CS_ENTRIES	      4
125
#define URB_CS_ENTRY_SIZE     4
126
 
127
/*
 * YUV -> RGB conversion coefficient tables, uploaded verbatim into the
 * constant (CURBE) buffer for the YUV->RGB pixel-shader stage
 * (see gen8_render_upload_constants()).
 *
 * One 3x4 row per output channel; the 1.164 factor is the video-range
 * (16-235) luma expansion, and the last column holds the per-input bias
 * (-16/255 for Y, -128/255 for chroma).  NOTE(review): exact row/column
 * semantics are consumed by the exa_wm_yuv_rgb shader — confirm there.
 *
 * Declared const: the tables are read-only (only ever memcpy sources).
 */
static const float yuv_to_rgb_bt601[3][4] = {
{1.164,		0,	1.596,		-0.06275,},
{1.164,		-0.392,	-0.813,		-0.50196,},
{1.164,		2.017,	0,		-0.50196,},
};

static const float yuv_to_rgb_bt709[3][4] = {
{1.164,		0,	1.793,		-0.06275,},
{1.164,		-0.213,	-0.533,		-0.50196,},
{1.164,		2.112,	0,		-0.50196,},
};

static const float yuv_to_rgb_smpte_240[3][4] = {
{1.164,		0,	1.794,		-0.06275,},
{1.164,		-0.258,	-0.5425,	-0.50196,},
{1.164,		2.078,	0,		-0.50196,},
};
144
 
145
 
146
static void
147
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
148
{
149
   switch (tiling) {
150
   case I915_TILING_NONE:
151
      ss->ss0.tiled_surface = 0;
152
      ss->ss0.tile_walk = 0;
153
      break;
154
   case I915_TILING_X:
155
      ss->ss0.tiled_surface = 1;
156
      ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
157
      break;
158
   case I915_TILING_Y:
159
      ss->ss0.tiled_surface = 1;
160
      ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
161
      break;
162
   }
163
}
164
 
165
/* Set "Shader Channel Select" for GEN8+: identity mapping, so each shader
 * channel reads its own component from the surface.
 * (Note: "chanel" is the spelling used by the struct field, kept as-is.) */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
174
 
175
/*
 * Fill in a Gen8 SURFACE_STATE for a 2D surface backed by `bo`.
 *
 * offset: byte offset of the surface within the BO.
 * width/height/pitch: surface dimensions in pixels/bytes (hardware wants
 *   them minus one).
 * format: an I965_SURFACEFORMAT_* value.
 * flags: I965_PP_FLAG_TOP/BOTTOM_FIELD select single-field (interlaced)
 *   access, halving the effective height.
 *
 * Note: ss8.base_addr only records the BO's current presumed offset; the
 * caller must still emit a relocation for the address field.
 */
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        /* Bottom field: start one line down, then fall through to the
         * shared field setup. */
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        /* Skip every other line and halve the height. */
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}
219
 
220
/*
 * Bind one source (sampled) surface: write its SURFACE_STATE into the
 * shared surface-state/binding-table BO at slot `index`, emit the address
 * relocation, point the binding-table entry at it, and bump the sampler
 * count used later by gen8_render_sampler().
 *
 * region/offset/w/h/pitch/format/flags are forwarded to
 * gen8_render_set_surface_state(); see there for their meaning.
 */
static void
gen8_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    /* Map for write (second arg non-zero) and locate this slot's state. */
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen8_render_set_surface_scs(ss);
    /* Relocation for the base address written into ss8 (sampled domain). */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* Binding-table entry points at this slot's surface state. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
259
 
260
static void
261
gen8_render_src_surfaces_state(
262
    VADriverContextP ctx,
263
    struct object_surface *obj_surface,
264
    unsigned int     flags
265
)
266
{
267
    int region_pitch;
268
    int rw, rh;
269
    dri_bo *region;
270
 
271
    region_pitch = obj_surface->width;
272
    rw = obj_surface->orig_width;
273
    rh = obj_surface->orig_height;
274
    region = obj_surface->bo;
275
 
276
    gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
277
    gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
278
 
279
    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
280
        return;
281
 
282
    if (obj_surface->fourcc == VA_FOURCC_NV12) {
283
        gen8_render_src_surface_state(ctx, 3, region,
284
                                      region_pitch * obj_surface->y_cb_offset,
285
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
286
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
287
        gen8_render_src_surface_state(ctx, 4, region,
288
                                      region_pitch * obj_surface->y_cb_offset,
289
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
290
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
291
    } else {
292
        gen8_render_src_surface_state(ctx, 3, region,
293
                                      region_pitch * obj_surface->y_cb_offset,
294
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
295
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
296
        gen8_render_src_surface_state(ctx, 4, region,
297
                                      region_pitch * obj_surface->y_cb_offset,
298
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
299
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
300
        gen8_render_src_surface_state(ctx, 5, region,
301
                                      region_pitch * obj_surface->y_cr_offset,
302
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
303
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
304
        gen8_render_src_surface_state(ctx, 6, region,
305
                                      region_pitch * obj_surface->y_cr_offset,
306
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
307
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
308
    }
309
}
310
 
311
static void
312
gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
313
                                      struct object_surface *obj_surface)
314
{
315
    dri_bo *subpic_region;
316
    unsigned int index = obj_surface->subpic_render_idx;
317
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
318
    struct object_image *obj_image = obj_subpic->obj_image;
319
 
320
    assert(obj_surface);
321
    assert(obj_surface->bo);
322
    subpic_region = obj_image->bo;
323
    /*subpicture surface*/
324
    gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
325
    gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
326
}
327
 
328
/*
 * Bind the render target: build a SURFACE_STATE for the current drawable
 * region at binding-table slot `index`, with a render-domain relocation
 * for its base address.
 */
static void
gen8_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    /* Choose the RT format from bytes-per-pixel: 16bpp -> RGB565,
     * otherwise 32bpp BGRA. */
    if (dest_region->cpp == 2) {
	format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
	format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen8_render_set_surface_scs(ss);
    /* RT is both read and written by the render engine. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    /* Binding-table entry points at this slot's surface state. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
363
 
364
/*
 * Upload three vertices (one triangle covering the destination rectangle)
 * to the vertex buffer.  Each vertex is 4 floats: {tex_u, tex_v, x, y}.
 *
 * tex_coords: source rectangle in normalized texture space [(u1,v1);(u2,v2)]
 * vid_coords: destination rectangle in screen space [(x1,y1);(x2,y2)]
 *
 * The screen-space corners are fixed (bottom-right, bottom-left, top-left);
 * rotation is applied by permuting which texture-coordinate components map
 * onto each corner, per the current rotation attribute.
 */
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    /* For each rotation: the 6 tex_coords indices to use, in vertex order
     * (bottom-right, bottom-left, top-left). */
    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}
403
 
404
/*
 * Compute texture and screen coordinates for blending a subpicture and
 * upload the resulting triangle via i965_fill_vertex_buffer().
 *
 * The subpicture's dst_rect is either used as-is (screen coordinates) or
 * scaled from surface space into output_rect, depending on the
 * VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD flag.
 */
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        /* Scale dst_rect from source-surface space into the output rect. */
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    /* Normalized texture coords of src_rect within the subpicture image. */
    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
437
 
438
static void
439
i965_render_upload_vertex(
440
    VADriverContextP   ctx,
441
    struct object_surface *obj_surface,
442
    const VARectangle *src_rect,
443
    const VARectangle *dst_rect
444
)
445
{
446
    struct i965_driver_data *i965 = i965_driver_data(ctx);
447
    struct i965_render_state *render_state = &i965->render_state;
448
    struct intel_region *dest_region = render_state->draw_region;
449
    float tex_coords[4], vid_coords[4];
450
    int width, height;
451
 
452
    width  = obj_surface->orig_width;
453
    height = obj_surface->orig_height;
454
 
455
    tex_coords[0] = (float)src_rect->x / width;
456
    tex_coords[1] = (float)src_rect->y / height;
457
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
458
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
459
 
460
    vid_coords[0] = dest_region->x + dst_rect->x;
461
    vid_coords[1] = dest_region->y + dst_rect->y;
462
    vid_coords[2] = vid_coords[0] + dst_rect->width;
463
    vid_coords[3] = vid_coords[1] + dst_rect->height;
464
 
465
    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
466
}
467
 
468
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE covering the whole drawable region:
 * origin (0,0), max corner (width-1, height-1) with y in the high 16 bits.
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}
483
 
484
/*
 * Upload the image's palette via CMD_SAMPLER_PALETTE_LOAD, merging the
 * caller-supplied alpha into bits 31:24 of each entry (palette entries
 * are assumed to carry color in the low 24 bits).  No-op when the image
 * is NULL or has no palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /*fill palette*/
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
511
 
512
/*
 * Clear the destination drawable to black using an XY_COLOR_BLT on the
 * BLT engine.  br13 carries the raster op (0xf0, pattern copy) in bits
 * 23:16, the color depth bits, and the destination pitch.
 */
static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destinations take the pitch in dwords rather than bytes. */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    /* Destination rectangle: (x1,y1) then (x2,y2), y in the high 16 bits. */
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    /* Solid fill color: black (and a trailing pad dword). */
    OUT_BATCH(batch, 0x0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
557
 
558
 
559
/*
560
 * for GEN8
561
 */
562
#define ALIGNMENT       64
563
 
564
/*
 * (Re)allocate the BOs used by the Gen8 render path and lay out the
 * dynamic-state buffer.
 *
 * Three BOs are managed here:
 *  - vertex buffer (4 KiB);
 *  - combined surface-state + binding-table buffer;
 *  - a single "dynamic_state" BO packing, in order: CURBE constants,
 *    sampler states, CC viewport, CC state, blend state, SF_CLIP and
 *    SCISSOR state, each aligned to ALIGNMENT (64) bytes.
 * The per-section offsets recorded in render_state are consumed by the
 * gen8_render_* state-building functions below.
 */
static void
gen8_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;
    int size;
    unsigned int end_offset;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    render_state->curbe_size = 256;

    render_state->wm.sampler_count = 0;

    render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);

    render_state->cc_state_size = sizeof(struct gen6_color_calc_state);

    render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);

    /* Global blend state followed by 16 per-render-target blend states. */
    render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
			16 * sizeof(struct gen8_blend_state_rt);

    render_state->sf_clip_size = 1024;

    render_state->scissor_size = 1024;

    /* Total dynamic-state size, each section aligned to ALIGNMENT bytes. */
    size = ALIGN(render_state->curbe_size, ALIGNMENT) +
        ALIGN(render_state->sampler_size, ALIGNMENT) +
        ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
        ALIGN(render_state->cc_state_size, ALIGNMENT) +
        ALIGN(render_state->blend_state_size, ALIGNMENT) +
        ALIGN(render_state->sf_clip_size, ALIGNMENT) +
        ALIGN(render_state->scissor_size, ALIGNMENT);

    dri_bo_unreference(render_state->dynamic_state.bo);
    /* NOTE(review): unlike the allocations above, this one is not
     * assert-checked before use. */
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic_state",
                      size,
                      4096);

    render_state->dynamic_state.bo = bo;

    end_offset = 0;
    render_state->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    render_state->curbe_offset = end_offset;
    end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);

    /* Sampler_state  */
    render_state->sampler_offset = end_offset;
    end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);

    /* CC_VIEWPORT_state  */
    render_state->cc_viewport_offset = end_offset;
    end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);

    /* CC_STATE_state  */
    render_state->cc_state_offset = end_offset;
    end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);

    /* Blend_state  */
    render_state->blend_state_offset = end_offset;
    end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);

    /* SF_CLIP_state  */
    render_state->sf_clip_offset = end_offset;
    end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);

    /* SCISSOR_state  */
    render_state->scissor_offset = end_offset;
    end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);

    /* update the end offset of dynamic_state */
    render_state->dynamic_state.end_offset = end_offset;

}
659
 
660
/*
 * Write one sampler state per bound source surface (sampler_count was
 * incremented by gen8_render_src_surface_state) into the dynamic-state
 * BO: bilinear filtering, clamp-to-edge on all three coordinates.
 */
static void
gen8_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_sampler_state *sampler_state;
    int i;
    unsigned char *cc_ptr;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->sampler_offset;

    sampler_state = (struct gen8_sampler_state *) cc_ptr;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->dynamic_state.bo);
}
692
 
693
/*
 * Write the blend state into the dynamic-state BO: zeroed global blend
 * state followed by the render-target blend state, configured with a
 * logic op instead of alpha blending (func 0xc — presumably COPY per the
 * standard logic-op encoding; confirm against the PRM).
 */
static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
719
 
720
 
721
/*
 * Write the color-calculator viewport into the dynamic-state BO with an
 * effectively unbounded depth range (no depth clamping for 2D blits).
 */
static void
gen8_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->cc_viewport_offset;

    cc_viewport = (struct i965_cc_viewport *) cc_ptr;

    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
744
 
745
/*
 * Write the color-calculator state into the dynamic-state BO, setting the
 * constant color to opaque magenta (R=1, G=0, B=1, A=1).
 */
static void
gen8_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->cc_state_offset;

    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;

    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->dynamic_state.bo);
}
768
 
769
#define PI  3.1415926
770
 
771
/*
 * Fill the CURBE (constant) buffer read by the pixel shader.
 *
 * Layout (as written here; must match the shader's expectations):
 *   ushort[0]: source layout selector — 2 = grayscale (Y800),
 *              1 = NV12 (interleaved UV), 0 = planar YUV.
 *   ushort[1]: 1 to skip the color-balance transform (all attributes at
 *              their defaults), 0 to apply it.
 *   float[4..7]:  contrast, brightness, cos(hue)*contrast*saturation,
 *                 sin(hue)*contrast*saturation.
 *   float[8..19]: 3x4 YUV->RGB matrix selected by the VA_SRC_* color
 *                 standard in `flags` (BT.601 is the default).
 */
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->dynamic_state.bo);
}
832
 
833
/*
 * Build every piece of GPU state needed to render obj_surface's src_rect
 * into dst_rect: render target and source surfaces, samplers, CC/blend
 * state, shader constants, and the vertex data.  Must be called before
 * emitting the render pipeline commands.
 */
static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
851
 
852
/*
 * Emit the 16-dword STATE_BASE_ADDRESS command that tells the GPU where the
 * surface/binding-table, dynamic and instruction state buffers live.
 * Addresses are 64-bit on gen8: each base is a reloc (or just the
 * BASE_ADDRESS_MODIFY enable bit) followed by a zero upper dword.
 */
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, 0);

    /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
              0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

    /*DW10 */
    /* Instruction base address: kernels are stored in instruction_state.bo */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}
890
 
891
/*
 * Point the pipeline at the color-calc and blend states that were written
 * into the dynamic state buffer.  The low bit of each pointer is set
 * (offset + 1) -- presumably the "pointer valid" bit required by these
 * commands on gen8; confirm against the BDW PRM.
 */
static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
909
 
910
/*
 * Bind the vertex buffer, force RECTLIST topology and kick off the draw.
 * Each vertex is 4 dwords (x, y, u, v -- see gen8_emit_vertex_element_state),
 * and the 3DPRIMITIVE issues 3 vertices, i.e. one hardware rectangle.
 */
static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* pitch: 4 dwords/vertex */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4); /* buffer size: 3 vertices * 4 dwords */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
947
 
948
/*
 * Describe how the vertex fetcher turns the 4-dword vertex buffer entries
 * into the VUE expected by the pipeline (see layout comment below).
 * NOTE(review): this emitter writes 7 dwords without BEGIN_BATCH/
 * ADVANCE_BATCH bracketing, unlike the sibling emitters -- this matches the
 * upstream intel-driver code, where OUT_BATCH works unbracketed.
 */
static void
gen8_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0. 0)
     * dword 4-7: position (x, y, 1.0, 1.0),
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */

    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
997
 
998
/*
 * Program the vertex shader stage as a pass-through: no constant buffer,
 * no VS kernel, and null binding-table/sampler pointers.  The fixed-function
 * pipeline forwards the VUE unchanged to the rest of the pipe.
 */
static void
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1048
 
1049
/*
1050
 * URB layout on GEN8
1051
 * ----------------------------------------
1052
 * | PS Push Constants (8KB) | VS entries |
1053
 * ----------------------------------------
1054
 */
1055
/*
 * Partition the URB: no push constants for VS/DS/HS/GS, 8KB of push
 * constants for the PS at offset 0, then VS entries starting at 4KB
 * (GEN7_URB_STARTING_ADDRESS units).  GS/HS/DS get zero-sized allocations
 * at successive addresses since they are bypassed.
 */
static void
gen8_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* The minimum urb entries is 64 */

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Size is 8Kbs and base address is 0Kb */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    /* Size is 8Kbs and base address is 0Kb */
    OUT_BATCH(batch,
              (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
              (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
1122
 
1123
/*
 * Disable every geometry stage the video blit does not use: GS, HS,
 * tessellator (TE), DS and stream-out.  Each stage gets a zeroed constant
 * command, a zeroed state command (no kernel => pass-through/disabled) and
 * null binding-table/sampler pointers where applicable.
 */
static void
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1268
 
1269
/*
 * Emit state that never changes between render calls: select the 3D pipe,
 * configure single-sample rendering, clear the sample pattern, set the
 * sample mask to the first sample only, and zero the system instruction
 * pointer.  (Function name "invarient" is a historical misspelling kept
 * for consistency with upstream.)
 */
static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1311
 
1312
/*
 * Disable the clip stage (all-zero 3DSTATE_CLIP => pass-through).
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing,
 * matching the upstream intel-driver code.
 */
static void
gen8_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
1323
 
1324
/*
 * Program the rasterizer and setup-backend:
 *  - RASTER: no culling, everything else default,
 *  - SBE: forward 1 attribute (the texture coordinate) from the VUE to the
 *    PS with forced URB read length/offset,
 *  - SBE_SWIZ: no swizzling,
 *  - SF: defaults except the tri-fan provoking vertex.
 */
static void
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1373
 
1374
/*
 * Program the pixel-shader stage for the given kernel:
 *  - PSEXTRA marks the PS valid and attribute-consuming,
 *  - PSBLEND is opaque for the normal PS_KERNEL, but enables SRC_ALPHA /
 *    INV_SRC_ALPHA blending for PS_SUBPIC_KERNEL (subpicture compositing),
 *  - CONSTANT_PS points buffer 0 at curbe_offset in the dynamic state bo,
 *  - 3DSTATE_PS binds the kernel, thread count and dispatch mode,
 *  - finally the PS binding table pointer is set.
 *
 * kernel: index into render_state->render_kernels (PS_KERNEL or
 *         PS_SUBPIC_KERNEL).
 */
static void
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    /* leave headroom below the device's hardware maximum */
    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
	BEGIN_BATCH(batch, 2);
	OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
	OUT_BATCH(batch,
		GEN8_PS_BLEND_HAS_WRITEABLE_RT);
    	ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
	BEGIN_BATCH(batch, 2);
	OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
	OUT_BATCH(batch,
              	(GEN8_PS_BLEND_HAS_WRITEABLE_RT |
		 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
		 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
		 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
		 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
		 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
	ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    /* DW4-5. Scatch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1465
 
1466
/*
 * Bind a null depth buffer and zero out the hierarchical-depth, stencil
 * and clear-params state -- the video blit never depth-tests.
 */
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29)); /* null surface type => no depth */
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1511
 
1512
/*
 * Disable depth and stencil testing (all-zero WM_DEPTH_STENCIL state).
 */
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1524
 
1525
/*
 * Emit a no-op WM_HZ_OP (all zero) -- no hierarchical-depth resolve or
 * fast-clear operation is requested.
 */
static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1539
 
1540
/*
 * Point the pipeline at the CC viewport written into the dynamic state
 * buffer; the SF/CLIP viewport pointer is left null (clip is bypassed).
 */
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1557
 
1558
/*
 * Point the PS at the sampler state written into the dynamic state buffer.
 */
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1570
 
1571
 
1572
/* Drawing rectangle is unchanged from earlier gens; reuse the shared emitter. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1577
 
1578
/*
 * Emit the full gen8 3D pipeline command sequence for one draw, using the
 * PS kernel selected by `kernel` (PS_KERNEL or PS_SUBPIC_KERNEL).  All
 * indirect state must already be uploaded (gen8_render_setup_states /
 * gen8_subpicture_render_setup_states).  The whole sequence is emitted
 * atomically so it is never split across batch buffers.
 */
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000); /* reserve 4KB */
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1605
 
1606
/*
 * Entry point (via render_state->render_put_surface): composite src_rect of
 * obj_surface onto dst_rect of the current drawable.  Uploads state, clears
 * the destination region, emits the draw and flushes the batch.
 */
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
1624
 
1625
/*
 * Write the blend state used for subpicture compositing into the mapped
 * dynamic state buffer: standard src-alpha / inv-src-alpha "over" blending
 * for both color and alpha, with pre/post-blend clamping to [0, 1].
 */
static void
gen8_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
			render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}
1658
 
1659
static void
1660
gen8_subpic_render_upload_constants(VADriverContextP ctx,
1661
                                    struct object_surface *obj_surface)
1662
{
1663
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1664
    struct i965_render_state *render_state = &i965->render_state;
1665
    float *constant_buffer;
1666
    float global_alpha = 1.0;
1667
    unsigned int index = obj_surface->subpic_render_idx;
1668
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1669
    unsigned char *cc_ptr;
1670
 
1671
    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1672
        global_alpha = obj_subpic->global_alpha;
1673
    }
1674
 
1675
 
1676
    dri_bo_map(render_state->dynamic_state.bo, 1);
1677
    assert(render_state->dynamic_state.bo->virtual);
1678
 
1679
    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1680
				render_state->curbe_offset;
1681
 
1682
    constant_buffer = (float *) cc_ptr;
1683
    *constant_buffer = global_alpha;
1684
 
1685
    dri_bo_unmap(render_state->dynamic_state.bo);
1686
}
1687
 
1688
/*
 * Build all indirect state for subpicture compositing: destination and
 * subpicture source surfaces, sampler, viewport, color-calc, the alpha
 * blend state, the global-alpha constant and the vertex data.
 */
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen8_render_dest_surface_state(ctx, 0); /* destination surface at index 0 */
    gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    /* Vertex upload is shared with the pre-gen8 path */
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1705
 
1706
/*
 * Entry point (via render_state->render_put_subpicture): blend the current
 * subpicture of obj_surface over the destination.  Uploads state, emits the
 * draw with the subpicture PS kernel, uploads the image palette (for
 * paletted formats) and flushes the batch.
 */
static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1726
 
1727
static void
1728
gen8_render_terminate(VADriverContextP ctx)
1729
{
1730
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1731
    struct i965_render_state *render_state = &i965->render_state;
1732
 
1733
    dri_bo_unreference(render_state->vb.vertex_buffer);
1734
    render_state->vb.vertex_buffer = NULL;
1735
 
1736
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1737
    render_state->wm.surface_state_binding_table_bo = NULL;
1738
 
1739
    if (render_state->instruction_state.bo) {
1740
        dri_bo_unreference(render_state->instruction_state.bo);
1741
        render_state->instruction_state.bo = NULL;
1742
    }
1743
 
1744
    if (render_state->dynamic_state.bo) {
1745
        dri_bo_unreference(render_state->dynamic_state.bo);
1746
        render_state->dynamic_state.bo = NULL;
1747
    }
1748
 
1749
    if (render_state->indirect_state.bo) {
1750
        dri_bo_unreference(render_state->indirect_state.bo);
1751
        render_state->indirect_state.bo = NULL;
1752
    }
1753
 
1754
    if (render_state->draw_region) {
1755
        dri_bo_unreference(render_state->draw_region->bo);
1756
        free(render_state->draw_region);
1757
        render_state->draw_region = NULL;
1758
    }
1759
}
1760
 
1761
bool
1762
gen8_render_init(VADriverContextP ctx)
1763
{
1764
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1765
    struct i965_render_state *render_state = &i965->render_state;
1766
    int i, kernel_size;
1767
    unsigned int kernel_offset, end_offset;
1768
    unsigned char *kernel_ptr;
1769
    struct i965_kernel *kernel;
1770
 
1771
    render_state->render_put_surface = gen8_render_put_surface;
1772
    render_state->render_put_subpicture = gen8_render_put_subpicture;
1773
    render_state->render_terminate = gen8_render_terminate;
1774
 
1775
    memcpy(render_state->render_kernels, render_kernels_gen8,
1776
           sizeof(render_state->render_kernels));
1777
 
1778
    kernel_size = 4096;
1779
 
1780
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1781
        kernel = &render_state->render_kernels[i];
1782
 
1783
        if (!kernel->size)
1784
            continue;
1785
 
1786
        kernel_size += kernel->size;
1787
    }
1788
 
1789
    render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1790
                                  "kernel shader",
1791
                                  kernel_size,
1792
                                  0x1000);
1793
    if (render_state->instruction_state.bo == NULL) {
1794
        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1795
        return false;
1796
    }
1797
 
1798
    assert(render_state->instruction_state.bo);
1799
 
1800
    render_state->instruction_state.bo_size = kernel_size;
1801
    render_state->instruction_state.end_offset = 0;
1802
    end_offset = 0;
1803
 
1804
    dri_bo_map(render_state->instruction_state.bo, 1);
1805
    kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1806
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1807
        kernel = &render_state->render_kernels[i];
1808
        kernel_offset = end_offset;
1809
        kernel->kernel_offset = kernel_offset;
1810
 
1811
        if (!kernel->size)
1812
            continue;
1813
 
1814
        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1815
 
1816
        end_offset += ALIGN(kernel->size, ALIGNMENT);
1817
    }
1818
 
1819
    render_state->instruction_state.end_offset = end_offset;
1820
 
1821
    dri_bo_unmap(render_state->instruction_state.bo);
1822
 
1823
    return true;
1824
}