Subversion Repositories Kolibri OS

Rev

Rev 4358 | Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
  2.  
  3. /*
  4.  * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  *
  25.  * Authors:
  26.  *    Rob Clark <robclark@freedesktop.org>
  27.  */
  28.  
  29. #include "pipe/p_state.h"
  30. #include "util/u_string.h"
  31. #include "util/u_memory.h"
  32. #include "util/u_helpers.h"
  33. #include "util/u_format.h"
  34.  
  35. #include "freedreno_resource.h"
  36.  
  37. #include "fd3_emit.h"
  38. #include "fd3_blend.h"
  39. #include "fd3_context.h"
  40. #include "fd3_program.h"
  41. #include "fd3_rasterizer.h"
  42. #include "fd3_texture.h"
  43. #include "fd3_util.h"
  44. #include "fd3_zsa.h"
  45.  
  46. /* regid:          base const register
  47.  * prsc or dwords: buffer containing constant values
  48.  * sizedwords:     size of const value buffer
  49.  */
  50. void
  51. fd3_emit_constant(struct fd_ringbuffer *ring,
  52.                 enum adreno_state_block sb,
  53.                 uint32_t regid, uint32_t offset, uint32_t sizedwords,
  54.                 const uint32_t *dwords, struct pipe_resource *prsc)
  55. {
  56.         uint32_t i, sz;
  57.         enum adreno_state_src src;
  58.  
  59.         if (prsc) {
  60.                 sz = 0;
  61.                 src = SS_INDIRECT;
  62.         } else {
  63.                 sz = sizedwords;
  64.                 src = SS_DIRECT;
  65.         }
  66.  
  67.         /* we have this sometimes, not others.. perhaps we could be clever
  68.          * and figure out actually when we need to invalidate cache:
  69.          */
  70.         OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
  71.         OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
  72.         OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
  73.                         A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
  74.                         A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
  75.  
  76.         OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
  77.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
  78.                         CP_LOAD_STATE_0_STATE_SRC(src) |
  79.                         CP_LOAD_STATE_0_STATE_BLOCK(sb) |
  80.                         CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
  81.         if (prsc) {
  82.                 struct fd_bo *bo = fd_resource(prsc)->bo;
  83.                 OUT_RELOC(ring, bo, offset,
  84.                                 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
  85.         } else {
  86.                 OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
  87.                                 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
  88.                 dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
  89.         }
  90.         for (i = 0; i < sz; i++) {
  91.                 OUT_RING(ring, dwords[i]);
  92.         }
  93. }
  94.  
  95. static void
  96. emit_constants(struct fd_ringbuffer *ring,
  97.                 enum adreno_state_block sb,
  98.                 struct fd_constbuf_stateobj *constbuf,
  99.                 struct fd3_shader_stateobj *shader)
  100. {
  101.         uint32_t enabled_mask = constbuf->enabled_mask;
  102.         uint32_t base = 0;
  103.         unsigned i;
  104.  
  105.         // XXX TODO only emit dirty consts.. but we need to keep track if
  106.         // they are clobbered by a clear, gmem2mem, or mem2gmem..
  107.         constbuf->dirty_mask = enabled_mask;
  108.  
  109.         /* emit user constants: */
  110.         while (enabled_mask) {
  111.                 unsigned index = ffs(enabled_mask) - 1;
  112.                 struct pipe_constant_buffer *cb = &constbuf->cb[index];
  113.                 unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
  114.  
  115.                 // I expect that size should be a multiple of vec4's:
  116.                 assert(size == align(size, 4));
  117.  
  118.                 /* gallium could have const-buffer still bound, even though the
  119.                  * shader is not using it.  Writing consts above constlen (or
  120.                  * rather, HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will cause a
  121.                  * hang.
  122.                  */
  123.                 if ((base / 4) >= shader->constlen)
  124.                         break;
  125.  
  126.                 if (constbuf->dirty_mask & (1 << index)) {
  127.                         fd3_emit_constant(ring, sb, base,
  128.                                         cb->buffer_offset, size,
  129.                                         cb->user_buffer, cb->buffer);
  130.                         constbuf->dirty_mask &= ~(1 << index);
  131.                 }
  132.  
  133.                 base += size;
  134.                 enabled_mask &= ~(1 << index);
  135.         }
  136.  
  137.         /* emit shader immediates: */
  138.         if (shader) {
  139.                 for (i = 0; i < shader->immediates_count; i++) {
  140.                         fd3_emit_constant(ring, sb,
  141.                                         4 * (shader->first_immediate + i),
  142.                                         0, 4, shader->immediates[i].val, NULL);
  143.                 }
  144.         }
  145. }
  146.  
  147. #define VERT_TEX_OFF    0
  148. #define FRAG_TEX_OFF    16
  149. #define BASETABLE_SZ    14
  150.  
  151. static void
  152. emit_textures(struct fd_ringbuffer *ring,
  153.                 enum adreno_state_block sb,
  154.                 struct fd_texture_stateobj *tex)
  155. {
  156.         static const unsigned tex_off[] = {
  157.                         [SB_VERT_TEX] = VERT_TEX_OFF,
  158.                         [SB_FRAG_TEX] = FRAG_TEX_OFF,
  159.         };
  160.         static const enum adreno_state_block mipaddr[] = {
  161.                         [SB_VERT_TEX] = SB_VERT_MIPADDR,
  162.                         [SB_FRAG_TEX] = SB_FRAG_MIPADDR,
  163.         };
  164.         unsigned i, j;
  165.  
  166.         assert(tex->num_samplers == tex->num_textures);  // TODO check..
  167.  
  168.         if (!tex->num_samplers)
  169.                 return;
  170.  
  171.         /* output sampler state: */
  172.         OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
  173.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
  174.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  175.                         CP_LOAD_STATE_0_STATE_BLOCK(sb) |
  176.                         CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
  177.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
  178.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  179.         for (i = 0; i < tex->num_samplers; i++) {
  180.                 struct fd3_sampler_stateobj *sampler =
  181.                                 fd3_sampler_stateobj(tex->samplers[i]);
  182.                 OUT_RING(ring, sampler->texsamp0);
  183.                 OUT_RING(ring, sampler->texsamp1);
  184.         }
  185.  
  186.         /* emit texture state: */
  187.         OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
  188.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
  189.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  190.                         CP_LOAD_STATE_0_STATE_BLOCK(sb) |
  191.                         CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
  192.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
  193.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  194.         for (i = 0; i < tex->num_textures; i++) {
  195.                 struct fd3_pipe_sampler_view *view =
  196.                                 fd3_pipe_sampler_view(tex->textures[i]);
  197.                 OUT_RING(ring, view->texconst0);
  198.                 OUT_RING(ring, view->texconst1);
  199.                 OUT_RING(ring, view->texconst2 |
  200.                                 A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
  201.                 OUT_RING(ring, view->texconst3);
  202.         }
  203.  
  204.         /* emit mipaddrs: */
  205.         OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
  206.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) |
  207.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  208.                         CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
  209.                         CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
  210.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
  211.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  212.         for (i = 0; i < tex->num_textures; i++) {
  213.                 struct fd3_pipe_sampler_view *view =
  214.                                 fd3_pipe_sampler_view(tex->textures[i]);
  215.                 OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0);
  216.                 /* I think each entry is a ptr to mipmap level.. for now, just
  217.                  * pad w/ null's until I get around to actually implementing
  218.                  * mipmap support..
  219.                  */
  220.                 for (j = 1; j < BASETABLE_SZ; j++) {
  221.                         OUT_RING(ring, 0x00000000);
  222.                 }
  223.         }
  224. }
  225.  
  226. static void
  227. emit_cache_flush(struct fd_ringbuffer *ring)
  228. {
  229.         OUT_PKT3(ring, CP_EVENT_WRITE, 1);
  230.         OUT_RING(ring, CACHE_FLUSH);
  231.  
  232.         OUT_PKT3(ring, CP_DRAW_INDX, 3);
  233.         OUT_RING(ring, 0x00000000);
  234.         OUT_RING(ring, DRAW(DI_PT_POINTLIST, DI_SRC_SEL_AUTO_INDEX,
  235.                         INDEX_SIZE_IGN, IGNORE_VISIBILITY));
  236.         OUT_RING(ring, 0);                                      /* NumIndices */
  237.  
  238.         OUT_PKT3(ring, CP_NOP, 4);
  239.         OUT_RING(ring, 0x00000000);
  240.         OUT_RING(ring, 0x00000000);
  241.         OUT_RING(ring, 0x00000000);
  242.         OUT_RING(ring, 0x00000000);
  243.  
  244.         OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
  245.         OUT_RING(ring, 0x00000000);
  246. }
  247.  
  248. /* emit texture state for mem->gmem restore operation.. eventually it would
  249.  * be good to get rid of this and use normal CSO/etc state for more of these
  250.  * special cases, but for now the compiler is not sufficient..
  251.  */
  252. void
  253. fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
  254. {
  255.         struct fd_resource *rsc = fd_resource(psurf->texture);
  256.  
  257.         /* output sampler state: */
  258.         OUT_PKT3(ring, CP_LOAD_STATE, 4);
  259.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
  260.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  261.                         CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
  262.                         CP_LOAD_STATE_0_NUM_UNIT(1));
  263.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
  264.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  265.         OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
  266.                         A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
  267.                         A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
  268.                         A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
  269.                         A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
  270.         OUT_RING(ring, 0x00000000);
  271.  
  272.         /* emit texture state: */
  273.         OUT_PKT3(ring, CP_LOAD_STATE, 6);
  274.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
  275.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  276.                         CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
  277.                         CP_LOAD_STATE_0_NUM_UNIT(1));
  278.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
  279.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  280.         OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
  281.                         0x40000000 | // XXX
  282.                         fd3_tex_swiz(psurf->format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
  283.                                         PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
  284.         OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
  285.                         A3XX_TEX_CONST_1_WIDTH(psurf->width) |
  286.                         A3XX_TEX_CONST_1_HEIGHT(psurf->height));
  287.         OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) |
  288.                         A3XX_TEX_CONST_2_INDX(0));
  289.         OUT_RING(ring, 0x00000000);
  290.  
  291.         /* emit mipaddrs: */
  292.         OUT_PKT3(ring, CP_LOAD_STATE, 3);
  293.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
  294.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  295.                         CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
  296.                         CP_LOAD_STATE_0_NUM_UNIT(1));
  297.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
  298.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  299.         OUT_RELOC(ring, rsc->bo, 0, 0, 0);
  300. }
  301.  
  302. void
  303. fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
  304.                 struct fd_program_stateobj *prog,
  305.                 struct fd3_vertex_buf *vbufs, uint32_t n)
  306. {
  307.         struct fd3_shader_stateobj *vp = prog->vp;
  308.         uint32_t i;
  309.  
  310.         n = MIN2(n, vp->inputs_count);
  311.  
  312.         for (i = 0; i < n; i++) {
  313.                 struct pipe_resource *prsc = vbufs[i].prsc;
  314.                 struct fd_resource *rsc = fd_resource(prsc);
  315.                 enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vbufs[i].format);
  316.                 bool switchnext = (i != (n - 1));
  317.                 uint32_t fs = util_format_get_blocksize(vbufs[i].format);
  318.  
  319.                 OUT_PKT0(ring, REG_A3XX_VFD_FETCH(i), 2);
  320.                 OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
  321.                                 A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
  322.                                 COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
  323.                                 A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
  324.                                 A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
  325.                 OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
  326.  
  327.                 OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
  328.                 OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
  329.                                 A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
  330.                                 A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
  331.                                 A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
  332.                                 A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
  333.                                 A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
  334.                                 COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
  335.         }
  336. }
  337.  
  338. void
  339. fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
  340. {
  341.         struct fd_ringbuffer *ring = ctx->ring;
  342.  
  343.         if (dirty & FD_DIRTY_SAMPLE_MASK) {
  344.                 OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
  345.                 OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
  346.                                 A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
  347.                                 A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
  348.         }
  349.  
  350.         if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
  351.                 struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
  352.                 struct pipe_stencil_ref *sr = &ctx->stencil_ref;
  353.  
  354.                 fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control);
  355.  
  356.                 OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
  357.                 OUT_RING(ring, zsa->rb_depth_control);
  358.  
  359.                 OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
  360.                 OUT_RING(ring, zsa->rb_stencil_control);
  361.  
  362.                 OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
  363.                 OUT_RING(ring, zsa->rb_stencilrefmask |
  364.                                 A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
  365.                 OUT_RING(ring, zsa->rb_stencilrefmask_bf |
  366.                                 A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
  367.         }
  368.  
  369.         if (dirty & FD_DIRTY_RASTERIZER) {
  370.                 struct fd3_rasterizer_stateobj *rasterizer =
  371.                                 fd3_rasterizer_stateobj(ctx->rasterizer);
  372.  
  373.                 OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
  374.                 OUT_RING(ring, rasterizer->gras_su_mode_control);
  375.  
  376.                 OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
  377.                 OUT_RING(ring, rasterizer->gras_su_point_minmax);
  378.                 OUT_RING(ring, rasterizer->gras_su_point_size);
  379.  
  380.                 OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
  381.                 OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
  382.                 OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
  383.  
  384.                 OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
  385.                 OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
  386.         }
  387.  
  388.         if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
  389.                 struct fd3_rasterizer_stateobj *rasterizer =
  390.                                 fd3_rasterizer_stateobj(ctx->rasterizer);
  391.                 struct fd3_shader_stateobj *fp = ctx->prog.fp;
  392.                 uint32_t stride_in_vpc;
  393.  
  394.                 stride_in_vpc = align(fp->total_in, 4) / 4;
  395.                 if (stride_in_vpc > 0)
  396.                         stride_in_vpc = MAX2(stride_in_vpc, 2);
  397.  
  398.                 OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
  399.                 OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
  400.                                 A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc));
  401.         }
  402.  
  403.         if (dirty & FD_DIRTY_SCISSOR) {
  404.                 struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
  405.  
  406.                 OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
  407.                 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
  408.                                 A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
  409.                 OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
  410.                                 A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
  411.  
  412.                 ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
  413.                 ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
  414.                 ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
  415.                 ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
  416.         }
  417.  
  418.         if (dirty & FD_DIRTY_VIEWPORT) {
  419.                 OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
  420.                 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
  421.                 OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
  422.                 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5));
  423.                 OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
  424.                 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
  425.                 OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
  426.         }
  427.  
  428.         if (dirty & FD_DIRTY_PROG)
  429.                 fd3_program_emit(ring, &ctx->prog);
  430.  
  431.         if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
  432.                 struct fd_program_stateobj *prog = &ctx->prog;
  433.  
  434.                 emit_constants(ring,  SB_VERT_SHADER,
  435.                                 &ctx->constbuf[PIPE_SHADER_VERTEX],
  436.                                 (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
  437.                 emit_constants(ring, SB_FRAG_SHADER,
  438.                                 &ctx->constbuf[PIPE_SHADER_FRAGMENT],
  439.                                 (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL);
  440.         }
  441.  
  442.         if (dirty & FD_DIRTY_BLEND) {
  443.                 struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
  444.                 uint32_t i;
  445.  
  446.                 for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
  447.                         OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
  448.                         OUT_RING(ring, blend->rb_mrt[i].control);
  449.  
  450.                         OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
  451.                         OUT_RING(ring, blend->rb_mrt[i].blend_control);
  452.                 }
  453.         }
  454.  
  455.         if (dirty & FD_DIRTY_VERTTEX)
  456.                 emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
  457.  
  458.         if (dirty & FD_DIRTY_FRAGTEX)
  459.                 emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
  460.  
  461.         ctx->dirty &= ~dirty;
  462. }
  463.  
  464. /* emit setup at begin of new cmdstream buffer (don't rely on previous
  465.  * state, there could have been a context switch between ioctls):
  466.  */
  467. void
  468. fd3_emit_restore(struct fd_context *ctx)
  469. {
  470.         struct fd3_context *fd3_ctx = fd3_context(ctx);
  471.         struct fd_ringbuffer *ring = ctx->ring;
  472.         int i;
  473.  
  474.         OUT_PKT3(ring, CP_REG_RMW, 3);
  475.         OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
  476.         OUT_RING(ring, 0xfffcffff);
  477.         OUT_RING(ring, 0x00000000);
  478.  
  479.         OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
  480.         OUT_RING(ring, 0x00007fff);
  481.  
  482.         OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3);
  483.         OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_CTRL_REG */
  484.         OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
  485.         OUT_RING(ring, 0x00000000);                  /* SP_VS_PVT_MEM_SIZE_REG */
  486.  
  487.         OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3);
  488.         OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_CTRL_REG */
  489.         OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
  490.         OUT_RING(ring, 0x00000000);                  /* SP_FS_PVT_MEM_SIZE_REG */
  491.  
  492.         OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
  493.         OUT_RING(ring, 0x0000000b);                  /* PC_VERTEX_REUSE_BLOCK_CNTL */
  494.  
  495.         OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
  496.         OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
  497.                         A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
  498.                         A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
  499.  
  500.         OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
  501.         OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
  502.                         A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
  503.                         A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
  504.         OUT_RING(ring, 0x00000000);        /* UNKNOWN_20C3 */
  505.  
  506.         OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
  507.         OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
  508.                         A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
  509.  
  510.         OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C81, 1);
  511.         OUT_RING(ring, 0x00000001);        /* UNKNOWN_0C81 */
  512.  
  513.         OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
  514.         OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
  515.                         A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
  516.                         A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF));
  517.  
  518.         OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
  519.         OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
  520.                         A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
  521.                         A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF));
  522.  
  523.         OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
  524.         OUT_RING(ring, 0x00000000);        /* VPC_VARY_CYLWRAP_ENABLE_0 */
  525.         OUT_RING(ring, 0x00000000);        /* VPC_VARY_CYLWRAP_ENABLE_1 */
  526.  
  527.         OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
  528.         OUT_RING(ring, 0x00000001);        /* UNKNOWN_0E43 */
  529.  
  530.         OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
  531.         OUT_RING(ring, 0x00000001);        /* UNKNOWN_0F03 */
  532.  
  533.         OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
  534.         OUT_RING(ring, 0x00000003);        /* UNKNOWN_0EE0 */
  535.  
  536.         OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
  537.         OUT_RING(ring, 0x00000001);        /* UNKNOWN_0C3D */
  538.  
  539.         OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
  540.         OUT_RING(ring, 0x00000000);        /* HLSQ_PERFCOUNTER0_SELECT */
  541.  
  542.         OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
  543.         OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
  544.                         A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
  545.         OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
  546.                         A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
  547.  
  548.         OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
  549.         OUT_RING(ring, 0x00000001);        /* UCHE_CACHE_MODE_CONTROL_REG */
  550.  
  551.         OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
  552.         OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
  553.  
  554.         OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
  555.         OUT_RING(ring, 0x00000000);                  /* GRAS_CL_CLIP_CNTL */
  556.  
  557.         OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
  558.         OUT_RING(ring, 0xffc00010);        /* GRAS_SU_POINT_MINMAX */
  559.         OUT_RING(ring, 0x00000008);        /* GRAS_SU_POINT_SIZE */
  560.  
  561.         OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
  562.         OUT_RING(ring, 0xffffffff);        /* PC_RESTART_INDEX */
  563.  
  564.         OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
  565.         OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(0) |
  566.                         A3XX_PA_SC_WINDOW_OFFSET_Y(0));
  567.  
  568.         OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
  569.         OUT_RING(ring, 0x00000000);        /* RB_BLEND_RED */
  570.         OUT_RING(ring, 0x00000000);        /* RB_BLEND_GREEN */
  571.         OUT_RING(ring, 0x00000000);        /* RB_BLEND_BLUE */
  572.         OUT_RING(ring, 0x3c0000ff);        /* RB_BLEND_ALPHA */
  573.  
  574.         for (i = 0; i < 6; i++) {
  575.                 OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
  576.                 OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].X */
  577.                 OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].Y */
  578.                 OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].Z */
  579.                 OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].W */
  580.         }
  581.  
  582.         emit_cache_flush(ring);
  583. }
  584.