Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
  2.  
  3. /*
  4.  * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  *
  25.  * Authors:
  26.  *    Rob Clark <robclark@freedesktop.org>
  27.  */
  28.  
  29. #include "pipe/p_state.h"
  30. #include "util/u_string.h"
  31. #include "util/u_memory.h"
  32. #include "util/u_inlines.h"
  33. #include "util/u_format.h"
  34. #include "tgsi/tgsi_dump.h"
  35. #include "tgsi/tgsi_parse.h"
  36.  
  37. #include "fd3_program.h"
  38. #include "fd3_compiler.h"
  39. #include "fd3_texture.h"
  40. #include "fd3_util.h"
  41.  
  42. static void
  43. delete_shader(struct fd3_shader_stateobj *so)
  44. {
  45.         ir3_shader_destroy(so->ir);
  46.         fd_bo_del(so->bo);
  47.         free(so);
  48. }
  49.  
  50. static void
  51. assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so)
  52. {
  53.         struct fd_context *ctx = fd_context(pctx);
  54.         uint32_t sz, *bin;
  55.  
  56.         bin = ir3_shader_assemble(so->ir, &so->info);
  57.         sz = so->info.sizedwords * 4;
  58.  
  59.         so->bo = fd_bo_new(ctx->screen->dev, sz,
  60.                         DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
  61.                         DRM_FREEDRENO_GEM_TYPE_KMEM);
  62.  
  63.         memcpy(fd_bo_map(so->bo), bin, sz);
  64.  
  65.         free(bin);
  66.  
  67.         so->instrlen = so->info.sizedwords / 8;
  68.         so->constlen = so->info.max_const + 1;
  69. }
  70.  
  71. /* for vertex shader, the inputs are loaded into registers before the shader
  72.  * is executed, so max_regs from the shader instructions might not properly
  73.  * reflect the # of registers actually used:
  74.  */
  75. static void
  76. fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
  77. {
  78.         unsigned i;
  79.         for (i = 0; i < so->inputs_count; i++) {
  80.                 so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2);
  81.         }
  82. }
  83.  
  84. static struct fd3_shader_stateobj *
  85. create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
  86.                 enum shader_t type)
  87. {
  88.         struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
  89.         int ret;
  90.  
  91.         if (!so)
  92.                 return NULL;
  93.  
  94.         so->type = type;
  95.  
  96.         if (fd_mesa_debug & FD_DBG_DISASM) {
  97.                 DBG("dump tgsi: type=%d", so->type);
  98.                 tgsi_dump(cso->tokens, 0);
  99.         }
  100.  
  101.         if (type == SHADER_FRAGMENT) {
  102.                 /* we seem to get wrong colors (maybe swap/endianess or hw issue?)
  103.                  * with full precision color reg.  And blob driver only seems to
  104.                  * use half precision register for color output (that I can find
  105.                  * so far), even with highp precision.  So for force half precision
  106.                  * for frag shader:
  107.                  */
  108.                 so->half_precision = true;
  109.         }
  110.  
  111.         ret = fd3_compile_shader(so, cso->tokens);
  112.         if (ret) {
  113.                 debug_error("compile failed!");
  114.                 goto fail;
  115.         }
  116.  
  117.         assemble_shader(pctx, so);
  118.         if (!so->bo) {
  119.                 debug_error("assemble failed!");
  120.                 goto fail;
  121.         }
  122.  
  123.         if (type == SHADER_VERTEX)
  124.                 fixup_vp_regfootprint(so);
  125.  
  126.         if (fd_mesa_debug & FD_DBG_DISASM) {
  127.                 DBG("disassemble: type=%d", so->type);
  128.                 disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type);
  129.         }
  130.  
  131.         return so;
  132.  
  133. fail:
  134.         delete_shader(so);
  135.         return NULL;
  136. }
  137.  
  138. static void *
  139. fd3_fp_state_create(struct pipe_context *pctx,
  140.                 const struct pipe_shader_state *cso)
  141. {
  142.         return create_shader(pctx, cso, SHADER_FRAGMENT);
  143. }
  144.  
  145. static void
  146. fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso)
  147. {
  148.         struct fd3_shader_stateobj *so = hwcso;
  149.         delete_shader(so);
  150. }
  151.  
  152. static void
  153. fd3_fp_state_bind(struct pipe_context *pctx, void *hwcso)
  154. {
  155.         struct fd_context *ctx = fd_context(pctx);
  156.         ctx->prog.fp = hwcso;
  157.         ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
  158.         ctx->dirty |= FD_DIRTY_PROG;
  159. }
  160.  
  161. static void *
  162. fd3_vp_state_create(struct pipe_context *pctx,
  163.                 const struct pipe_shader_state *cso)
  164. {
  165.         return create_shader(pctx, cso, SHADER_VERTEX);
  166. }
  167.  
  168. static void
  169. fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
  170. {
  171.         struct fd3_shader_stateobj *so = hwcso;
  172.         delete_shader(so);
  173. }
  174.  
  175. static void
  176. fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso)
  177. {
  178.         struct fd_context *ctx = fd_context(pctx);
  179.         ctx->prog.vp = hwcso;
  180.         ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
  181.         ctx->dirty |= FD_DIRTY_PROG;
  182. }
  183.  
  184. static void
  185. emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
  186. {
  187.         struct ir3_shader_info *si = &so->info;
  188.         enum adreno_state_block sb;
  189.         uint32_t i, *bin;
  190.  
  191.         if (so->type == SHADER_VERTEX) {
  192.                 sb = SB_VERT_SHADER;
  193.         } else {
  194.                 sb = SB_FRAG_SHADER;
  195.         }
  196.  
  197.         // XXX use SS_INDIRECT
  198.         bin = fd_bo_map(so->bo);
  199.         OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords);
  200.         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
  201.                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
  202.                         CP_LOAD_STATE_0_STATE_BLOCK(sb) |
  203.                         CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
  204.         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
  205.                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
  206.         for (i = 0; i < si->sizedwords; i++)
  207.                 OUT_RING(ring, bin[i]);
  208. }
  209.  
  210. void
  211. fd3_program_emit(struct fd_ringbuffer *ring,
  212.                 struct fd_program_stateobj *prog)
  213. {
  214.         struct fd3_shader_stateobj *vp = prog->vp;
  215.         struct fd3_shader_stateobj *fp = prog->fp;
  216.         struct ir3_shader_info *vsi = &vp->info;
  217.         struct ir3_shader_info *fsi = &fp->info;
  218.         int i;
  219.  
  220.         /* we could probably divide this up into things that need to be
  221.          * emitted if frag-prog is dirty vs if vert-prog is dirty..
  222.          */
  223.  
  224.         OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
  225.         OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
  226.                         A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
  227.                         A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
  228.         OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
  229.                         A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
  230.         OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
  231.         OUT_RING(ring, 0x00000000);        /* HLSQ_CONTROL_3_REG */
  232.         OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
  233.                         A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
  234.                         A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen));
  235.         OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
  236.                         A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
  237.                         A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fp->instrlen));
  238.  
  239.         OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
  240.         OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
  241.                         A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
  242.                         // XXX "resolve" (?) bit set on gmem->mem pass..
  243. //                      COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
  244.                         // XXX sometimes 0, sometimes 1:
  245.                         A3XX_SP_SP_CTRL_REG_LOMODE(1));
  246.  
  247.         /* emit unknown sequence of perfcounter disables that the blob
  248.          * emits as part of the program state..
  249.          */
  250.         for (i = 0; i < 6; i++) {
  251.                 OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1);
  252.                 OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER4_SELECT */
  253.  
  254.                 OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1);
  255.                 OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER4_SELECT */
  256.         }
  257.  
  258.         OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
  259.         OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
  260.  
  261.         OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
  262.         OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
  263.                         A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
  264.                         A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
  265.                         A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
  266.                         A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
  267.                         A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
  268.                         A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
  269.                         COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
  270.                         A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
  271.         OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
  272.                         A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
  273.                         A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0)));
  274.         OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(vp->pos_regid) |
  275.                         A3XX_SP_VS_PARAM_REG_PSIZEREGID(vp->psize_regid) |
  276.                         A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(vp->outputs_count));
  277.  
  278.         assert(vp->outputs_count >= fp->inputs_count);
  279.  
  280.         for (i = 0; i < fp->inputs_count; ) {
  281.                 uint32_t reg = 0;
  282.  
  283.                 OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1);
  284.  
  285.                 reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[i].regid);
  286.                 reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[i].compmask);
  287.                 i++;
  288.  
  289.                 reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[i].regid);
  290.                 reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[i].compmask);
  291.                 i++;
  292.  
  293.                 OUT_RING(ring, reg);
  294.         }
  295.  
  296.         for (i = 0; i < fp->inputs_count; ) {
  297.                 uint32_t reg = 0;
  298.  
  299.                 OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1);
  300.  
  301.                 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[i++].inloc);
  302.                 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[i++].inloc);
  303.                 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[i++].inloc);
  304.                 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[i++].inloc);
  305.  
  306.                 OUT_RING(ring, reg);
  307.         }
  308.  
  309. #if 0
  310.         /* for some reason, when I write SP_{VS,FS}_OBJ_START_REG I get:
  311. [  666.663665] kgsl kgsl-3d0: |a3xx_err_callback| RBBM | AHB bus error | READ | addr=201 | ports=1:3
  312. [  666.664001] kgsl kgsl-3d0: |a3xx_err_callback| ringbuffer AHB error interrupt
  313. [  670.680909] kgsl kgsl-3d0: |adreno_idle| spun too long waiting for RB to idle
  314. [  670.681062] kgsl kgsl-3d0: |kgsl-3d0| Dump Started
  315. [  670.681123] kgsl kgsl-3d0: POWER: FLAGS = 00000007 | ACTIVE POWERLEVEL = 00000001
  316. [  670.681214] kgsl kgsl-3d0: POWER: INTERVAL TIMEOUT = 0000000A
  317. [  670.681367] kgsl kgsl-3d0: GRP_CLK = 325000000
  318. [  670.681489] kgsl kgsl-3d0: BUS CLK = 0
  319.          */
  320.         OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
  321.         OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
  322.                         A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
  323.         OUT_RELOC(ring, vp->bo, 0, 0);    /* SP_VS_OBJ_START_REG */
  324. #endif
  325.  
  326.         OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
  327.         OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
  328.  
  329.         OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
  330.         OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
  331.                         A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
  332.                         A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
  333.                         A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
  334.                         A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
  335.                         A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
  336.                         A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
  337.                         COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
  338.                         A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
  339.         OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
  340.                         A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
  341.                         A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
  342.                         A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
  343.  
  344. #if 0
  345.         OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
  346.         OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
  347.                         A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen));
  348.         OUT_RELOC(ring, fp->bo, 0, 0);    /* SP_FS_OBJ_START_REG */
  349. #endif
  350.  
  351.         OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
  352.         OUT_RING(ring, 0x00000000);        /* SP_FS_FLAT_SHAD_MODE_REG_0 */
  353.         OUT_RING(ring, 0x00000000);        /* SP_FS_FLAT_SHAD_MODE_REG_1 */
  354.  
  355.         OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
  356.         OUT_RING(ring, 0x00000000);        /* SP_FS_OUTPUT_REG */
  357.  
  358.         OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
  359.         OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(fp->color_regid) |
  360.                         COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
  361.         OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
  362.         OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
  363.         OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
  364.  
  365.         OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
  366.         OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
  367.                         A3XX_VPC_ATTR_THRDASSIGN(1) |
  368.                         A3XX_VPC_ATTR_LMSIZE(1));
  369.         OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
  370.                         A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
  371.  
  372.         OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
  373.         OUT_RING(ring, fp->vinterp[0]);    /* VPC_VARYING_INTERP[0].MODE */
  374.         OUT_RING(ring, fp->vinterp[1]);    /* VPC_VARYING_INTERP[1].MODE */
  375.         OUT_RING(ring, fp->vinterp[2]);    /* VPC_VARYING_INTERP[2].MODE */
  376.         OUT_RING(ring, fp->vinterp[3]);    /* VPC_VARYING_INTERP[3].MODE */
  377.  
  378.         OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
  379.         OUT_RING(ring, fp->vpsrepl[0]);    /* VPC_VARYING_PS_REPL[0].MODE */
  380.         OUT_RING(ring, fp->vpsrepl[1]);    /* VPC_VARYING_PS_REPL[1].MODE */
  381.         OUT_RING(ring, fp->vpsrepl[2]);    /* VPC_VARYING_PS_REPL[2].MODE */
  382.         OUT_RING(ring, fp->vpsrepl[3]);    /* VPC_VARYING_PS_REPL[3].MODE */
  383.  
  384.         OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
  385.         OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
  386.                         A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
  387.  
  388.         emit_shader(ring, vp);
  389.  
  390.         OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
  391.         OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */
  392.  
  393.         emit_shader(ring, fp);
  394.  
  395.         OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
  396.         OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */
  397.  
  398.         OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
  399.         OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
  400.                         A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
  401.                         A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) |
  402.                         A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count));
  403.         OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
  404.                         A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
  405.                         A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
  406. }
  407.  
  408. /* once the compiler is good enough, we should construct TGSI in the
  409.  * core freedreno driver, and then let the a2xx/a3xx parts compile
  410.  * the internal shaders from TGSI the same as regular shaders.  This
  411.  * would be the first step towards handling most of clear (and the
  412.  * gmem<->mem blits) from the core via normal state changes and shader
  413.  * state objects.
  414.  *
  415.  * (Well, there would still be some special bits, because there are
  416.  * some registers that don't get set for normal draw, but this should
  417.  * be relatively small and could be handled via callbacks from core
  418.  * into a2xx/a3xx..)
  419.  */
  420. static struct fd3_shader_stateobj *
  421. create_internal_shader(struct pipe_context *pctx, enum shader_t type,
  422.                 struct ir3_shader *ir)
  423. {
  424.         struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
  425.  
  426.         if (!so) {
  427.                 ir3_shader_destroy(ir);
  428.                 return NULL;
  429.         }
  430.  
  431.         so->type = type;
  432.         so->ir = ir;
  433.  
  434.         assemble_shader(pctx, so);
  435.         assert(so->bo);
  436.  
  437.         return so;
  438. }
  439.  
  440. /* Creates shader:
  441.  *    (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x
  442.  *    (rpt5)nop
  443.  *    sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
  444.  *    (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x
  445.  *    end
  446.  */
  447. static struct fd3_shader_stateobj *
  448. create_blit_fp(struct pipe_context *pctx)
  449. {
  450.         struct fd3_shader_stateobj *so;
  451.         struct ir3_shader *ir = ir3_shader_create();
  452.         struct ir3_instruction *instr;
  453.  
  454.         /* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */
  455.         instr = ir3_instr_create(ir, 2, OPC_BARY_F);
  456.         instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
  457.         instr->repeat = 1;
  458.  
  459.         ir3_reg_create(instr, regid(0,2), IR3_REG_EI);    /* (ei)r0.z */
  460.         ir3_reg_create(instr, 0, IR3_REG_R |              /* (r)0 */
  461.                         IR3_REG_IMMED)->iim_val = 0;
  462.         ir3_reg_create(instr, regid(0,0), 0);             /* r0.x */
  463.  
  464.         /* (rpt5)nop */
  465.         instr = ir3_instr_create(ir, 0, OPC_NOP);
  466.         instr->repeat = 5;
  467.  
  468.         /* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */
  469.         instr = ir3_instr_create(ir, 5, OPC_SAM);
  470.         instr->cat5.samp = 0;
  471.         instr->cat5.tex  = 0;
  472.         instr->cat5.type = TYPE_F32;
  473.  
  474.         ir3_reg_create(instr, regid(0,0),                 /* (xyzw)r0.x */
  475.                         0)->wrmask = 0xf;
  476.         ir3_reg_create(instr, regid(0,2), 0);             /* r0.z */
  477.  
  478.         /* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */
  479.         instr = ir3_instr_create(ir, 1, 0);  /* mov/cov instructions have no opc */
  480.         instr->flags = IR3_INSTR_SY;
  481.         instr->repeat = 3;
  482.         instr->cat1.src_type = TYPE_F32;
  483.         instr->cat1.dst_type = TYPE_F16;
  484.  
  485.         ir3_reg_create(instr, regid(0,0), IR3_REG_HALF);  /* hr0.x */
  486.         ir3_reg_create(instr, regid(0,0), IR3_REG_R);     /* (r)r0.x */
  487.  
  488.         /* end */
  489.         instr = ir3_instr_create(ir, 0, OPC_END);
  490.  
  491.         so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
  492.         if (!so)
  493.                 return NULL;
  494.  
  495.         so->color_regid = regid(0,0);
  496.         so->half_precision = true;
  497.         so->inputs_count = 1;
  498.         so->inputs[0].inloc = 8;
  499.         so->inputs[0].compmask = 0x3;
  500.         so->total_in = 2;
  501.         so->samplers_count = 1;
  502.  
  503.         so->vpsrepl[0] = 0x99999999;
  504.         so->vpsrepl[1] = 0x99999999;
  505.         so->vpsrepl[2] = 0x99999999;
  506.         so->vpsrepl[3] = 0x99999999;
  507.  
  508.         return so;
  509. }
  510.  
  511. /* Creates shader:
  512.  *    (sy)(ss)end
  513.  */
  514. static struct fd3_shader_stateobj *
  515. create_blit_vp(struct pipe_context *pctx)
  516. {
  517.         struct fd3_shader_stateobj *so;
  518.         struct ir3_shader *ir = ir3_shader_create();
  519.         struct ir3_instruction *instr;
  520.  
  521.         /* (sy)(ss)end */
  522.         instr = ir3_instr_create(ir, 0, OPC_END);
  523.         instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
  524.  
  525.         so = create_internal_shader(pctx, SHADER_VERTEX, ir);
  526.         if (!so)
  527.                 return NULL;
  528.  
  529.         so->pos_regid = regid(1,0);
  530.         so->psize_regid = regid(63,0);
  531.         so->inputs_count = 2;
  532.         so->inputs[0].regid = regid(0,0);
  533.         so->inputs[0].compmask = 0xf;
  534.         so->inputs[1].regid = regid(1,0);
  535.         so->inputs[1].compmask = 0xf;
  536.         so->total_in = 8;
  537.         so->outputs_count = 1;
  538.         so->outputs[0].regid = regid(0,0);
  539.  
  540.         fixup_vp_regfootprint(so);
  541.  
  542.         return so;
  543. }
  544.  
  545. /* Creates shader:
  546.  *    (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x
  547.  *    end
  548.  */
  549. static struct fd3_shader_stateobj *
  550. create_solid_fp(struct pipe_context *pctx)
  551. {
  552.         struct fd3_shader_stateobj *so;
  553.         struct ir3_shader *ir = ir3_shader_create();
  554.         struct ir3_instruction *instr;
  555.  
  556.         /* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */
  557.         instr = ir3_instr_create(ir, 1, 0);  /* mov/cov instructions have no opc */
  558.         instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
  559.         instr->repeat = 3;
  560.         instr->cat1.src_type = TYPE_F16;
  561.         instr->cat1.dst_type = TYPE_F16;
  562.  
  563.         ir3_reg_create(instr, regid(0,0), IR3_REG_HALF);  /* hr0.x */
  564.         ir3_reg_create(instr, regid(0,0), IR3_REG_HALF |  /* (r)hc0.x */
  565.                         IR3_REG_CONST | IR3_REG_R);
  566.  
  567.         /* end */
  568.         instr = ir3_instr_create(ir, 0, OPC_END);
  569.  
  570.         so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
  571.         if (!so)
  572.                 return NULL;
  573.  
  574.         so->color_regid = regid(0,0);
  575.         so->half_precision = true;
  576.         so->inputs_count = 0;
  577.         so->total_in = 0;
  578.  
  579.         return so;
  580. }
  581.  
  582. /* Creates shader:
  583.  *    (sy)(ss)end
  584.  */
  585. static struct fd3_shader_stateobj *
  586. create_solid_vp(struct pipe_context *pctx)
  587. {
  588.         struct fd3_shader_stateobj *so;
  589.         struct ir3_shader *ir = ir3_shader_create();
  590.         struct ir3_instruction *instr;
  591.  
  592.         /* (sy)(ss)end */
  593.         instr = ir3_instr_create(ir, 0, OPC_END);
  594.         instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
  595.  
  596.  
  597.         so = create_internal_shader(pctx, SHADER_VERTEX, ir);
  598.         if (!so)
  599.                 return NULL;
  600.  
  601.         so->pos_regid = regid(0,0);
  602.         so->psize_regid = regid(63,0);
  603.         so->inputs_count = 1;
  604.         so->inputs[0].regid = regid(0,0);
  605.         so->inputs[0].compmask = 0xf;
  606.         so->total_in = 4;
  607.         so->outputs_count = 0;
  608.  
  609.         fixup_vp_regfootprint(so);
  610.  
  611.         return so;
  612. }
  613.  
  614. void
  615. fd3_prog_init(struct pipe_context *pctx)
  616. {
  617.         struct fd_context *ctx = fd_context(pctx);
  618.  
  619.         pctx->create_fs_state = fd3_fp_state_create;
  620.         pctx->bind_fs_state = fd3_fp_state_bind;
  621.         pctx->delete_fs_state = fd3_fp_state_delete;
  622.  
  623.         pctx->create_vs_state = fd3_vp_state_create;
  624.         pctx->bind_vs_state = fd3_vp_state_bind;
  625.         pctx->delete_vs_state = fd3_vp_state_delete;
  626.  
  627.         ctx->solid_prog.fp = create_solid_fp(pctx);
  628.         ctx->solid_prog.vp = create_solid_vp(pctx);
  629.         ctx->blit_prog.fp = create_blit_fp(pctx);
  630.         ctx->blit_prog.vp = create_blit_vp(pctx);
  631. }
  632.  
  633. void
  634. fd3_prog_fini(struct pipe_context *pctx)
  635. {
  636.         struct fd_context *ctx = fd_context(pctx);
  637.  
  638.         delete_shader(ctx->solid_prog.vp);
  639.         delete_shader(ctx->solid_prog.fp);
  640.         delete_shader(ctx->blit_prog.vp);
  641.         delete_shader(ctx->blit_prog.fp);
  642. }
  643.