Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2014 Scott Mansell
  3.  * Copyright © 2014 Broadcom
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the "Software"),
  7.  * to deal in the Software without restriction, including without limitation
  8.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9.  * and/or sell copies of the Software, and to permit persons to whom the
  10.  * Software is furnished to do so, subject to the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the next
  13.  * paragraph) shall be included in all copies or substantial portions of the
  14.  * Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22.  * IN THE SOFTWARE.
  23.  */
  24.  
  25. #include "util/u_prim.h"
  26. #include "util/u_format.h"
  27. #include "util/u_pack_color.h"
  28. #include "indices/u_primconvert.h"
  29.  
  30. #include "vc4_context.h"
  31. #include "vc4_resource.h"
  32.  
  33. static void
  34. vc4_get_draw_cl_space(struct vc4_context *vc4)
  35. {
  36.         /* Binner gets our packet state -- vc4_emit.c contents,
  37.          * and the primitive itself.
  38.          */
  39.         cl_ensure_space(&vc4->bcl, 256);
  40.  
  41.         /* Nothing for rcl -- that's covered by vc4_context.c */
  42.  
  43.         /* shader_rec gets up to 12 dwords of reloc handles plus a maximally
  44.          * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
  45.          * vattr stride).
  46.          */
  47.         cl_ensure_space(&vc4->shader_rec, 12 * sizeof(uint32_t) + 104 + 8 * 32);
  48.  
  49.         /* Uniforms are covered by vc4_write_uniforms(). */
  50.  
  51.         /* There could be up to 16 textures per stage, plus misc other
  52.          * pointers.
  53.          */
  54.         cl_ensure_space(&vc4->bo_handles, (2 * 16 + 20) * sizeof(uint32_t));
  55.         cl_ensure_space(&vc4->bo_pointers,
  56.                         (2 * 16 + 20) * sizeof(struct vc4_bo *));
  57. }
  58.  
  59. /**
  60.  * Does the initial bining command list setup for drawing to a given FBO.
  61.  */
  62. static void
  63. vc4_start_draw(struct vc4_context *vc4)
  64. {
  65.         if (vc4->needs_flush)
  66.                 return;
  67.  
  68.         vc4_get_draw_cl_space(vc4);
  69.  
  70.         uint32_t width = vc4->framebuffer.width;
  71.         uint32_t height = vc4->framebuffer.height;
  72.         uint32_t tilew = align(width, 64) / 64;
  73.         uint32_t tileh = align(height, 64) / 64;
  74.  
  75.         /* Tile alloc memory setup: We use an initial alloc size of 32b.  The
  76.          * hardware then aligns that to 256b (we use 4096, because all of our
  77.          * BO allocations align to that anyway), then for some reason the
  78.          * simulator wants an extra page available, even if you have overflow
  79.          * memory set up.
  80.          *
  81.          * XXX: The binner only does 28-bit addressing math, so the tile alloc
  82.          * and tile state should be in the same BO and that BO needs to not
  83.          * cross a 256MB boundary, somehow.
  84.          */
  85.         uint32_t tile_alloc_size = 32 * tilew * tileh;
  86.         tile_alloc_size = align(tile_alloc_size, 4096);
  87.         tile_alloc_size += 4096;
  88.         uint32_t tile_state_size = 48 * tilew * tileh;
  89.         if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) {
  90.                 vc4_bo_unreference(&vc4->tile_alloc);
  91.                 vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size,
  92.                                                "tile_alloc");
  93.         }
  94.         if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) {
  95.                 vc4_bo_unreference(&vc4->tile_state);
  96.                 vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size,
  97.                                                "tile_state");
  98.         }
  99.  
  100.         //   Tile state data is 48 bytes per tile, I think it can be thrown away
  101.         //   as soon as binning is finished.
  102.         cl_start_reloc(&vc4->bcl, 2);
  103.         cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
  104.         cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0);
  105.         cl_u32(&vc4->bcl, vc4->tile_alloc->size);
  106.         cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0);
  107.         cl_u8(&vc4->bcl, tilew);
  108.         cl_u8(&vc4->bcl, tileh);
  109.         cl_u8(&vc4->bcl,
  110.               VC4_BIN_CONFIG_AUTO_INIT_TSDA |
  111.               VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 |
  112.               VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32);
  113.  
  114.         /* START_TILE_BINNING resets the statechange counters in the hardware,
  115.          * which are what is used when a primitive is binned to a tile to
  116.          * figure out what new state packets need to be written to that tile's
  117.          * command list.
  118.          */
  119.         cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
  120.  
  121.         /* Reset the current compressed primitives format.  This gets modified
  122.          * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
  123.          * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
  124.          * of every tile.
  125.          */
  126.         cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
  127.         cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
  128.                           VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
  129.  
  130.         vc4->needs_flush = true;
  131.         vc4->draw_call_queued = true;
  132. }
  133.  
  134. static void
  135. vc4_update_shadow_textures(struct pipe_context *pctx,
  136.                            struct vc4_texture_stateobj *stage_tex)
  137. {
  138.         for (int i = 0; i < stage_tex->num_textures; i++) {
  139.                 struct pipe_sampler_view *view = stage_tex->textures[i];
  140.                 if (!view)
  141.                         continue;
  142.                 struct vc4_resource *rsc = vc4_resource(view->texture);
  143.                 if (rsc->shadow_parent)
  144.                         vc4_update_shadow_baselevel_texture(pctx, view);
  145.         }
  146. }
  147.  
/**
 * pipe_context::draw_vbo implementation: validates state, writes the
 * uniform streams and shader record (with one record per vertex element),
 * then emits the indexed or array primitive packet into the binner CL.
 */
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        /* Primitive types from QUADS upward aren't handled by the packet
         * emission below, so convert them through primconvert instead.
         */
        if (info->mode >= PIPE_PRIM_QUADS) {
                util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
                util_primconvert_draw_vbo(vc4->primconvert, info);
                perf_debug("Fallback conversion for %d %s vertices\n",
                           info->count, u_prim_name(info->mode));
                return;
        }

        /* Before setting up the draw, do any fixup blits necessary. */
        vc4_update_shadow_textures(pctx, &vc4->verttex);
        vc4_update_shadow_textures(pctx, &vc4->fragtex);

        vc4_get_draw_cl_space(vc4);

        struct vc4_vertex_stateobj *vtx = vc4->vtx;
        struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;

        /* Track prim mode changes so dependent state gets re-emitted. */
        if (vc4->prim_mode != info->mode) {
                vc4->prim_mode = info->mode;
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
        }

        vc4_start_draw(vc4);
        vc4_update_compiled_shaders(vc4, info->mode);

        vc4_emit_state(pctx);
        vc4->dirty = 0;

        /* Write the uniform streams for all three stages.  The coordinate
         * shader (cs) shares the vertex stage's constant buffers and
         * textures.
         */
        vc4_write_uniforms(vc4, vc4->prog.fs,
                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
                           &vc4->fragtex);
        vc4_write_uniforms(vc4, vc4->prog.vs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);
        vc4_write_uniforms(vc4, vc4->prog.cs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);

        /* The simulator throws a fit if VS or CS don't read an attribute, so
         * we emit a dummy read.
         */
        uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
        /* Emit the shader record. */
        cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
        cl_u16(&vc4->shader_rec,
               VC4_SHADER_FLAG_ENABLE_CLIPPING |
               ((info->mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex) ?
                VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
        cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
        cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */

        cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
        cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
        cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */

        cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
        cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
        cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */

        /* Per-attribute records: a reloc to the vertex data plus element
         * size, stride, and each stage's VPM offset for the attribute.
         * max_index tracks the largest vertex index fully contained in
         * every bound (non-zero-stride) buffer.
         */
        uint32_t max_index = 0xffff;
        uint32_t vpm_offset = 0;
        /* NOTE(review): vpm_offset is accumulated below but never read in
         * this function -- looks dead; confirm before removing.
         */
        for (int i = 0; i < vtx->num_elements; i++) {
                struct pipe_vertex_element *elem = &vtx->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &vertexbuf->vb[elem->vertex_buffer_index];
                struct vc4_resource *rsc = vc4_resource(vb->buffer);
                uint32_t offset = vb->buffer_offset + elem->src_offset;
                uint32_t vb_size = rsc->bo->size - offset;
                uint32_t elem_size =
                        util_format_get_blocksize(elem->src_format);

                cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
                cl_u8(&vc4->shader_rec, elem_size - 1);
                cl_u8(&vc4->shader_rec, vb->stride);
                cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
                cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);

                vpm_offset += align(elem_size, 4);

                if (vb->stride > 0) {
                        max_index = MIN2(max_index,
                                         (vb_size - elem_size) / vb->stride);
                }
        }

        /* With no elements bound, emit one dummy attribute record backed
         * by a scratch BO so VS/CS still have something to read.
         */
        if (vtx->num_elements == 0) {
                assert(num_elements_emit == 1);
                struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
                cl_reloc(vc4, &vc4->shader_rec, bo, 0);
                cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
                cl_u8(&vc4->shader_rec, 0); /* stride */
                cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
                cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
                vc4_bo_unreference(&bo);
        }

        /* the actual draw call. */
        cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
        assert(vtx->num_elements <= 8);
        /* Note that number of attributes == 0 in the packet means 8
         * attributes.  This field also contains the offset into shader_rec.
         */
        cl_u32(&vc4->bcl, num_elements_emit & 0x7);

        /* Note that the primitive type fields match with OpenGL/gallium
         * definitions, up to but not including QUADS.
         */
        if (info->indexed) {
                struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer);
                uint32_t offset = vc4->indexbuf.offset;
                uint32_t index_size = vc4->indexbuf.index_size;
                /* A shadow index buffer holds the data rebased to offset
                 * 0 (built by vc4_update_shadow_index_buffer()).
                 */
                if (rsc->shadow_parent) {
                        vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf);
                        offset = 0;
                }

                cl_start_reloc(&vc4->bcl, 1);
                cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                cl_u8(&vc4->bcl,
                      info->mode |
                      (index_size == 2 ?
                       VC4_INDEX_BUFFER_U16:
                       VC4_INDEX_BUFFER_U8));
                cl_u32(&vc4->bcl, info->count);
                cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
                cl_u32(&vc4->bcl, max_index);
        } else {
                cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
                cl_u8(&vc4->bcl, info->mode);
                cl_u32(&vc4->bcl, info->count);
                cl_u32(&vc4->bcl, info->start);
        }

        /* Mark the buffers this draw writes so they get stored at frame
         * end; color is always written.
         */
        if (vc4->zsa && vc4->zsa->base.depth.enabled) {
                vc4->resolve |= PIPE_CLEAR_DEPTH;
        }
        if (vc4->zsa && vc4->zsa->base.stencil[0].enabled)
                vc4->resolve |= PIPE_CLEAR_STENCIL;
        vc4->resolve |= PIPE_CLEAR_COLOR0;

        vc4->shader_rec_count++;

        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
                vc4_flush(pctx);
}
  306.  
  307. static uint32_t
  308. pack_rgba(enum pipe_format format, const float *rgba)
  309. {
  310.         union util_color uc;
  311.         util_pack_color(rgba, format, &uc);
  312.         if (util_format_get_blocksize(format) == 2)
  313.                 return uc.us;
  314.         else
  315.                 return uc.ui[0];
  316. }
  317.  
  318. static void
  319. vc4_clear(struct pipe_context *pctx, unsigned buffers,
  320.           const union pipe_color_union *color, double depth, unsigned stencil)
  321. {
  322.         struct vc4_context *vc4 = vc4_context(pctx);
  323.  
  324.         /* We can't flag new buffers for clearing once we've queued draws.  We
  325.          * could avoid this by using the 3d engine to clear.
  326.          */
  327.         if (vc4->draw_call_queued) {
  328.                 perf_debug("Flushing rendering to process new clear.");
  329.                 vc4_flush(pctx);
  330.         }
  331.  
  332.         if (buffers & PIPE_CLEAR_COLOR0) {
  333.                 vc4->clear_color[0] = vc4->clear_color[1] =
  334.                         pack_rgba(vc4->framebuffer.cbufs[0]->format,
  335.                                   color->f);
  336.         }
  337.  
  338.         if (buffers & PIPE_CLEAR_DEPTH) {
  339.                 /* Though the depth buffer is stored with Z in the high 24,
  340.                  * for this field we just need to store it in the low 24.
  341.                  */
  342.                 vc4->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth);
  343.         }
  344.  
  345.         if (buffers & PIPE_CLEAR_STENCIL)
  346.                 vc4->clear_stencil = stencil;
  347.  
  348.         vc4->draw_min_x = 0;
  349.         vc4->draw_min_y = 0;
  350.         vc4->draw_max_x = vc4->framebuffer.width;
  351.         vc4->draw_max_y = vc4->framebuffer.height;
  352.         vc4->cleared |= buffers;
  353.         vc4->resolve |= buffers;
  354.  
  355.         vc4_start_draw(vc4);
  356. }
  357.  
  358. static void
  359. vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
  360.                         const union pipe_color_union *color,
  361.                         unsigned x, unsigned y, unsigned w, unsigned h)
  362. {
  363.         fprintf(stderr, "unimpl: clear RT\n");
  364. }
  365.  
  366. static void
  367. vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
  368.                         unsigned buffers, double depth, unsigned stencil,
  369.                         unsigned x, unsigned y, unsigned w, unsigned h)
  370. {
  371.         fprintf(stderr, "unimpl: clear DS\n");
  372. }
  373.  
  374. void
  375. vc4_draw_init(struct pipe_context *pctx)
  376. {
  377.         pctx->draw_vbo = vc4_draw_vbo;
  378.         pctx->clear = vc4_clear;
  379.         pctx->clear_render_target = vc4_clear_render_target;
  380.         pctx->clear_depth_stencil = vc4_clear_depth_stencil;
  381. }
  382.