Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include <limits.h>
  29. #include "util/u_memory.h"
  30. #include "util/u_math.h"
  31. #include "util/u_rect.h"
  32. #include "util/u_surface.h"
  33. #include "util/u_pack_color.h"
  34.  
  35. #include "os/os_time.h"
  36.  
  37. #include "lp_scene_queue.h"
  38. #include "lp_debug.h"
  39. #include "lp_fence.h"
  40. #include "lp_perf.h"
  41. #include "lp_query.h"
  42. #include "lp_rast.h"
  43. #include "lp_rast_priv.h"
  44. #include "gallivm/lp_bld_debug.h"
  45. #include "lp_scene.h"
  46. #include "lp_tex_sample.h"
  47.  
  48.  
/*
 * Debug-build-only globals describing the JIT call currently in flight.
 * NOTE(review): presumably written by the BEGIN_JIT_CALL()/END_JIT_CALL()
 * macros used further down in this file -- confirm in lp_rast_priv.h.
 */
#ifdef DEBUG
int jit_line = 0;
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
#endif
  54.  
  55.  
/**
 * Begin rasterizing a scene.
 * Called once per scene by one thread.
 *
 * Records \p scene as the rasterizer's current scene, then starts
 * rasterization on the scene and resets its bin iterator.
 *
 * \param rast   rasterizer whose curr_scene is set
 * \param scene  scene about to be rasterized
 */
static void
lp_rast_begin( struct lp_rasterizer *rast,
               struct lp_scene *scene )
{
   /* Publish the scene first: worker threads read rast->curr_scene. */
   rast->curr_scene = scene;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   lp_scene_begin_rasterization( scene );
   lp_scene_bin_iter_begin( scene );
}
  71.  
  72.  
  73. static void
  74. lp_rast_end( struct lp_rasterizer *rast )
  75. {
  76.    lp_scene_end_rasterization( rast->curr_scene );
  77.  
  78.    rast->curr_scene = NULL;
  79. }
  80.  
  81.  
  82. /**
  83.  * Begining rasterization of a tile.
  84.  * \param x  window X position of the tile, in pixels
  85.  * \param y  window Y position of the tile, in pixels
  86.  */
  87. static void
  88. lp_rast_tile_begin(struct lp_rasterizer_task *task,
  89.                    const struct cmd_bin *bin,
  90.                    int x, int y)
  91. {
  92.    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
  93.  
  94.    task->bin = bin;
  95.    task->x = x * TILE_SIZE;
  96.    task->y = y * TILE_SIZE;
  97.    task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
  98.                     task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
  99.    task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
  100.                     task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
  101.  
  102.    task->thread_data.vis_counter = 0;
  103.    task->ps_invocations = 0;
  104.  
  105.    /* reset pointers to color and depth tile(s) */
  106.    memset(task->color_tiles, 0, sizeof(task->color_tiles));
  107.    task->depth_tile = NULL;
  108. }
  109.  
  110.  
  111. /**
  112.  * Clear the rasterizer's current color tile.
  113.  * This is a bin command called during bin processing.
  114.  * Clear commands always clear all bound layers.
  115.  */
  116. static void
  117. lp_rast_clear_color(struct lp_rasterizer_task *task,
  118.                     const union lp_rast_cmd_arg arg)
  119. {
  120.    const struct lp_scene *scene = task->scene;
  121.  
  122.    if (scene->fb.nr_cbufs) {
  123.       unsigned i;
  124.       union util_color uc;
  125.  
  126.       if (util_format_is_pure_integer(scene->fb.cbufs[0]->format)) {
  127.          /*
  128.           * We expect int/uint clear values here, though some APIs
  129.           * might disagree (but in any case util_pack_color()
  130.           * couldn't handle it)...
  131.           */
  132.          LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
  133.                     arg.clear_color.ui[0],
  134.                     arg.clear_color.ui[1],
  135.                     arg.clear_color.ui[2],
  136.                     arg.clear_color.ui[3]);
  137.  
  138.          for (i = 0; i < scene->fb.nr_cbufs; i++) {
  139.             enum pipe_format format = scene->fb.cbufs[i]->format;
  140.  
  141.             if (util_format_is_pure_sint(format)) {
  142.                util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
  143.             }
  144.             else {
  145.                assert(util_format_is_pure_uint(format));
  146.                util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
  147.             }
  148.  
  149.             util_fill_box(scene->cbufs[i].map,
  150.                           format,
  151.                           scene->cbufs[i].stride,
  152.                           scene->cbufs[i].layer_stride,
  153.                           task->x,
  154.                           task->y,
  155.                           0,
  156.                           task->width,
  157.                           task->height,
  158.                           scene->fb_max_layer + 1,
  159.                           &uc);
  160.          }
  161.       }
  162.       else {
  163.          uint8_t clear_color[4];
  164.  
  165.          for (i = 0; i < 4; ++i) {
  166.             clear_color[i] = float_to_ubyte(arg.clear_color.f[i]);
  167.          }
  168.  
  169.          LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
  170.                     clear_color[0],
  171.                     clear_color[1],
  172.                     clear_color[2],
  173.                     clear_color[3]);
  174.  
  175.          for (i = 0; i < scene->fb.nr_cbufs; i++) {
  176.             util_pack_color(arg.clear_color.f,
  177.                             scene->fb.cbufs[i]->format, &uc);
  178.  
  179.             util_fill_box(scene->cbufs[i].map,
  180.                           scene->fb.cbufs[i]->format,
  181.                           scene->cbufs[i].stride,
  182.                           scene->cbufs[i].layer_stride,
  183.                           task->x,
  184.                           task->y,
  185.                           0,
  186.                           task->width,
  187.                           task->height,
  188.                           scene->fb_max_layer + 1,
  189.                           &uc);
  190.          }
  191.       }
  192.    }
  193.  
  194.    LP_COUNT(nr_color_tile_clear);
  195. }
  196.  
  197.  
  198.  
  199.  
  200. /**
  201.  * Clear the rasterizer's current z/stencil tile.
  202.  * This is a bin command called during bin processing.
  203.  * Clear commands always clear all bound layers.
  204.  */
  205. static void
  206. lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
  207.                        const union lp_rast_cmd_arg arg)
  208. {
  209.    const struct lp_scene *scene = task->scene;
  210.    uint64_t clear_value64 = arg.clear_zstencil.value;
  211.    uint64_t clear_mask64 = arg.clear_zstencil.mask;
  212.    uint32_t clear_value = (uint32_t) clear_value64;
  213.    uint32_t clear_mask = (uint32_t) clear_mask64;
  214.    const unsigned height = task->height;
  215.    const unsigned width = task->width;
  216.    const unsigned dst_stride = scene->zsbuf.stride;
  217.    uint8_t *dst;
  218.    unsigned i, j;
  219.    unsigned block_size;
  220.  
  221.    LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
  222.            __FUNCTION__, clear_value, clear_mask);
  223.  
  224.    /*
  225.     * Clear the area of the depth/depth buffer matching this tile.
  226.     */
  227.  
  228.    if (scene->fb.zsbuf) {
  229.       unsigned layer;
  230.       uint8_t *dst_layer = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
  231.       block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
  232.  
  233.       clear_value &= clear_mask;
  234.  
  235.       for (layer = 0; layer <= scene->fb_max_layer; layer++) {
  236.          dst = dst_layer;
  237.  
  238.          switch (block_size) {
  239.          case 1:
  240.             assert(clear_mask == 0xff);
  241.             memset(dst, (uint8_t) clear_value, height * width);
  242.             break;
  243.          case 2:
  244.             if (clear_mask == 0xffff) {
  245.                for (i = 0; i < height; i++) {
  246.                   uint16_t *row = (uint16_t *)dst;
  247.                   for (j = 0; j < width; j++)
  248.                      *row++ = (uint16_t) clear_value;
  249.                   dst += dst_stride;
  250.                }
  251.             }
  252.             else {
  253.                for (i = 0; i < height; i++) {
  254.                   uint16_t *row = (uint16_t *)dst;
  255.                   for (j = 0; j < width; j++) {
  256.                      uint16_t tmp = ~clear_mask & *row;
  257.                      *row++ = clear_value | tmp;
  258.                   }
  259.                   dst += dst_stride;
  260.                }
  261.             }
  262.             break;
  263.          case 4:
  264.             if (clear_mask == 0xffffffff) {
  265.                for (i = 0; i < height; i++) {
  266.                   uint32_t *row = (uint32_t *)dst;
  267.                   for (j = 0; j < width; j++)
  268.                      *row++ = clear_value;
  269.                   dst += dst_stride;
  270.                }
  271.             }
  272.             else {
  273.                for (i = 0; i < height; i++) {
  274.                   uint32_t *row = (uint32_t *)dst;
  275.                   for (j = 0; j < width; j++) {
  276.                      uint32_t tmp = ~clear_mask & *row;
  277.                      *row++ = clear_value | tmp;
  278.                   }
  279.                   dst += dst_stride;
  280.                }
  281.             }
  282.             break;
  283.          case 8:
  284.             clear_value64 &= clear_mask64;
  285.             if (clear_mask64 == 0xffffffffffULL) {
  286.                for (i = 0; i < height; i++) {
  287.                   uint64_t *row = (uint64_t *)dst;
  288.                   for (j = 0; j < width; j++)
  289.                      *row++ = clear_value64;
  290.                   dst += dst_stride;
  291.                }
  292.             }
  293.             else {
  294.                for (i = 0; i < height; i++) {
  295.                   uint64_t *row = (uint64_t *)dst;
  296.                   for (j = 0; j < width; j++) {
  297.                      uint64_t tmp = ~clear_mask64 & *row;
  298.                      *row++ = clear_value64 | tmp;
  299.                   }
  300.                   dst += dst_stride;
  301.                }
  302.             }
  303.             break;
  304.  
  305.          default:
  306.             assert(0);
  307.             break;
  308.          }
  309.          dst_layer += scene->zsbuf.layer_stride;
  310.       }
  311.    }
  312. }
  313.  
  314.  
  315.  
  316. /**
  317.  * Run the shader on all blocks in a tile.  This is used when a tile is
  318.  * completely contained inside a triangle.
  319.  * This is a bin command called during bin processing.
  320.  */
  321. static void
  322. lp_rast_shade_tile(struct lp_rasterizer_task *task,
  323.                    const union lp_rast_cmd_arg arg)
  324. {
  325.    const struct lp_scene *scene = task->scene;
  326.    const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
  327.    const struct lp_rast_state *state;
  328.    struct lp_fragment_shader_variant *variant;
  329.    const unsigned tile_x = task->x, tile_y = task->y;
  330.    unsigned x, y;
  331.  
  332.    if (inputs->disable) {
  333.       /* This command was partially binned and has been disabled */
  334.       return;
  335.    }
  336.  
  337.    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
  338.  
  339.    state = task->state;
  340.    assert(state);
  341.    if (!state) {
  342.       return;
  343.    }
  344.    variant = state->variant;
  345.  
  346.    /* render the whole 64x64 tile in 4x4 chunks */
  347.    for (y = 0; y < task->height; y += 4){
  348.       for (x = 0; x < task->width; x += 4) {
  349.          uint8_t *color[PIPE_MAX_COLOR_BUFS];
  350.          unsigned stride[PIPE_MAX_COLOR_BUFS];
  351.          uint8_t *depth = NULL;
  352.          unsigned depth_stride = 0;
  353.          unsigned i;
  354.  
  355.          /* color buffer */
  356.          for (i = 0; i < scene->fb.nr_cbufs; i++){
  357.             stride[i] = scene->cbufs[i].stride;
  358.             color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x,
  359.                                                                   tile_y + y, inputs->layer);
  360.          }
  361.  
  362.          /* depth buffer */
  363.          if (scene->zsbuf.map) {
  364.             depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x,
  365.                                                                tile_y + y, inputs->layer);
  366.             depth_stride = scene->zsbuf.stride;
  367.          }
  368.  
  369.          /* run shader on 4x4 block */
  370.          BEGIN_JIT_CALL(state, task);
  371.          variant->jit_function[RAST_WHOLE]( &state->jit_context,
  372.                                             tile_x + x, tile_y + y,
  373.                                             inputs->frontfacing,
  374.                                             GET_A0(inputs),
  375.                                             GET_DADX(inputs),
  376.                                             GET_DADY(inputs),
  377.                                             color,
  378.                                             depth,
  379.                                             0xffff,
  380.                                             &task->thread_data,
  381.                                             stride,
  382.                                             depth_stride);
  383.          END_JIT_CALL();
  384.       }
  385.    }
  386. }
  387.  
  388.  
  389. /**
  390.  * Run the shader on all blocks in a tile.  This is used when a tile is
  391.  * completely contained inside a triangle, and the shader is opaque.
  392.  * This is a bin command called during bin processing.
  393.  */
  394. static void
  395. lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
  396.                           const union lp_rast_cmd_arg arg)
  397. {
  398.    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
  399.  
  400.    assert(task->state);
  401.    if (!task->state) {
  402.       return;
  403.    }
  404.  
  405.    lp_rast_shade_tile(task, arg);
  406. }
  407.  
  408.  
  409. /**
  410.  * Compute shading for a 4x4 block of pixels inside a triangle.
  411.  * This is a bin command called during bin processing.
  412.  * \param x  X position of quad in window coords
  413.  * \param y  Y position of quad in window coords
  414.  */
  415. void
  416. lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
  417.                          const struct lp_rast_shader_inputs *inputs,
  418.                          unsigned x, unsigned y,
  419.                          unsigned mask)
  420. {
  421.    const struct lp_rast_state *state = task->state;
  422.    struct lp_fragment_shader_variant *variant = state->variant;
  423.    const struct lp_scene *scene = task->scene;
  424.    uint8_t *color[PIPE_MAX_COLOR_BUFS];
  425.    unsigned stride[PIPE_MAX_COLOR_BUFS];
  426.    uint8_t *depth = NULL;
  427.    unsigned depth_stride = 0;
  428.    unsigned i;
  429.  
  430.    assert(state);
  431.  
  432.    /* Sanity checks */
  433.    assert(x < scene->tiles_x * TILE_SIZE);
  434.    assert(y < scene->tiles_y * TILE_SIZE);
  435.    assert(x % TILE_VECTOR_WIDTH == 0);
  436.    assert(y % TILE_VECTOR_HEIGHT == 0);
  437.  
  438.    assert((x % 4) == 0);
  439.    assert((y % 4) == 0);
  440.  
  441.    /* color buffer */
  442.    for (i = 0; i < scene->fb.nr_cbufs; i++) {
  443.       stride[i] = scene->cbufs[i].stride;
  444.       color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y, inputs->layer);
  445.    }
  446.  
  447.    /* depth buffer */
  448.    if (scene->zsbuf.map) {
  449.       depth_stride = scene->zsbuf.stride;
  450.       depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y, inputs->layer);
  451.    }
  452.  
  453.    assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
  454.  
  455.    /*
  456.     * The rasterizer may produce fragments outside our
  457.     * allocated 4x4 blocks hence need to filter them out here.
  458.     */
  459.    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
  460.       /* not very accurate would need a popcount on the mask */
  461.       /* always count this not worth bothering? */
  462.       task->ps_invocations++;
  463.  
  464.       /* run shader on 4x4 block */
  465.       BEGIN_JIT_CALL(state, task);
  466.       variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
  467.                                             x, y,
  468.                                             inputs->frontfacing,
  469.                                             GET_A0(inputs),
  470.                                             GET_DADX(inputs),
  471.                                             GET_DADY(inputs),
  472.                                             color,
  473.                                             depth,
  474.                                             mask,
  475.                                             &task->thread_data,
  476.                                             stride,
  477.                                             depth_stride);
  478.       END_JIT_CALL();
  479.    }
  480. }
  481.  
  482.  
  483.  
  484. /**
  485.  * Begin a new occlusion query.
  486.  * This is a bin command put in all bins.
  487.  * Called per thread.
  488.  */
  489. static void
  490. lp_rast_begin_query(struct lp_rasterizer_task *task,
  491.                     const union lp_rast_cmd_arg arg)
  492. {
  493.    struct llvmpipe_query *pq = arg.query_obj;
  494.  
  495.    switch (pq->type) {
  496.    case PIPE_QUERY_OCCLUSION_COUNTER:
  497.    case PIPE_QUERY_OCCLUSION_PREDICATE:
  498.       pq->start[task->thread_index] = task->thread_data.vis_counter;
  499.       break;
  500.    case PIPE_QUERY_PIPELINE_STATISTICS:
  501.       pq->start[task->thread_index] = task->ps_invocations;
  502.       break;
  503.    default:
  504.       assert(0);
  505.       break;
  506.    }
  507. }
  508.  
  509.  
  510. /**
  511.  * End the current occlusion query.
  512.  * This is a bin command put in all bins.
  513.  * Called per thread.
  514.  */
  515. static void
  516. lp_rast_end_query(struct lp_rasterizer_task *task,
  517.                   const union lp_rast_cmd_arg arg)
  518. {
  519.    struct llvmpipe_query *pq = arg.query_obj;
  520.  
  521.    switch (pq->type) {
  522.    case PIPE_QUERY_OCCLUSION_COUNTER:
  523.    case PIPE_QUERY_OCCLUSION_PREDICATE:
  524.       pq->end[task->thread_index] +=
  525.          task->thread_data.vis_counter - pq->start[task->thread_index];
  526.       pq->start[task->thread_index] = 0;
  527.       break;
  528.    case PIPE_QUERY_TIMESTAMP:
  529.       pq->end[task->thread_index] = os_time_get_nano();
  530.       break;
  531.    case PIPE_QUERY_PIPELINE_STATISTICS:
  532.       pq->end[task->thread_index] +=
  533.          task->ps_invocations - pq->start[task->thread_index];
  534.       pq->start[task->thread_index] = 0;
  535.       break;
  536.    default:
  537.       assert(0);
  538.       break;
  539.    }
  540. }
  541.  
  542.  
/**
 * Make arg.state the task's current rasterizer state.
 * This is a bin command (it appears in the dispatch table); the state set
 * here is what the shade commands read back from task->state.
 */
void
lp_rast_set_state(struct lp_rasterizer_task *task,
                  const union lp_rast_cmd_arg arg)
{
   task->state = arg.state;
}
  549.  
  550.  
  551.  
  552. /**
  553.  * Called when we're done writing to a color tile.
  554.  */
  555. static void
  556. lp_rast_tile_end(struct lp_rasterizer_task *task)
  557. {
  558.    unsigned i;
  559.  
  560.    for (i = 0; i < task->scene->num_active_queries; ++i) {
  561.       lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
  562.    }
  563.  
  564.    /* debug */
  565.    memset(task->color_tiles, 0, sizeof(task->color_tiles));
  566.    task->depth_tile = NULL;
  567.  
  568.    task->bin = NULL;
  569. }
  570.  
/*
 * Bin command dispatch table, indexed by the command byte stored in
 * cmd_block::cmd[] (see do_rasterize_bin()).
 * NOTE(review): the entry order presumably has to match the LP_RAST_OP_*
 * values one-to-one -- confirm against lp_rast.h before reordering.
 */
static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
{
   lp_rast_clear_color,
   lp_rast_clear_zstencil,
   lp_rast_triangle_1,
   lp_rast_triangle_2,
   lp_rast_triangle_3,
   lp_rast_triangle_4,
   lp_rast_triangle_5,
   lp_rast_triangle_6,
   lp_rast_triangle_7,
   lp_rast_triangle_8,
   lp_rast_triangle_3_4,
   lp_rast_triangle_3_16,
   lp_rast_triangle_4_16,
   lp_rast_shade_tile,
   lp_rast_shade_tile_opaque,
   lp_rast_begin_query,
   lp_rast_end_query,
   lp_rast_set_state,
};
  592.  
  593.  
  594. static void
  595. do_rasterize_bin(struct lp_rasterizer_task *task,
  596.                  const struct cmd_bin *bin,
  597.                  int x, int y)
  598. {
  599.    const struct cmd_block *block;
  600.    unsigned k;
  601.  
  602.    if (0)
  603.       lp_debug_bin(bin, x, y);
  604.  
  605.    for (block = bin->head; block; block = block->next) {
  606.       for (k = 0; k < block->count; k++) {
  607.          dispatch[block->cmd[k]]( task, block->arg[k] );
  608.       }
  609.    }
  610. }
  611.  
  612.  
  613.  
  614. /**
  615.  * Rasterize commands for a single bin.
  616.  * \param x, y  position of the bin's tile in the framebuffer
  617.  * Must be called between lp_rast_begin() and lp_rast_end().
  618.  * Called per thread.
  619.  */
  620. static void
  621. rasterize_bin(struct lp_rasterizer_task *task,
  622.               const struct cmd_bin *bin, int x, int y )
  623. {
  624.    lp_rast_tile_begin( task, bin, x, y );
  625.  
  626.    do_rasterize_bin(task, bin, x, y);
  627.  
  628.    lp_rast_tile_end(task);
  629.  
  630.  
  631.    /* Debug/Perf flags:
  632.     */
  633.    if (bin->head->count == 1) {
  634.       if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
  635.          LP_COUNT(nr_pure_shade_opaque_64);
  636.       else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
  637.          LP_COUNT(nr_pure_shade_64);
  638.    }
  639. }
  640.  
  641.  
  642. /* An empty bin is one that just loads the contents of the tile and
  643.  * stores them again unchanged.  This typically happens when bins have
  644.  * been flushed for some reason in the middle of a frame, or when
  645.  * incremental updates are being made to a render target.
  646.  *
  647.  * Try to avoid doing pointless work in this case.
  648.  */
  649. static boolean
  650. is_empty_bin( const struct cmd_bin *bin )
  651. {
  652.    return bin->head == NULL;
  653. }
  654.  
  655.  
  656. /**
  657.  * Rasterize/execute all bins within a scene.
  658.  * Called per thread.
  659.  */
  660. static void
  661. rasterize_scene(struct lp_rasterizer_task *task,
  662.                 struct lp_scene *scene)
  663. {
  664.    task->scene = scene;
  665.  
  666.    if (!task->rast->no_rast && !scene->discard) {
  667.       /* loop over scene bins, rasterize each */
  668.       {
  669.          struct cmd_bin *bin;
  670.          int i, j;
  671.  
  672.          assert(scene);
  673.          while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
  674.             if (!is_empty_bin( bin ))
  675.                rasterize_bin(task, bin, i, j);
  676.          }
  677.       }
  678.    }
  679.  
  680.  
  681.    if (scene->fence) {
  682.       lp_fence_signal(scene->fence);
  683.    }
  684.  
  685.    task->scene = NULL;
  686. }
  687.  
  688.  
  689. /**
  690.  * Called by setup module when it has something for us to render.
  691.  */
  692. void
  693. lp_rast_queue_scene( struct lp_rasterizer *rast,
  694.                      struct lp_scene *scene)
  695. {
  696.    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
  697.  
  698.    if (rast->num_threads == 0) {
  699.       /* no threading */
  700.  
  701.       lp_rast_begin( rast, scene );
  702.  
  703.       rasterize_scene( &rast->tasks[0], scene );
  704.  
  705.       lp_rast_end( rast );
  706.  
  707.       rast->curr_scene = NULL;
  708.    }
  709.    else {
  710.       /* threaded rendering! */
  711.       unsigned i;
  712.  
  713.       lp_scene_enqueue( rast->full_scenes, scene );
  714.  
  715.       /* signal the threads that there's work to do */
  716.       for (i = 0; i < rast->num_threads; i++) {
  717.          pipe_semaphore_signal(&rast->tasks[i].work_ready);
  718.       }
  719.    }
  720.  
  721.    LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
  722. }
  723.  
  724.  
  725. void
  726. lp_rast_finish( struct lp_rasterizer *rast )
  727. {
  728.    if (rast->num_threads == 0) {
  729.       /* nothing to do */
  730.    }
  731.    else {
  732.       int i;
  733.  
  734.       /* wait for work to complete */
  735.       for (i = 0; i < rast->num_threads; i++) {
  736.          pipe_semaphore_wait(&rast->tasks[i].work_done);
  737.       }
  738.    }
  739. }
  740.  
  741.  
/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *   1. wait for work
 *   2. do work
 *   3. signal that we're done
 *
 * \param init_data  the struct lp_rasterizer_task owned by this thread
 * \return NULL, once rast->exit_flag has been observed after a wake-up
 */
static PIPE_THREAD_ROUTINE( thread_function, init_data )
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   /* flip to true locally to trace the thread's wait/work/done steps */
   boolean debug = false;
   unsigned fpstate = util_fpstate_get();

   /* Make sure that denorms are treated like zeros. This is
    * the behavior required by D3D10. OpenGL doesn't care.
    */
   util_fpstate_set_denorms_to_zero(fpstate);

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %d waiting for work\n", task->thread_index);
      pipe_semaphore_wait(&task->work_ready);

      /* lp_rast_destroy() sets exit_flag before signalling work_ready */
      if (rast->exit_flag)
         break;

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - get next scene to rasterize
          *  - map the framebuffer surfaces
          */
         lp_rast_begin( rast,
                        lp_scene_dequeue( rast->full_scenes, TRUE ) );
      }

      /* Wait for all threads to get here so that threads[1+] don't
       * get a null rast->curr_scene pointer.
       */
      pipe_barrier_wait( &rast->barrier );

      /* do work */
      if (debug)
         debug_printf("thread %d doing work\n", task->thread_index);

      rasterize_scene(task,
                      rast->curr_scene);
     
      /* wait for all threads to finish with this scene */
      pipe_barrier_wait( &rast->barrier );

      /* XXX: shouldn't be necessary:
       */
      if (task->thread_index == 0) {
         lp_rast_end( rast );
      }

      /* signal done with work */
      if (debug)
         debug_printf("thread %d done working\n", task->thread_index);

      /* lp_rast_finish() waits on this semaphore */
      pipe_semaphore_signal(&task->work_done);
   }

   return NULL;
}
  809.  
  810.  
  811. /**
  812.  * Initialize semaphores and spawn the threads.
  813.  */
  814. static void
  815. create_rast_threads(struct lp_rasterizer *rast)
  816. {
  817.    unsigned i;
  818.  
  819.    /* NOTE: if num_threads is zero, we won't use any threads */
  820.    for (i = 0; i < rast->num_threads; i++) {
  821.       pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
  822.       pipe_semaphore_init(&rast->tasks[i].work_done, 0);
  823.       rast->threads[i] = pipe_thread_create(thread_function,
  824.                                             (void *) &rast->tasks[i]);
  825.    }
  826. }
  827.  
  828.  
  829.  
  830. /**
  831.  * Create new lp_rasterizer.  If num_threads is zero, don't create any
  832.  * new threads, do rendering synchronously.
  833.  * \param num_threads  number of rasterizer threads to create
  834.  */
  835. struct lp_rasterizer *
  836. lp_rast_create( unsigned num_threads )
  837. {
  838.    struct lp_rasterizer *rast;
  839.    unsigned i;
  840.  
  841.    rast = CALLOC_STRUCT(lp_rasterizer);
  842.    if (!rast) {
  843.       goto no_rast;
  844.    }
  845.  
  846.    rast->full_scenes = lp_scene_queue_create();
  847.    if (!rast->full_scenes) {
  848.       goto no_full_scenes;
  849.    }
  850.  
  851.    for (i = 0; i < Elements(rast->tasks); i++) {
  852.       struct lp_rasterizer_task *task = &rast->tasks[i];
  853.       task->rast = rast;
  854.       task->thread_index = i;
  855.    }
  856.  
  857.    rast->num_threads = num_threads;
  858.  
  859.    rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
  860.  
  861.    create_rast_threads(rast);
  862.  
  863.    /* for synchronizing rasterization threads */
  864.    pipe_barrier_init( &rast->barrier, rast->num_threads );
  865.  
  866.    memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
  867.  
  868.    return rast;
  869.  
  870. no_full_scenes:
  871.    FREE(rast);
  872. no_rast:
  873.    return NULL;
  874. }
  875.  
  876.  
  877. /* Shutdown:
  878.  */
  879. void lp_rast_destroy( struct lp_rasterizer *rast )
  880. {
  881.    unsigned i;
  882.  
  883.    /* Set exit_flag and signal each thread's work_ready semaphore.
  884.     * Each thread will be woken up, notice that the exit_flag is set and
  885.     * break out of its main loop.  The thread will then exit.
  886.     */
  887.    rast->exit_flag = TRUE;
  888.    for (i = 0; i < rast->num_threads; i++) {
  889.       pipe_semaphore_signal(&rast->tasks[i].work_ready);
  890.    }
  891.  
  892.    /* Wait for threads to terminate before cleaning up per-thread data */
  893.    for (i = 0; i < rast->num_threads; i++) {
  894.       pipe_thread_wait(rast->threads[i]);
  895.    }
  896.  
  897.    /* Clean up per-thread data */
  898.    for (i = 0; i < rast->num_threads; i++) {
  899.       pipe_semaphore_destroy(&rast->tasks[i].work_ready);
  900.       pipe_semaphore_destroy(&rast->tasks[i].work_done);
  901.    }
  902.  
  903.    /* for synchronizing rasterization threads */
  904.    pipe_barrier_destroy( &rast->barrier );
  905.  
  906.    lp_scene_queue_destroy(rast->full_scenes);
  907.  
  908.    FREE(rast);
  909. }
  910.  
  911.  
/**
 * Return number of rasterization threads.
 * This is the num_threads value stored by lp_rast_create(); zero means
 * scenes are rasterized synchronously in lp_rast_queue_scene().
 */
unsigned
lp_rast_get_num_threads( struct lp_rasterizer *rast )
{
   return rast->num_threads;
}
  918.  
  919.  
  920.