Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include <limits.h>
  29. #include "util/u_memory.h"
  30. #include "util/u_math.h"
  31. #include "util/u_rect.h"
  32. #include "util/u_surface.h"
  33. #include "util/u_pack_color.h"
  34. #include "util/u_string.h"
  35.  
  36. #include "os/os_time.h"
  37.  
  38. #include "lp_scene_queue.h"
  39. #include "lp_context.h"
  40. #include "lp_debug.h"
  41. #include "lp_fence.h"
  42. #include "lp_perf.h"
  43. #include "lp_query.h"
  44. #include "lp_rast.h"
  45. #include "lp_rast_priv.h"
  46. #include "gallivm/lp_bld_debug.h"
  47. #include "lp_scene.h"
  48. #include "lp_tex_sample.h"
  49.  
  50.  
  51. #ifdef DEBUG
  52. int jit_line = 0;
  53. const struct lp_rast_state *jit_state = NULL;
  54. const struct lp_rasterizer_task *jit_task = NULL;
  55. #endif
  56.  
  57.  
  58. /**
  59.  * Begin rasterizing a scene.
  60.  * Called once per scene by one thread.
  61.  */
  62. static void
  63. lp_rast_begin( struct lp_rasterizer *rast,
  64.                struct lp_scene *scene )
  65. {
  66.    rast->curr_scene = scene;
  67.  
  68.    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
  69.  
  70.    lp_scene_begin_rasterization( scene );
  71.    lp_scene_bin_iter_begin( scene );
  72. }
  73.  
  74.  
  75. static void
  76. lp_rast_end( struct lp_rasterizer *rast )
  77. {
  78.    lp_scene_end_rasterization( rast->curr_scene );
  79.  
  80.    rast->curr_scene = NULL;
  81. }
  82.  
  83.  
  84. /**
  85.  * Beginning rasterization of a tile.
  86.  * \param x  window X position of the tile, in pixels
  87.  * \param y  window Y position of the tile, in pixels
  88.  */
  89. static void
  90. lp_rast_tile_begin(struct lp_rasterizer_task *task,
  91.                    const struct cmd_bin *bin,
  92.                    int x, int y)
  93. {
  94.    unsigned i;
  95.    struct lp_scene *scene = task->scene;
  96.  
  97.    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
  98.  
  99.    task->bin = bin;
  100.    task->x = x * TILE_SIZE;
  101.    task->y = y * TILE_SIZE;
  102.    task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
  103.                     task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
  104.    task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
  105.                     task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
  106.  
  107.    task->thread_data.vis_counter = 0;
  108.    task->ps_invocations = 0;
  109.  
  110.    for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
  111.       if (task->scene->fb.cbufs[i]) {
  112.          task->color_tiles[i] = scene->cbufs[i].map +
  113.                                 scene->cbufs[i].stride * task->y +
  114.                                 scene->cbufs[i].format_bytes * task->x;
  115.       }
  116.    }
  117.    if (task->scene->fb.zsbuf) {
  118.       task->depth_tile = scene->zsbuf.map +
  119.                          scene->zsbuf.stride * task->y +
  120.                          scene->zsbuf.format_bytes * task->x;
  121.    }
  122. }
  123.  
  124.  
  125. /**
  126.  * Clear the rasterizer's current color tile.
  127.  * This is a bin command called during bin processing.
  128.  * Clear commands always clear all bound layers.
  129.  */
  130. static void
  131. lp_rast_clear_color(struct lp_rasterizer_task *task,
  132.                     const union lp_rast_cmd_arg arg)
  133. {
  134.    const struct lp_scene *scene = task->scene;
  135.    unsigned cbuf = arg.clear_rb->cbuf;
  136.    union util_color uc;
  137.    enum pipe_format format;
  138.  
  139.    /* we never bin clear commands for non-existing buffers */
  140.    assert(cbuf < scene->fb.nr_cbufs);
  141.    assert(scene->fb.cbufs[cbuf]);
  142.  
  143.    format = scene->fb.cbufs[cbuf]->format;
  144.    uc = arg.clear_rb->color_val;
  145.  
  146.    /*
  147.     * this is pretty rough since we have target format (bunch of bytes...) here.
  148.     * dump it as raw 4 dwords.
  149.     */
  150.    LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
  151.           __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]);
  152.  
  153.  
  154.    util_fill_box(scene->cbufs[cbuf].map,
  155.                  format,
  156.                  scene->cbufs[cbuf].stride,
  157.                  scene->cbufs[cbuf].layer_stride,
  158.                  task->x,
  159.                  task->y,
  160.                  0,
  161.                  task->width,
  162.                  task->height,
  163.                  scene->fb_max_layer + 1,
  164.                  &uc);
  165.  
  166.    /* this will increase for each rb which probably doesn't mean much */
  167.    LP_COUNT(nr_color_tile_clear);
  168. }
  169.  
  170.  
  171. /**
  172.  * Clear the rasterizer's current z/stencil tile.
  173.  * This is a bin command called during bin processing.
  174.  * Clear commands always clear all bound layers.
  175.  */
  176. static void
  177. lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
  178.                        const union lp_rast_cmd_arg arg)
  179. {
  180.    const struct lp_scene *scene = task->scene;
  181.    uint64_t clear_value64 = arg.clear_zstencil.value;
  182.    uint64_t clear_mask64 = arg.clear_zstencil.mask;
  183.    uint32_t clear_value = (uint32_t) clear_value64;
  184.    uint32_t clear_mask = (uint32_t) clear_mask64;
  185.    const unsigned height = task->height;
  186.    const unsigned width = task->width;
  187.    const unsigned dst_stride = scene->zsbuf.stride;
  188.    uint8_t *dst;
  189.    unsigned i, j;
  190.    unsigned block_size;
  191.  
  192.    LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
  193.            __FUNCTION__, clear_value, clear_mask);
  194.  
  195.    /*
  196.     * Clear the area of the depth/depth buffer matching this tile.
  197.     */
  198.  
  199.    if (scene->fb.zsbuf) {
  200.       unsigned layer;
  201.       uint8_t *dst_layer = task->depth_tile;
  202.       block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
  203.  
  204.       clear_value &= clear_mask;
  205.  
  206.       for (layer = 0; layer <= scene->fb_max_layer; layer++) {
  207.          dst = dst_layer;
  208.  
  209.          switch (block_size) {
  210.          case 1:
  211.             assert(clear_mask == 0xff);
  212.             memset(dst, (uint8_t) clear_value, height * width);
  213.             break;
  214.          case 2:
  215.             if (clear_mask == 0xffff) {
  216.                for (i = 0; i < height; i++) {
  217.                   uint16_t *row = (uint16_t *)dst;
  218.                   for (j = 0; j < width; j++)
  219.                      *row++ = (uint16_t) clear_value;
  220.                   dst += dst_stride;
  221.                }
  222.             }
  223.             else {
  224.                for (i = 0; i < height; i++) {
  225.                   uint16_t *row = (uint16_t *)dst;
  226.                   for (j = 0; j < width; j++) {
  227.                      uint16_t tmp = ~clear_mask & *row;
  228.                      *row++ = clear_value | tmp;
  229.                   }
  230.                   dst += dst_stride;
  231.                }
  232.             }
  233.             break;
  234.          case 4:
  235.             if (clear_mask == 0xffffffff) {
  236.                for (i = 0; i < height; i++) {
  237.                   uint32_t *row = (uint32_t *)dst;
  238.                   for (j = 0; j < width; j++)
  239.                      *row++ = clear_value;
  240.                   dst += dst_stride;
  241.                }
  242.             }
  243.             else {
  244.                for (i = 0; i < height; i++) {
  245.                   uint32_t *row = (uint32_t *)dst;
  246.                   for (j = 0; j < width; j++) {
  247.                      uint32_t tmp = ~clear_mask & *row;
  248.                      *row++ = clear_value | tmp;
  249.                   }
  250.                   dst += dst_stride;
  251.                }
  252.             }
  253.             break;
  254.          case 8:
  255.             clear_value64 &= clear_mask64;
  256.             if (clear_mask64 == 0xffffffffffULL) {
  257.                for (i = 0; i < height; i++) {
  258.                   uint64_t *row = (uint64_t *)dst;
  259.                   for (j = 0; j < width; j++)
  260.                      *row++ = clear_value64;
  261.                   dst += dst_stride;
  262.                }
  263.             }
  264.             else {
  265.                for (i = 0; i < height; i++) {
  266.                   uint64_t *row = (uint64_t *)dst;
  267.                   for (j = 0; j < width; j++) {
  268.                      uint64_t tmp = ~clear_mask64 & *row;
  269.                      *row++ = clear_value64 | tmp;
  270.                   }
  271.                   dst += dst_stride;
  272.                }
  273.             }
  274.             break;
  275.  
  276.          default:
  277.             assert(0);
  278.             break;
  279.          }
  280.          dst_layer += scene->zsbuf.layer_stride;
  281.       }
  282.    }
  283. }
  284.  
  285.  
  286.  
  287. /**
  288.  * Run the shader on all blocks in a tile.  This is used when a tile is
  289.  * completely contained inside a triangle.
  290.  * This is a bin command called during bin processing.
  291.  */
  292. static void
  293. lp_rast_shade_tile(struct lp_rasterizer_task *task,
  294.                    const union lp_rast_cmd_arg arg)
  295. {
  296.    const struct lp_scene *scene = task->scene;
  297.    const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
  298.    const struct lp_rast_state *state;
  299.    struct lp_fragment_shader_variant *variant;
  300.    const unsigned tile_x = task->x, tile_y = task->y;
  301.    unsigned x, y;
  302.  
  303.    if (inputs->disable) {
  304.       /* This command was partially binned and has been disabled */
  305.       return;
  306.    }
  307.  
  308.    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
  309.  
  310.    state = task->state;
  311.    assert(state);
  312.    if (!state) {
  313.       return;
  314.    }
  315.    variant = state->variant;
  316.  
  317.    /* render the whole 64x64 tile in 4x4 chunks */
  318.    for (y = 0; y < task->height; y += 4){
  319.       for (x = 0; x < task->width; x += 4) {
  320.          uint8_t *color[PIPE_MAX_COLOR_BUFS];
  321.          unsigned stride[PIPE_MAX_COLOR_BUFS];
  322.          uint8_t *depth = NULL;
  323.          unsigned depth_stride = 0;
  324.          unsigned i;
  325.  
  326.          /* color buffer */
  327.          for (i = 0; i < scene->fb.nr_cbufs; i++){
  328.             if (scene->fb.cbufs[i]) {
  329.                stride[i] = scene->cbufs[i].stride;
  330.                color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
  331.                                                           tile_y + y, inputs->layer);
  332.             }
  333.             else {
  334.                stride[i] = 0;
  335.                color[i] = NULL;
  336.             }
  337.          }
  338.  
  339.          /* depth buffer */
  340.          if (scene->zsbuf.map) {
  341.             depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
  342.                                                     tile_y + y, inputs->layer);
  343.             depth_stride = scene->zsbuf.stride;
  344.          }
  345.  
  346.          /* Propagate non-interpolated raster state. */
  347.          task->thread_data.raster_state.viewport_index = inputs->viewport_index;
  348.  
  349.          /* run shader on 4x4 block */
  350.          BEGIN_JIT_CALL(state, task);
  351.          variant->jit_function[RAST_WHOLE]( &state->jit_context,
  352.                                             tile_x + x, tile_y + y,
  353.                                             inputs->frontfacing,
  354.                                             GET_A0(inputs),
  355.                                             GET_DADX(inputs),
  356.                                             GET_DADY(inputs),
  357.                                             color,
  358.                                             depth,
  359.                                             0xffff,
  360.                                             &task->thread_data,
  361.                                             stride,
  362.                                             depth_stride);
  363.          END_JIT_CALL();
  364.       }
  365.    }
  366. }
  367.  
  368.  
  369. /**
  370.  * Run the shader on all blocks in a tile.  This is used when a tile is
  371.  * completely contained inside a triangle, and the shader is opaque.
  372.  * This is a bin command called during bin processing.
  373.  */
  374. static void
  375. lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
  376.                           const union lp_rast_cmd_arg arg)
  377. {
  378.    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
  379.  
  380.    assert(task->state);
  381.    if (!task->state) {
  382.       return;
  383.    }
  384.  
  385.    lp_rast_shade_tile(task, arg);
  386. }
  387.  
  388.  
  389. /**
  390.  * Compute shading for a 4x4 block of pixels inside a triangle.
  391.  * This is a bin command called during bin processing.
  392.  * \param x  X position of quad in window coords
  393.  * \param y  Y position of quad in window coords
  394.  */
  395. void
  396. lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
  397.                          const struct lp_rast_shader_inputs *inputs,
  398.                          unsigned x, unsigned y,
  399.                          unsigned mask)
  400. {
  401.    const struct lp_rast_state *state = task->state;
  402.    struct lp_fragment_shader_variant *variant = state->variant;
  403.    const struct lp_scene *scene = task->scene;
  404.    uint8_t *color[PIPE_MAX_COLOR_BUFS];
  405.    unsigned stride[PIPE_MAX_COLOR_BUFS];
  406.    uint8_t *depth = NULL;
  407.    unsigned depth_stride = 0;
  408.    unsigned i;
  409.  
  410.    assert(state);
  411.  
  412.    /* Sanity checks */
  413.    assert(x < scene->tiles_x * TILE_SIZE);
  414.    assert(y < scene->tiles_y * TILE_SIZE);
  415.    assert(x % TILE_VECTOR_WIDTH == 0);
  416.    assert(y % TILE_VECTOR_HEIGHT == 0);
  417.  
  418.    assert((x % 4) == 0);
  419.    assert((y % 4) == 0);
  420.  
  421.    /* color buffer */
  422.    for (i = 0; i < scene->fb.nr_cbufs; i++) {
  423.       if (scene->fb.cbufs[i]) {
  424.          stride[i] = scene->cbufs[i].stride;
  425.          color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
  426.                                                     inputs->layer);
  427.       }
  428.       else {
  429.          stride[i] = 0;
  430.          color[i] = NULL;
  431.       }
  432.    }
  433.  
  434.    /* depth buffer */
  435.    if (scene->zsbuf.map) {
  436.       depth_stride = scene->zsbuf.stride;
  437.       depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer);
  438.    }
  439.  
  440.    assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
  441.  
  442.    /*
  443.     * The rasterizer may produce fragments outside our
  444.     * allocated 4x4 blocks hence need to filter them out here.
  445.     */
  446.    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
  447.       /* not very accurate would need a popcount on the mask */
  448.       /* always count this not worth bothering? */
  449.       task->ps_invocations += 1 * variant->ps_inv_multiplier;
  450.  
  451.       /* Propagate non-interpolated raster state. */
  452.       task->thread_data.raster_state.viewport_index = inputs->viewport_index;
  453.  
  454.       /* run shader on 4x4 block */
  455.       BEGIN_JIT_CALL(state, task);
  456.       variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
  457.                                             x, y,
  458.                                             inputs->frontfacing,
  459.                                             GET_A0(inputs),
  460.                                             GET_DADX(inputs),
  461.                                             GET_DADY(inputs),
  462.                                             color,
  463.                                             depth,
  464.                                             mask,
  465.                                             &task->thread_data,
  466.                                             stride,
  467.                                             depth_stride);
  468.       END_JIT_CALL();
  469.    }
  470. }
  471.  
  472.  
  473.  
  474. /**
  475.  * Begin a new occlusion query.
  476.  * This is a bin command put in all bins.
  477.  * Called per thread.
  478.  */
  479. static void
  480. lp_rast_begin_query(struct lp_rasterizer_task *task,
  481.                     const union lp_rast_cmd_arg arg)
  482. {
  483.    struct llvmpipe_query *pq = arg.query_obj;
  484.  
  485.    switch (pq->type) {
  486.    case PIPE_QUERY_OCCLUSION_COUNTER:
  487.    case PIPE_QUERY_OCCLUSION_PREDICATE:
  488.       pq->start[task->thread_index] = task->thread_data.vis_counter;
  489.       break;
  490.    case PIPE_QUERY_PIPELINE_STATISTICS:
  491.       pq->start[task->thread_index] = task->ps_invocations;
  492.       break;
  493.    default:
  494.       assert(0);
  495.       break;
  496.    }
  497. }
  498.  
  499.  
  500. /**
  501.  * End the current occlusion query.
  502.  * This is a bin command put in all bins.
  503.  * Called per thread.
  504.  */
  505. static void
  506. lp_rast_end_query(struct lp_rasterizer_task *task,
  507.                   const union lp_rast_cmd_arg arg)
  508. {
  509.    struct llvmpipe_query *pq = arg.query_obj;
  510.  
  511.    switch (pq->type) {
  512.    case PIPE_QUERY_OCCLUSION_COUNTER:
  513.    case PIPE_QUERY_OCCLUSION_PREDICATE:
  514.       pq->end[task->thread_index] +=
  515.          task->thread_data.vis_counter - pq->start[task->thread_index];
  516.       pq->start[task->thread_index] = 0;
  517.       break;
  518.    case PIPE_QUERY_TIMESTAMP:
  519.       pq->end[task->thread_index] = os_time_get_nano();
  520.       break;
  521.    case PIPE_QUERY_PIPELINE_STATISTICS:
  522.       pq->end[task->thread_index] +=
  523.          task->ps_invocations - pq->start[task->thread_index];
  524.       pq->start[task->thread_index] = 0;
  525.       break;
  526.    default:
  527.       assert(0);
  528.       break;
  529.    }
  530. }
  531.  
  532.  
  533. void
  534. lp_rast_set_state(struct lp_rasterizer_task *task,
  535.                   const union lp_rast_cmd_arg arg)
  536. {
  537.    task->state = arg.state;
  538. }
  539.  
  540.  
  541.  
  542. /**
  543.  * Called when we're done writing to a color tile.
  544.  */
  545. static void
  546. lp_rast_tile_end(struct lp_rasterizer_task *task)
  547. {
  548.    unsigned i;
  549.  
  550.    for (i = 0; i < task->scene->num_active_queries; ++i) {
  551.       lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
  552.    }
  553.  
  554.    /* debug */
  555.    memset(task->color_tiles, 0, sizeof(task->color_tiles));
  556.    task->depth_tile = NULL;
  557.  
  558.    task->bin = NULL;
  559. }
  560.  
  561. static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
  562. {
  563.    lp_rast_clear_color,
  564.    lp_rast_clear_zstencil,
  565.    lp_rast_triangle_1,
  566.    lp_rast_triangle_2,
  567.    lp_rast_triangle_3,
  568.    lp_rast_triangle_4,
  569.    lp_rast_triangle_5,
  570.    lp_rast_triangle_6,
  571.    lp_rast_triangle_7,
  572.    lp_rast_triangle_8,
  573.    lp_rast_triangle_3_4,
  574.    lp_rast_triangle_3_16,
  575.    lp_rast_triangle_4_16,
  576.    lp_rast_shade_tile,
  577.    lp_rast_shade_tile_opaque,
  578.    lp_rast_begin_query,
  579.    lp_rast_end_query,
  580.    lp_rast_set_state,
  581.    lp_rast_triangle_32_1,
  582.    lp_rast_triangle_32_2,
  583.    lp_rast_triangle_32_3,
  584.    lp_rast_triangle_32_4,
  585.    lp_rast_triangle_32_5,
  586.    lp_rast_triangle_32_6,
  587.    lp_rast_triangle_32_7,
  588.    lp_rast_triangle_32_8,
  589.    lp_rast_triangle_32_3_4,
  590.    lp_rast_triangle_32_3_16,
  591.    lp_rast_triangle_32_4_16
  592. };
  593.  
  594.  
  595. static void
  596. do_rasterize_bin(struct lp_rasterizer_task *task,
  597.                  const struct cmd_bin *bin,
  598.                  int x, int y)
  599. {
  600.    const struct cmd_block *block;
  601.    unsigned k;
  602.  
  603.    if (0)
  604.       lp_debug_bin(bin, x, y);
  605.  
  606.    for (block = bin->head; block; block = block->next) {
  607.       for (k = 0; k < block->count; k++) {
  608.          dispatch[block->cmd[k]]( task, block->arg[k] );
  609.       }
  610.    }
  611. }
  612.  
  613.  
  614.  
  615. /**
  616.  * Rasterize commands for a single bin.
  617.  * \param x, y  position of the bin's tile in the framebuffer
  618.  * Must be called between lp_rast_begin() and lp_rast_end().
  619.  * Called per thread.
  620.  */
  621. static void
  622. rasterize_bin(struct lp_rasterizer_task *task,
  623.               const struct cmd_bin *bin, int x, int y )
  624. {
  625.    lp_rast_tile_begin( task, bin, x, y );
  626.  
  627.    do_rasterize_bin(task, bin, x, y);
  628.  
  629.    lp_rast_tile_end(task);
  630.  
  631.  
  632.    /* Debug/Perf flags:
  633.     */
  634.    if (bin->head->count == 1) {
  635.       if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
  636.          LP_COUNT(nr_pure_shade_opaque_64);
  637.       else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
  638.          LP_COUNT(nr_pure_shade_64);
  639.    }
  640. }
  641.  
  642.  
  643. /* An empty bin is one that just loads the contents of the tile and
  644.  * stores them again unchanged.  This typically happens when bins have
  645.  * been flushed for some reason in the middle of a frame, or when
  646.  * incremental updates are being made to a render target.
  647.  *
  648.  * Try to avoid doing pointless work in this case.
  649.  */
  650. static boolean
  651. is_empty_bin( const struct cmd_bin *bin )
  652. {
  653.    return bin->head == NULL;
  654. }
  655.  
  656.  
  657. /**
  658.  * Rasterize/execute all bins within a scene.
  659.  * Called per thread.
  660.  */
  661. static void
  662. rasterize_scene(struct lp_rasterizer_task *task,
  663.                 struct lp_scene *scene)
  664. {
  665.    task->scene = scene;
  666.  
  667.    if (!task->rast->no_rast && !scene->discard) {
  668.       /* loop over scene bins, rasterize each */
  669.       {
  670.          struct cmd_bin *bin;
  671.          int i, j;
  672.  
  673.          assert(scene);
  674.          while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
  675.             if (!is_empty_bin( bin ))
  676.                rasterize_bin(task, bin, i, j);
  677.          }
  678.       }
  679.    }
  680.  
  681.  
  682.    if (scene->fence) {
  683.       lp_fence_signal(scene->fence);
  684.    }
  685.  
  686.    task->scene = NULL;
  687. }
  688.  
  689.  
  690. /**
  691.  * Called by setup module when it has something for us to render.
  692.  */
  693. void
  694. lp_rast_queue_scene( struct lp_rasterizer *rast,
  695.                      struct lp_scene *scene)
  696. {
  697.    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
  698.  
  699.    if (rast->num_threads == 0) {
  700.       /* no threading */
  701.       unsigned fpstate = util_fpstate_get();
  702.  
  703.       /* Make sure that denorms are treated like zeros. This is
  704.        * the behavior required by D3D10. OpenGL doesn't care.
  705.        */
  706.       util_fpstate_set_denorms_to_zero(fpstate);
  707.  
  708.       lp_rast_begin( rast, scene );
  709.  
  710.       rasterize_scene( &rast->tasks[0], scene );
  711.  
  712.       lp_rast_end( rast );
  713.  
  714.       util_fpstate_set(fpstate);
  715.  
  716.       rast->curr_scene = NULL;
  717.    }
  718.    else {
  719.       /* threaded rendering! */
  720.       unsigned i;
  721.  
  722.       lp_scene_enqueue( rast->full_scenes, scene );
  723.  
  724.       /* signal the threads that there's work to do */
  725.       for (i = 0; i < rast->num_threads; i++) {
  726.          pipe_semaphore_signal(&rast->tasks[i].work_ready);
  727.       }
  728.    }
  729.  
  730.    LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
  731. }
  732.  
  733.  
  734. void
  735. lp_rast_finish( struct lp_rasterizer *rast )
  736. {
  737.    if (rast->num_threads == 0) {
  738.       /* nothing to do */
  739.    }
  740.    else {
  741.       int i;
  742.  
  743.       /* wait for work to complete */
  744.       for (i = 0; i < rast->num_threads; i++) {
  745.          pipe_semaphore_wait(&rast->tasks[i].work_done);
  746.       }
  747.    }
  748. }
  749.  
  750.  
  751. /**
  752.  * This is the thread's main entrypoint.
  753.  * It's a simple loop:
  754.  *   1. wait for work
  755.  *   2. do work
  756.  *   3. signal that we're done
  757.  */
  758. static PIPE_THREAD_ROUTINE( thread_function, init_data )
  759. {
  760.    struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
  761.    struct lp_rasterizer *rast = task->rast;
  762.    boolean debug = false;
  763.    char thread_name[16];
  764.    unsigned fpstate;
  765.  
  766.    util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index);
  767.    pipe_thread_setname(thread_name);
  768.  
  769.    /* Make sure that denorms are treated like zeros. This is
  770.     * the behavior required by D3D10. OpenGL doesn't care.
  771.     */
  772.    fpstate = util_fpstate_get();
  773.    util_fpstate_set_denorms_to_zero(fpstate);
  774.  
  775.    while (1) {
  776.       /* wait for work */
  777.       if (debug)
  778.          debug_printf("thread %d waiting for work\n", task->thread_index);
  779.       pipe_semaphore_wait(&task->work_ready);
  780.  
  781.       if (rast->exit_flag)
  782.          break;
  783.  
  784.       if (task->thread_index == 0) {
  785.          /* thread[0]:
  786.           *  - get next scene to rasterize
  787.           *  - map the framebuffer surfaces
  788.           */
  789.          lp_rast_begin( rast,
  790.                         lp_scene_dequeue( rast->full_scenes, TRUE ) );
  791.       }
  792.  
  793.       /* Wait for all threads to get here so that threads[1+] don't
  794.        * get a null rast->curr_scene pointer.
  795.        */
  796.       pipe_barrier_wait( &rast->barrier );
  797.  
  798.       /* do work */
  799.       if (debug)
  800.          debug_printf("thread %d doing work\n", task->thread_index);
  801.  
  802.       rasterize_scene(task,
  803.                       rast->curr_scene);
  804.      
  805.       /* wait for all threads to finish with this scene */
  806.       pipe_barrier_wait( &rast->barrier );
  807.  
  808.       /* XXX: shouldn't be necessary:
  809.        */
  810.       if (task->thread_index == 0) {
  811.          lp_rast_end( rast );
  812.       }
  813.  
  814.       /* signal done with work */
  815.       if (debug)
  816.          debug_printf("thread %d done working\n", task->thread_index);
  817.  
  818.       pipe_semaphore_signal(&task->work_done);
  819.    }
  820.  
  821. #ifdef _WIN32
  822.    pipe_semaphore_signal(&task->work_done);
  823. #endif
  824.  
  825.    return 0;
  826. }
  827.  
  828.  
  829. /**
  830.  * Initialize semaphores and spawn the threads.
  831.  */
  832. static void
  833. create_rast_threads(struct lp_rasterizer *rast)
  834. {
  835.    unsigned i;
  836.  
  837.    /* NOTE: if num_threads is zero, we won't use any threads */
  838.    for (i = 0; i < rast->num_threads; i++) {
  839.       pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
  840.       pipe_semaphore_init(&rast->tasks[i].work_done, 0);
  841.       rast->threads[i] = pipe_thread_create(thread_function,
  842.                                             (void *) &rast->tasks[i]);
  843.    }
  844. }
  845.  
  846.  
  847.  
  848. /**
  849.  * Create new lp_rasterizer.  If num_threads is zero, don't create any
  850.  * new threads, do rendering synchronously.
  851.  * \param num_threads  number of rasterizer threads to create
  852.  */
  853. struct lp_rasterizer *
  854. lp_rast_create( unsigned num_threads )
  855. {
  856.    struct lp_rasterizer *rast;
  857.    unsigned i;
  858.  
  859.    rast = CALLOC_STRUCT(lp_rasterizer);
  860.    if (!rast) {
  861.       goto no_rast;
  862.    }
  863.  
  864.    rast->full_scenes = lp_scene_queue_create();
  865.    if (!rast->full_scenes) {
  866.       goto no_full_scenes;
  867.    }
  868.  
  869.    for (i = 0; i < Elements(rast->tasks); i++) {
  870.       struct lp_rasterizer_task *task = &rast->tasks[i];
  871.       task->rast = rast;
  872.       task->thread_index = i;
  873.    }
  874.  
  875.    rast->num_threads = num_threads;
  876.  
  877.    rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
  878.  
  879.    create_rast_threads(rast);
  880.  
  881.    /* for synchronizing rasterization threads */
  882.    pipe_barrier_init( &rast->barrier, rast->num_threads );
  883.  
  884.    memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
  885.  
  886.    return rast;
  887.  
  888. no_full_scenes:
  889.    FREE(rast);
  890. no_rast:
  891.    return NULL;
  892. }
  893.  
  894.  
  895. /* Shutdown:
  896.  */
  897. void lp_rast_destroy( struct lp_rasterizer *rast )
  898. {
  899.    unsigned i;
  900.  
  901.    /* Set exit_flag and signal each thread's work_ready semaphore.
  902.     * Each thread will be woken up, notice that the exit_flag is set and
  903.     * break out of its main loop.  The thread will then exit.
  904.     */
  905.    rast->exit_flag = TRUE;
  906.    for (i = 0; i < rast->num_threads; i++) {
  907.       pipe_semaphore_signal(&rast->tasks[i].work_ready);
  908.    }
  909.  
  910.    /* Wait for threads to terminate before cleaning up per-thread data.
  911.     * We don't actually call pipe_thread_wait to avoid dead lock on Windows
  912.     * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */
  913.    for (i = 0; i < rast->num_threads; i++) {
  914. #ifdef _WIN32
  915.       pipe_semaphore_wait(&rast->tasks[i].work_done);
  916. #else
  917.       pipe_thread_wait(rast->threads[i]);
  918. #endif
  919.    }
  920.  
  921.    /* Clean up per-thread data */
  922.    for (i = 0; i < rast->num_threads; i++) {
  923.       pipe_semaphore_destroy(&rast->tasks[i].work_ready);
  924.       pipe_semaphore_destroy(&rast->tasks[i].work_done);
  925.    }
  926.  
  927.    /* for synchronizing rasterization threads */
  928.    pipe_barrier_destroy( &rast->barrier );
  929.  
  930.    lp_scene_queue_destroy(rast->full_scenes);
  931.  
  932.    FREE(rast);
  933. }
  934.  
  935.  
  936.