Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /*
  29.  * Binning code for triangles
  30.  */
  31.  
  32. #include "util/u_math.h"
  33. #include "util/u_memory.h"
  34. #include "util/u_rect.h"
  35. #include "util/u_sse.h"
  36. #include "lp_perf.h"
  37. #include "lp_setup_context.h"
  38. #include "lp_rast.h"
  39. #include "lp_state_fs.h"
  40. #include "lp_state_setup.h"
  41. #include "lp_context.h"
  42.  
  43. #define NUM_CHANNELS 4
  44.  
  45. #if defined(PIPE_ARCH_SSE)
  46. #include <emmintrin.h>
  47. #endif
  48.    
  49. static INLINE int
  50. subpixel_snap(float a)
  51. {
  52.    return util_iround(FIXED_ONE * a);
  53. }
  54.  
  55. static INLINE float
  56. fixed_to_float(int a)
  57. {
  58.    return a * (1.0f / FIXED_ONE);
  59. }
  60.  
  61.  
  /*
   * Triangle vertex positions, edge deltas and area, all in fixed-point
   * coordinates (as produced by subpixel_snap()).
   *
   * Field order is significant: the SSE path of do_triangle_ccw() loads
   * x[] and y[] as whole four-lane vectors.
   */
  struct fixed_position {
     int x[4];         /* vertex x coords; x[3] is padding, set to 0 */
     int y[4];         /* vertex y coords; y[3] is padding, set to 0 */
     int area;         /* dx01 * dy20 - dx20 * dy01 */
     int dx01, dy01;   /* x[0] - x[1], y[0] - y[1] */
     int dx20, dy20;   /* x[2] - x[0], y[2] - y[0] */
  };
  72.  
  73.  
  74. /**
  75.  * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
  76.  * immediately after it.
  77.  * The memory is allocated from the per-scene pool, not per-tile.
  78.  * \param tri_size  returns number of bytes allocated
  79.  * \param num_inputs  number of fragment shader inputs
  80.  * \return pointer to triangle space
  81.  */
  82. struct lp_rast_triangle *
  83. lp_setup_alloc_triangle(struct lp_scene *scene,
  84.                         unsigned nr_inputs,
  85.                         unsigned nr_planes,
  86.                         unsigned *tri_size)
  87. {
  88.    unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
  89.    unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
  90.    struct lp_rast_triangle *tri;
  91.  
  92.    *tri_size = (sizeof(struct lp_rast_triangle) +
  93.                 3 * input_array_sz +
  94.                 plane_sz);
  95.  
  96.    tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
  97.    if (tri == NULL)
  98.       return NULL;
  99.  
  100.    tri->inputs.stride = input_array_sz;
  101.  
  102.    {
  103.       char *a = (char *)tri;
  104.       char *b = (char *)&GET_PLANES(tri)[nr_planes];
  105.       assert(b - a == *tri_size);
  106.    }
  107.  
  108.    return tri;
  109. }
  110.  
  111. void
  112. lp_setup_print_vertex(struct lp_setup_context *setup,
  113.                       const char *name,
  114.                       const float (*v)[4])
  115. {
  116.    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
  117.    int i, j;
  118.  
  119.    debug_printf("   wpos (%s[0]) xyzw %f %f %f %f\n",
  120.                 name,
  121.                 v[0][0], v[0][1], v[0][2], v[0][3]);
  122.  
  123.    for (i = 0; i < key->num_inputs; i++) {
  124.       const float *in = v[key->inputs[i].src_index];
  125.  
  126.       debug_printf("  in[%d] (%s[%d]) %s%s%s%s ",
  127.                    i,
  128.                    name, key->inputs[i].src_index,
  129.                    (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
  130.                    (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
  131.                    (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
  132.                    (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
  133.  
  134.       for (j = 0; j < 4; j++)
  135.          if (key->inputs[i].usage_mask & (1<<j))
  136.             debug_printf("%.5f ", in[j]);
  137.  
  138.       debug_printf("\n");
  139.    }
  140. }
  141.  
  142.  
  /**
   * Print triangle vertex attribs (for debug).
   *
   * Reports the winding derived from the sign of the 2D cross product of
   * the edges (v0 - v2) and (v1 - v2), then dumps the three vertices.
   */
  void
  lp_setup_print_triangle(struct lp_setup_context *setup,
                          const float (*v0)[4],
                          const float (*v1)[4],
                          const float (*v2)[4])
  {
     /* Edge vectors relative to v2, taken from the position slot. */
     const float edge0_x = v0[0][0] - v2[0][0];
     const float edge0_y = v0[0][1] - v2[0][1];
     const float edge1_x = v1[0][0] - v2[0][0];
     const float edge1_y = v1[0][1] - v2[0][1];

     /* z component of cross(edge0, edge1) */
     const float det = edge0_x * edge1_y - edge0_y * edge1_x;

     debug_printf("triangle\n");

     if (det > 0.0f) {
        debug_printf("   - cw\n");
     }
     else if (det < 0.0f) {
        debug_printf("   - ccw\n");
     }
     else {
        debug_printf("   - zero area\n");
     }

     lp_setup_print_vertex(setup, "v0", v0);
     lp_setup_print_vertex(setup, "v1", v1);
     lp_setup_print_vertex(setup, "v2", v2);
  }
  174.  
  175.  
  176. #define MAX_PLANES 8
  177. static unsigned
  178. lp_rast_tri_tab[MAX_PLANES+1] = {
  179.    0,               /* should be impossible */
  180.    LP_RAST_OP_TRIANGLE_1,
  181.    LP_RAST_OP_TRIANGLE_2,
  182.    LP_RAST_OP_TRIANGLE_3,
  183.    LP_RAST_OP_TRIANGLE_4,
  184.    LP_RAST_OP_TRIANGLE_5,
  185.    LP_RAST_OP_TRIANGLE_6,
  186.    LP_RAST_OP_TRIANGLE_7,
  187.    LP_RAST_OP_TRIANGLE_8
  188. };
  189.  
  190.  
  191.  
  192. /**
  193.  * The primitive covers the whole tile- shade whole tile.
  194.  *
  195.  * \param tx, ty  the tile position in tiles, not pixels
  196.  */
  197. static boolean
  198. lp_setup_whole_tile(struct lp_setup_context *setup,
  199.                     const struct lp_rast_shader_inputs *inputs,
  200.                     int tx, int ty)
  201. {
  202.    struct lp_scene *scene = setup->scene;
  203.  
  204.    LP_COUNT(nr_fully_covered_64);
  205.  
  206.    /* if variant is opaque and scissor doesn't effect the tile */
  207.    if (inputs->opaque) {
  208.       /* Several things prevent this optimization from working:
  209.        * - For layered rendering we can't determine if this covers the same layer
  210.        * as previous rendering (or in case of clears those actually always cover
  211.        * all layers so optimization is impossible). Need to use fb_max_layer and
  212.        * not setup->layer_slot to determine this since even if there's currently
  213.        * no slot assigned previous rendering could have used one.
  214.        * - If there were any Begin/End query commands in the scene then those
  215.        * would get removed which would be very wrong. Furthermore, if queries
  216.        * were just active we also can't do the optimization since to get
  217.        * accurate query results we unfortunately need to execute the rendering
  218.        * commands.
  219.        */
  220.       if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) {
  221.          /*
  222.           * All previous rendering will be overwritten so reset the bin.
  223.           */
  224.          lp_scene_bin_reset( scene, tx, ty );
  225.       }
  226.  
  227.       LP_COUNT(nr_shade_opaque_64);
  228.       return lp_scene_bin_cmd_with_state( scene, tx, ty,
  229.                                           setup->fs.stored,
  230.                                           LP_RAST_OP_SHADE_TILE_OPAQUE,
  231.                                           lp_rast_arg_inputs(inputs) );
  232.    } else {
  233.       LP_COUNT(nr_shade_64);
  234.       return lp_scene_bin_cmd_with_state( scene, tx, ty,
  235.                                           setup->fs.stored,
  236.                                           LP_RAST_OP_SHADE_TILE,
  237.                                           lp_rast_arg_inputs(inputs) );
  238.    }
  239. }
  240.  
  241.  
  242. /**
  243.  * Do basic setup for triangle rasterization and determine which
  244.  * framebuffer tiles are touched.  Put the triangle in the scene's
  245.  * bins for the tiles which we overlap.
  246.  */
  247. static boolean
  248. do_triangle_ccw(struct lp_setup_context *setup,
  249.                 struct fixed_position* position,
  250.                 const float (*v0)[4],
  251.                 const float (*v1)[4],
  252.                 const float (*v2)[4],
  253.                 boolean frontfacing )
  254. {
  255.    struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
  256.    struct lp_scene *scene = setup->scene;
  257.    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
  258.    struct lp_rast_triangle *tri;
  259.    struct lp_rast_plane *plane;
  260.    struct u_rect bbox;
  261.    unsigned tri_bytes;
  262.    int nr_planes = 3;
  263.    unsigned scissor_index = 0;
  264.    unsigned layer = 0;
  265.  
  266.    /* Area should always be positive here */
  267.    assert(position->area > 0);
  268.  
  269.    if (0)
  270.       lp_setup_print_triangle(setup, v0, v1, v2);
  271.  
  272.    if (setup->scissor_test) {
  273.       nr_planes = 7;
  274.       if (setup->viewport_index_slot > 0) {
  275.          unsigned *udata = (unsigned*)v0[setup->viewport_index_slot];
  276.          scissor_index = lp_clamp_scissor_idx(*udata);
  277.       }
  278.    }
  279.    else {
  280.       nr_planes = 3;
  281.    }
  282.    if (setup->layer_slot > 0) {
  283.       layer = *(unsigned*)v1[setup->layer_slot];
  284.       layer = MIN2(layer, scene->fb_max_layer);
  285.    }
  286.  
  287.    /* Bounding rectangle (in pixels) */
  288.    {
  289.       /* Yes this is necessary to accurately calculate bounding boxes
  290.        * with the two fill-conventions we support.  GL (normally) ends
  291.        * up needing a bottom-left fill convention, which requires
  292.        * slightly different rounding.
  293.        */
  294.       int adj = (setup->pixel_offset != 0) ? 1 : 0;
  295.  
  296.       /* Inclusive x0, exclusive x1 */
  297.       bbox.x0 =  MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
  298.       bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;
  299.  
  300.       /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
  301.       bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
  302.       bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
  303.    }
  304.  
  305.    if (bbox.x1 < bbox.x0 ||
  306.        bbox.y1 < bbox.y0) {
  307.       if (0) debug_printf("empty bounding box\n");
  308.       LP_COUNT(nr_culled_tris);
  309.       return TRUE;
  310.    }
  311.  
  312.    if (!u_rect_test_intersection(&setup->draw_regions[scissor_index], &bbox)) {
  313.       if (0) debug_printf("offscreen\n");
  314.       LP_COUNT(nr_culled_tris);
  315.       return TRUE;
  316.    }
  317.  
  318.    /* Can safely discard negative regions, but need to keep hold of
  319.     * information about when the triangle extends past screen
  320.     * boundaries.  See trimmed_box in lp_setup_bin_triangle().
  321.     */
  322.    bbox.x0 = MAX2(bbox.x0, 0);
  323.    bbox.y0 = MAX2(bbox.y0, 0);
  324.  
  325.    tri = lp_setup_alloc_triangle(scene,
  326.                                  key->num_inputs,
  327.                                  nr_planes,
  328.                                  &tri_bytes);
  329.    if (!tri)
  330.       return FALSE;
  331.  
  332. #if 0
  333.    tri->v[0][0] = v0[0][0];
  334.    tri->v[1][0] = v1[0][0];
  335.    tri->v[2][0] = v2[0][0];
  336.    tri->v[0][1] = v0[0][1];
  337.    tri->v[1][1] = v1[0][1];
  338.    tri->v[2][1] = v2[0][1];
  339. #endif
  340.  
  341.    LP_COUNT(nr_tris);
  342.  
  343.    if (lp_context->active_statistics_queries) {
  344.       lp_context->pipeline_statistics.c_primitives++;
  345.    }
  346.  
  347.    /* Setup parameter interpolants:
  348.     */
  349.    setup->setup.variant->jit_function( v0,
  350.                                        v1,
  351.                                        v2,
  352.                                        frontfacing,
  353.                                        GET_A0(&tri->inputs),
  354.                                        GET_DADX(&tri->inputs),
  355.                                        GET_DADY(&tri->inputs) );
  356.  
  357.    tri->inputs.frontfacing = frontfacing;
  358.    tri->inputs.disable = FALSE;
  359.    tri->inputs.opaque = setup->fs.current.variant->opaque;
  360.    tri->inputs.layer = layer;
  361.  
  362.    if (0)
  363.       lp_dump_setup_coef(&setup->setup.variant->key,
  364.                          (const float (*)[4])GET_A0(&tri->inputs),
  365.                          (const float (*)[4])GET_DADX(&tri->inputs),
  366.                          (const float (*)[4])GET_DADY(&tri->inputs));
  367.  
  368.    plane = GET_PLANES(tri);
  369.  
  370. #if defined(PIPE_ARCH_SSE)
  371.    {
  372.       __m128i vertx, verty;
  373.       __m128i shufx, shufy;
  374.       __m128i dcdx, dcdy, c;
  375.       __m128i unused;
  376.       __m128i dcdx_neg_mask;
  377.       __m128i dcdy_neg_mask;
  378.       __m128i dcdx_zero_mask;
  379.       __m128i top_left_flag;
  380.       __m128i c_inc_mask, c_inc;
  381.       __m128i eo, p0, p1, p2;
  382.       __m128i zero = _mm_setzero_si128();
  383.  
  384.       vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
  385.       verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
  386.  
  387.       shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
  388.       shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
  389.  
  390.       dcdx = _mm_sub_epi32(verty, shufy);
  391.       dcdy = _mm_sub_epi32(vertx, shufx);
  392.  
  393.       dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
  394.       dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
  395.       dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
  396.  
  397.       top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
  398.  
  399.       c_inc_mask = _mm_or_si128(dcdx_neg_mask,
  400.                                 _mm_and_si128(dcdx_zero_mask,
  401.                                               _mm_xor_si128(dcdy_neg_mask,
  402.                                                             top_left_flag)));
  403.  
  404.       c_inc = _mm_srli_epi32(c_inc_mask, 31);
  405.  
  406.       c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
  407.                         mm_mullo_epi32(dcdy, verty));
  408.  
  409.       c = _mm_add_epi32(c, c_inc);
  410.  
  411.       /* Scale up to match c:
  412.        */
  413.       dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
  414.       dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
  415.  
  416.       /* Calculate trivial reject values:
  417.        */
  418.       eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
  419.                          _mm_and_si128(dcdx_neg_mask, dcdx));
  420.  
  421.       /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
  422.  
  423.       /* Pointless transpose which gets undone immediately in
  424.        * rasterization:
  425.        */
  426.       transpose4_epi32(&c, &dcdx, &dcdy, &eo,
  427.                        &p0, &p1, &p2, &unused);
  428.  
  429.       _mm_store_si128((__m128i *)&plane[0], p0);
  430.       _mm_store_si128((__m128i *)&plane[1], p1);
  431.       _mm_store_si128((__m128i *)&plane[2], p2);
  432.    }
  433. #else
  434.    {
  435.       int i;
  436.       plane[0].dcdy = position->dx01;
  437.       plane[1].dcdy = position->x[1] - position->x[2];
  438.       plane[2].dcdy = position->dx20;
  439.       plane[0].dcdx = position->dy01;
  440.       plane[1].dcdx = position->y[1] - position->y[2];
  441.       plane[2].dcdx = position->dy20;
  442.  
  443.       for (i = 0; i < 3; i++) {
  444.          /* half-edge constants, will be interated over the whole render
  445.           * target.
  446.           */
  447.          plane[i].c = plane[i].dcdx * position->x[i] - plane[i].dcdy * position->y[i];
  448.  
  449.          /* correct for top-left vs. bottom-left fill convention.
  450.           */        
  451.          if (plane[i].dcdx < 0) {
  452.             /* both fill conventions want this - adjust for left edges */
  453.             plane[i].c++;            
  454.          }
  455.          else if (plane[i].dcdx == 0) {
  456.             if (setup->bottom_edge_rule == 0){
  457.                /* correct for top-left fill convention:
  458.                 */
  459.                if (plane[i].dcdy > 0) plane[i].c++;
  460.             }
  461.             else {
  462.                /* correct for bottom-left fill convention:
  463.                 */
  464.                if (plane[i].dcdy < 0) plane[i].c++;
  465.             }
  466.          }
  467.  
  468.          plane[i].dcdx *= FIXED_ONE;
  469.          plane[i].dcdy *= FIXED_ONE;
  470.  
  471.          /* find trivial reject offsets for each edge for a single-pixel
  472.           * sized block.  These will be scaled up at each recursive level to
  473.           * match the active blocksize.  Scaling in this way works best if
  474.           * the blocks are square.
  475.           */
  476.          plane[i].eo = 0;
  477.          if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
  478.          if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
  479.       }
  480.    }
  481. #endif
  482.  
  483.    if (0) {
  484.       debug_printf("p0: %08x/%08x/%08x/%08x\n",
  485.                    plane[0].c,
  486.                    plane[0].dcdx,
  487.                    plane[0].dcdy,
  488.                    plane[0].eo);
  489.      
  490.       debug_printf("p1: %08x/%08x/%08x/%08x\n",
  491.                    plane[1].c,
  492.                    plane[1].dcdx,
  493.                    plane[1].dcdy,
  494.                    plane[1].eo);
  495.      
  496.       debug_printf("p0: %08x/%08x/%08x/%08x\n",
  497.                    plane[2].c,
  498.                    plane[2].dcdx,
  499.                    plane[2].dcdy,
  500.                    plane[2].eo);
  501.    }
  502.  
  503.  
  504.    /*
  505.     * When rasterizing scissored tris, use the intersection of the
  506.     * triangle bounding box and the scissor rect to generate the
  507.     * scissor planes.
  508.     *
  509.     * This permits us to cut off the triangle "tails" that are present
  510.     * in the intermediate recursive levels caused when two of the
  511.     * triangles edges don't diverge quickly enough to trivially reject
  512.     * exterior blocks from the triangle.
  513.     *
  514.     * It's not really clear if it's worth worrying about these tails,
  515.     * but since we generate the planes for each scissored tri, it's
  516.     * free to trim them in this case.
  517.     *
  518.     * Note that otherwise, the scissor planes only vary in 'C' value,
  519.     * and even then only on state-changes.  Could alternatively store
  520.     * these planes elsewhere.
  521.     */
  522.    if (nr_planes == 7) {
  523.       const struct u_rect *scissor = &setup->scissors[scissor_index];
  524.  
  525.       plane[3].dcdx = -1;
  526.       plane[3].dcdy = 0;
  527.       plane[3].c = 1-scissor->x0;
  528.       plane[3].eo = 1;
  529.  
  530.       plane[4].dcdx = 1;
  531.       plane[4].dcdy = 0;
  532.       plane[4].c = scissor->x1+1;
  533.       plane[4].eo = 0;
  534.  
  535.       plane[5].dcdx = 0;
  536.       plane[5].dcdy = 1;
  537.       plane[5].c = 1-scissor->y0;
  538.       plane[5].eo = 1;
  539.  
  540.       plane[6].dcdx = 0;
  541.       plane[6].dcdy = -1;
  542.       plane[6].c = scissor->y1+1;
  543.       plane[6].eo = 0;
  544.    }
  545.  
  546.    return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, scissor_index);
  547. }
  548.  
  /*
   * Round the input down to the nearest power of two, i.e. return the
   * largest power of two that is less than or equal to n.
   *
   * Returns 0 when n is 0 (no bit set) on both code paths.
   */
  static INLINE uint32_t
  floor_pot(uint32_t n)
  {
  #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
     /* bsr leaves its destination undefined for a zero source. */
     if (n == 0)
        return 0;

     /* n becomes the index of its highest set bit. */
     __asm__("bsr %1,%0"
            : "=r" (n)
            : "rm" (n));

     /* Shift an unsigned one: the index can be 31, and 1 << 31 overflows
      * a signed int.
      */
     return (uint32_t)1 << n;
  #else
     /* Smear the highest set bit into every lower position ... */
     n |= (n >>  1);
     n |= (n >>  2);
     n |= (n >>  4);
     n |= (n >>  8);
     n |= (n >> 16);
     /* ... then drop everything below it. */
     return n - (n >> 1);
  #endif
  }
  574.  
  575.  
  576. boolean
  577. lp_setup_bin_triangle( struct lp_setup_context *setup,
  578.                        struct lp_rast_triangle *tri,
  579.                        const struct u_rect *bbox,
  580.                        int nr_planes,
  581.                        unsigned scissor_index )
  582. {
  583.    struct lp_scene *scene = setup->scene;
  584.    struct u_rect trimmed_box = *bbox;  
  585.    int i;
  586.  
  587.    /* What is the largest power-of-two boundary this triangle crosses:
  588.     */
  589.    int dx = floor_pot((bbox->x0 ^ bbox->x1) |
  590.                       (bbox->y0 ^ bbox->y1));
  591.  
  592.    /* The largest dimension of the rasterized area of the triangle
  593.     * (aligned to a 4x4 grid), rounded down to the nearest power of two:
  594.     */
  595.    int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
  596.                       (bbox->y1 - (bbox->y0 & ~3)));
  597.  
  598.    /* Now apply scissor, etc to the bounding box.  Could do this
  599.     * earlier, but it confuses the logic for tri-16 and would force
  600.     * the rasterizer to also respect scissor, etc, just for the rare
  601.     * cases where a small triangle extends beyond the scissor.
  602.     */
  603.    u_rect_find_intersection(&setup->draw_regions[scissor_index],
  604.                             &trimmed_box);
  605.  
  606.    /* Determine which tile(s) intersect the triangle's bounding box
  607.     */
  608.    if (dx < TILE_SIZE)
  609.    {
  610.       int ix0 = bbox->x0 / TILE_SIZE;
  611.       int iy0 = bbox->y0 / TILE_SIZE;
  612.       unsigned px = bbox->x0 & 63 & ~3;
  613.       unsigned py = bbox->y0 & 63 & ~3;
  614.  
  615.       assert(iy0 == bbox->y1 / TILE_SIZE &&
  616.              ix0 == bbox->x1 / TILE_SIZE);
  617.  
  618.       if (nr_planes == 3) {
  619.          if (sz < 4)
  620.          {
  621.             /* Triangle is contained in a single 4x4 stamp:
  622.              */
  623.             assert(px + 4 <= TILE_SIZE);
  624.             assert(py + 4 <= TILE_SIZE);
  625.             return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
  626.                                                 setup->fs.stored,
  627.                                                 LP_RAST_OP_TRIANGLE_3_4,
  628.                                                 lp_rast_arg_triangle_contained(tri, px, py) );
  629.          }
  630.  
  631.          if (sz < 16)
  632.          {
  633.             /* Triangle is contained in a single 16x16 block:
  634.              */
  635.  
  636.             /*
  637.              * The 16x16 block is only 4x4 aligned, and can exceed the tile
  638.              * dimensions if the triangle is 16 pixels in one dimension but 4
  639.              * in the other. So budge the 16x16 back inside the tile.
  640.              */
  641.             px = MIN2(px, TILE_SIZE - 16);
  642.             py = MIN2(py, TILE_SIZE - 16);
  643.  
  644.             assert(px + 16 <= TILE_SIZE);
  645.             assert(py + 16 <= TILE_SIZE);
  646.  
  647.             return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
  648.                                                 setup->fs.stored,
  649.                                                 LP_RAST_OP_TRIANGLE_3_16,
  650.                                                 lp_rast_arg_triangle_contained(tri, px, py) );
  651.          }
  652.       }
  653.       else if (nr_planes == 4 && sz < 16)
  654.       {
  655.          px = MIN2(px, TILE_SIZE - 16);
  656.          py = MIN2(py, TILE_SIZE - 16);
  657.  
  658.          assert(px + 16 <= TILE_SIZE);
  659.          assert(py + 16 <= TILE_SIZE);
  660.  
  661.          return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
  662.                                             setup->fs.stored,
  663.                                             LP_RAST_OP_TRIANGLE_4_16,
  664.                                             lp_rast_arg_triangle_contained(tri, px, py));
  665.       }
  666.  
  667.  
  668.       /* Triangle is contained in a single tile:
  669.        */
  670.       return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored,
  671.                                           lp_rast_tri_tab[nr_planes],
  672.                                           lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
  673.    }
  674.    else
  675.    {
  676.       struct lp_rast_plane *plane = GET_PLANES(tri);
  677.       int c[MAX_PLANES];
  678.       int ei[MAX_PLANES];
  679.  
  680.       int eo[MAX_PLANES];
  681.       int xstep[MAX_PLANES];
  682.       int ystep[MAX_PLANES];
  683.       int x, y;
  684.  
  685.       int ix0 = trimmed_box.x0 / TILE_SIZE;
  686.       int iy0 = trimmed_box.y0 / TILE_SIZE;
  687.       int ix1 = trimmed_box.x1 / TILE_SIZE;
  688.       int iy1 = trimmed_box.y1 / TILE_SIZE;
  689.      
  690.       for (i = 0; i < nr_planes; i++) {
  691.          c[i] = (plane[i].c +
  692.                  plane[i].dcdy * iy0 * TILE_SIZE -
  693.                  plane[i].dcdx * ix0 * TILE_SIZE);
  694.  
  695.          ei[i] = (plane[i].dcdy -
  696.                   plane[i].dcdx -
  697.                   plane[i].eo) << TILE_ORDER;
  698.  
  699.          eo[i] = plane[i].eo << TILE_ORDER;
  700.          xstep[i] = -(plane[i].dcdx << TILE_ORDER);
  701.          ystep[i] = plane[i].dcdy << TILE_ORDER;
  702.       }
  703.  
  704.  
  705.  
  706.       /* Test tile-sized blocks against the triangle.
  707.        * Discard blocks fully outside the tri.  If the block is fully
  708.        * contained inside the tri, bin an lp_rast_shade_tile command.
  709.        * Else, bin a lp_rast_triangle command.
  710.        */
  711.       for (y = iy0; y <= iy1; y++)
  712.       {
  713.          boolean in = FALSE;  /* are we inside the triangle? */
  714.          int cx[MAX_PLANES];
  715.  
  716.          for (i = 0; i < nr_planes; i++)
  717.             cx[i] = c[i];
  718.  
  719.          for (x = ix0; x <= ix1; x++)
  720.          {
  721.             int out = 0;
  722.             int partial = 0;
  723.  
  724.             for (i = 0; i < nr_planes; i++) {
  725.                int planeout = cx[i] + eo[i];
  726.                int planepartial = cx[i] + ei[i] - 1;
  727.                out |= (planeout >> 31);
  728.                partial |= (planepartial >> 31) & (1<<i);
  729.             }
  730.  
  731.             if (out) {
  732.                /* do nothing */
  733.                if (in)
  734.                   break;  /* exiting triangle, all done with this row */
  735.                LP_COUNT(nr_empty_64);
  736.             }
  737.             else if (partial) {
  738.                /* Not trivially accepted by at least one plane -
  739.                 * rasterize/shade partial tile
  740.                 */
  741.                int count = util_bitcount(partial);
  742.                in = TRUE;
  743.                
  744.                if (!lp_scene_bin_cmd_with_state( scene, x, y,
  745.                                                  setup->fs.stored,
  746.                                                  lp_rast_tri_tab[count],
  747.                                                  lp_rast_arg_triangle(tri, partial) ))
  748.                   goto fail;
  749.  
  750.                LP_COUNT(nr_partially_covered_64);
  751.             }
  752.             else {
  753.                /* triangle covers the whole tile- shade whole tile */
  754.                LP_COUNT(nr_fully_covered_64);
  755.                in = TRUE;
  756.                if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
  757.                   goto fail;
  758.             }
  759.  
  760.             /* Iterate cx values across the region:
  761.              */
  762.             for (i = 0; i < nr_planes; i++)
  763.                cx[i] += xstep[i];
  764.          }
  765.      
  766.          /* Iterate c values down the region:
  767.           */
  768.          for (i = 0; i < nr_planes; i++)
  769.             c[i] += ystep[i];
  770.       }
  771.    }
  772.  
  773.    return TRUE;
  774.  
  775. fail:
  776.    /* Need to disable any partially binned triangle.  This is easier
  777.     * than trying to locate all the triangle, shade-tile, etc,
  778.     * commands which may have been binned.
  779.     */
  780.    tri->inputs.disable = TRUE;
  781.    return FALSE;
  782. }
  783.  
  784.  
  785. /**
  786.  * Try to draw the triangle, restart the scene on failure.
  787.  */
  788. static void retry_triangle_ccw( struct lp_setup_context *setup,
  789.                                 struct fixed_position* position,
  790.                                 const float (*v0)[4],
  791.                                 const float (*v1)[4],
  792.                                 const float (*v2)[4],
  793.                                 boolean front)
  794. {
  795.    if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
  796.    {
  797.       if (!lp_setup_flush_and_restart(setup))
  798.          return;
  799.  
  800.       if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
  801.          return;
  802.    }
  803. }
  804.  
  805.  
  806. /**
  807.  * Calculate fixed position data for a triangle
  808.  */
  809. static INLINE void
  810. calc_fixed_position( struct lp_setup_context *setup,
  811.                      struct fixed_position* position,
  812.                      const float (*v0)[4],
  813.                      const float (*v1)[4],
  814.                      const float (*v2)[4])
  815. {
  816.    position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
  817.    position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
  818.    position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
  819.    position->x[3] = 0;
  820.  
  821.    position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
  822.    position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
  823.    position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
  824.    position->y[3] = 0;
  825.  
  826.    position->dx01 = position->x[0] - position->x[1];
  827.    position->dy01 = position->y[0] - position->y[1];
  828.  
  829.    position->dx20 = position->x[2] - position->x[0];
  830.    position->dy20 = position->y[2] - position->y[0];
  831.  
  832.    position->area = position->dx01 * position->dy20 - position->dx20 * position->dy01;
  833. }
  834.  
  835.  
  836. /**
  837.  * Rotate a triangle, flipping its clockwise direction,
  838.  * Swaps values for xy[0] and xy[1]
  839.  */
  840. static INLINE void
  841. rotate_fixed_position_01( struct fixed_position* position )
  842. {
  843.    int x, y;
  844.  
  845.    x = position->x[1];
  846.    y = position->y[1];
  847.    position->x[1] = position->x[0];
  848.    position->y[1] = position->y[0];
  849.    position->x[0] = x;
  850.    position->y[0] = y;
  851.  
  852.    position->dx01 = -position->dx01;
  853.    position->dy01 = -position->dy01;
  854.    position->dx20 = position->x[2] - position->x[0];
  855.    position->dy20 = position->y[2] - position->y[0];
  856.  
  857.    position->area = -position->area;
  858. }
  859.  
  860.  
  861. /**
  862.  * Rotate a triangle, flipping its clockwise direction,
  863.  * Swaps values for xy[1] and xy[2]
  864.  */
  865. static INLINE void
  866. rotate_fixed_position_12( struct fixed_position* position )
  867. {
  868.    int x, y;
  869.  
  870.    x = position->x[2];
  871.    y = position->y[2];
  872.    position->x[2] = position->x[1];
  873.    position->y[2] = position->y[1];
  874.    position->x[1] = x;
  875.    position->y[1] = y;
  876.  
  877.    x = position->dx01;
  878.    y = position->dy01;
  879.    position->dx01 = -position->dx20;
  880.    position->dy01 = -position->dy20;
  881.    position->dx20 = -x;
  882.    position->dy20 = -y;
  883.  
  884.    position->area = -position->area;
  885. }
  886.  
  887.  
/**
 * Signature of a triangle entry point: a setup context plus three vertex
 * attribute arrays (each vertex is an array of float[4] attributes).
 * The subdivision helpers in this file take one of these so that they can
 * re-submit the sub-triangles they produce.
 */
typedef void (*triangle_func_t)(struct lp_setup_context *setup,
                                const float (*v0)[4],
                                const float (*v1)[4],
                                const float (*v2)[4]);
  892.  
  893.  
  894. /**
  895.  * Subdivide this triangle by bisecting edge (v0, v1).
  896.  * \param pv  the provoking vertex (must = v0 or v1 or v2)
  897.  * TODO: should probably think about non-overflowing arithmetic elsewhere.
  898.  * This will definitely screw with pipeline counters for instance.
  899.  */
  900. static void
  901. subdiv_tri(struct lp_setup_context *setup,
  902.            const float (*v0)[4],
  903.            const float (*v1)[4],
  904.            const float (*v2)[4],
  905.            const float (*pv)[4],
  906.            triangle_func_t tri)
  907. {
  908.    unsigned n = setup->fs.current.variant->shader->info.base.num_inputs + 1;
  909.    const struct lp_shader_input *inputs =
  910.       setup->fs.current.variant->shader->inputs;
  911.    float vmid[PIPE_MAX_ATTRIBS][4];
  912.    const float (*vm)[4] = (const float (*)[4]) vmid;
  913.    unsigned i;
  914.    float w0, w1, wm;
  915.    boolean flatshade = setup->fs.current.variant->key.flatshade;
  916.  
  917.    /* find position midpoint (attrib[0] = position) */
  918.    vmid[0][0] = 0.5f * (v1[0][0] + v0[0][0]);
  919.    vmid[0][1] = 0.5f * (v1[0][1] + v0[0][1]);
  920.    vmid[0][2] = 0.5f * (v1[0][2] + v0[0][2]);
  921.    vmid[0][3] = 0.5f * (v1[0][3] + v0[0][3]);
  922.  
  923.    w0 = v0[0][3];
  924.    w1 = v1[0][3];
  925.    wm = vmid[0][3];
  926.  
  927.    /* interpolate other attributes */
  928.    for (i = 1; i < n; i++) {
  929.       if ((inputs[i - 1].interp == LP_INTERP_COLOR && flatshade) ||
  930.           inputs[i - 1].interp == LP_INTERP_CONSTANT) {
  931.          /* copy the provoking vertex's attribute */
  932.          vmid[i][0] = pv[i][0];
  933.          vmid[i][1] = pv[i][1];
  934.          vmid[i][2] = pv[i][2];
  935.          vmid[i][3] = pv[i][3];
  936.       }
  937.       else {
  938.          /* interpolate with perspective correction (for linear too) */
  939.          vmid[i][0] = 0.5f * (v1[i][0] * w1 + v0[i][0] * w0) / wm;
  940.          vmid[i][1] = 0.5f * (v1[i][1] * w1 + v0[i][1] * w0) / wm;
  941.          vmid[i][2] = 0.5f * (v1[i][2] * w1 + v0[i][2] * w0) / wm;
  942.          vmid[i][3] = 0.5f * (v1[i][3] * w1 + v0[i][3] * w0) / wm;
  943.       }
  944.    }
  945.  
  946.    /* handling flat shading and first vs. last provoking vertex is a
  947.     * little tricky...
  948.     */
  949.    if (pv == v0) {
  950.       if (setup->flatshade_first) {
  951.          /* first vertex must be v0 or vm */
  952.          tri(setup, v0, vm, v2);
  953.          tri(setup, vm, v1, v2);
  954.       }
  955.       else {
  956.          /* last vertex must be v0 or vm */
  957.          tri(setup, vm, v2, v0);
  958.          tri(setup, v1, v2, vm);
  959.       }
  960.    }
  961.    else if (pv == v1) {
  962.       if (setup->flatshade_first) {
  963.          tri(setup, vm, v2, v0);
  964.          tri(setup, v1, v2, vm);
  965.       }
  966.       else {
  967.          tri(setup, v2, v0, vm);
  968.          tri(setup, v2, vm, v1);
  969.       }
  970.    }
  971.    else {
  972.       if (setup->flatshade_first) {
  973.          tri(setup, v2, v0, vm);
  974.          tri(setup, v2, vm, v1);
  975.       }
  976.       else {
  977.          tri(setup, v0, vm, v2);
  978.          tri(setup, vm, v1, v2);
  979.       }
  980.    }
  981. }
  982.  
  983.  
  984. /**
  985.  * Check the lengths of the edges of the triangle.  If any edge is too
  986.  * long, subdivide the longest edge and draw two sub-triangles.
  987.  * Note: this may be called recursively.
  988.  * \return TRUE if triangle was subdivided, FALSE otherwise
  989.  */
  990. static boolean
  991. check_subdivide_triangle(struct lp_setup_context *setup,
  992.                          const float (*v0)[4],
  993.                          const float (*v1)[4],
  994.                          const float (*v2)[4],
  995.                          triangle_func_t tri)
  996. {
  997.    const float maxLen = 2048.0f;  /* longest permissible edge, in pixels */
  998.    float dx10, dy10, len10;
  999.    float dx21, dy21, len21;
  1000.    float dx02, dy02, len02;
  1001.    const float (*pv)[4] = setup->flatshade_first ? v0 : v2;
  1002.  
  1003.    /* compute lengths of triangle edges, squared */
  1004.    dx10 = v1[0][0] - v0[0][0];
  1005.    dy10 = v1[0][1] - v0[0][1];
  1006.    len10 = dx10 * dx10 + dy10 * dy10;
  1007.  
  1008.    dx21 = v2[0][0] - v1[0][0];
  1009.    dy21 = v2[0][1] - v1[0][1];
  1010.    len21 = dx21 * dx21 + dy21 * dy21;
  1011.  
  1012.    dx02 = v0[0][0] - v2[0][0];
  1013.    dy02 = v0[0][1] - v2[0][1];
  1014.    len02 = dx02 * dx02 + dy02 * dy02;
  1015.  
  1016.    /* Look for longest the edge that's longer than maxLen.  If we find
  1017.     * such an edge, split the triangle using the midpoint of that edge.
  1018.     * Note: it's important to split the longest edge, not just any edge
  1019.     * that's longer than maxLen.  Otherwise, we can get into a degenerate
  1020.     * situation and recurse indefinitely.
  1021.     */
  1022.    if (len10 > maxLen * maxLen &&
  1023.        len10 >= len21 &&
  1024.        len10 >= len02) {
  1025.       /* subdivide v0, v1 edge */
  1026.       subdiv_tri(setup, v0, v1, v2, pv, tri);
  1027.       return TRUE;
  1028.    }
  1029.  
  1030.    if (len21 > maxLen * maxLen &&
  1031.        len21 >= len10 &&
  1032.        len21 >= len02) {      
  1033.       /* subdivide v1, v2 edge */
  1034.       subdiv_tri(setup, v1, v2, v0, pv, tri);
  1035.       return TRUE;
  1036.    }
  1037.  
  1038.    if (len02 > maxLen * maxLen &&
  1039.        len02 >= len21 &&
  1040.        len02 >= len10) {      
  1041.       /* subdivide v2, v0 edge */
  1042.       subdiv_tri(setup, v2, v0, v1, pv, tri);
  1043.       return TRUE;
  1044.    }
  1045.  
  1046.    return FALSE;
  1047. }
  1048.  
  1049.  
  1050. /**
  1051.  * Draw triangle if it's CW, cull otherwise.
  1052.  */
  1053. static void triangle_cw( struct lp_setup_context *setup,
  1054.                          const float (*v0)[4],
  1055.                          const float (*v1)[4],
  1056.                          const float (*v2)[4] )
  1057. {
  1058.    struct fixed_position position;
  1059.  
  1060.    if (setup->subdivide_large_triangles &&
  1061.        check_subdivide_triangle(setup, v0, v1, v2, triangle_cw))
  1062.       return;
  1063.  
  1064.    calc_fixed_position(setup, &position, v0, v1, v2);
  1065.  
  1066.    if (position.area < 0) {
  1067.       if (setup->flatshade_first) {
  1068.          rotate_fixed_position_12(&position);
  1069.          retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
  1070.       } else {
  1071.          rotate_fixed_position_01(&position);
  1072.          retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
  1073.       }
  1074.    }
  1075. }
  1076.  
  1077.  
  1078. static void triangle_ccw( struct lp_setup_context *setup,
  1079.                           const float (*v0)[4],
  1080.                           const float (*v1)[4],
  1081.                           const float (*v2)[4])
  1082. {
  1083.    struct fixed_position position;
  1084.  
  1085.    if (setup->subdivide_large_triangles &&
  1086.        check_subdivide_triangle(setup, v0, v1, v2, triangle_ccw))
  1087.       return;
  1088.  
  1089.    calc_fixed_position(setup, &position, v0, v1, v2);
  1090.  
  1091.    if (position.area > 0)
  1092.       retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
  1093. }
  1094.  
  1095. /**
  1096.  * Draw triangle whether it's CW or CCW.
  1097.  */
  1098. static void triangle_both( struct lp_setup_context *setup,
  1099.                            const float (*v0)[4],
  1100.                            const float (*v1)[4],
  1101.                            const float (*v2)[4] )
  1102. {
  1103.    struct fixed_position position;
  1104.  
  1105.    if (setup->subdivide_large_triangles &&
  1106.        check_subdivide_triangle(setup, v0, v1, v2, triangle_both))
  1107.       return;
  1108.  
  1109.    calc_fixed_position(setup, &position, v0, v1, v2);
  1110.  
  1111.    if (0) {
  1112.       assert(!util_is_inf_or_nan(v0[0][0]));
  1113.       assert(!util_is_inf_or_nan(v0[0][1]));
  1114.       assert(!util_is_inf_or_nan(v1[0][0]));
  1115.       assert(!util_is_inf_or_nan(v1[0][1]));
  1116.       assert(!util_is_inf_or_nan(v2[0][0]));
  1117.       assert(!util_is_inf_or_nan(v2[0][1]));
  1118.    }
  1119.  
  1120.    if (position.area > 0)
  1121.       retry_triangle_ccw( setup, &position, v0, v1, v2, setup->ccw_is_frontface );
  1122.    else if (position.area < 0) {
  1123.       if (setup->flatshade_first) {
  1124.          rotate_fixed_position_12( &position );
  1125.          retry_triangle_ccw( setup, &position, v0, v2, v1, !setup->ccw_is_frontface );
  1126.       } else {
  1127.          rotate_fixed_position_01( &position );
  1128.          retry_triangle_ccw( setup, &position, v1, v0, v2, !setup->ccw_is_frontface );
  1129.       }
  1130.    }
  1131. }
  1132.  
  1133.  
  1134. static void triangle_nop( struct lp_setup_context *setup,
  1135.                           const float (*v0)[4],
  1136.                           const float (*v1)[4],
  1137.                           const float (*v2)[4] )
  1138. {
  1139. }
  1140.  
  1141.  
  1142. void
  1143. lp_setup_choose_triangle( struct lp_setup_context *setup )
  1144. {
  1145.    switch (setup->cullmode) {
  1146.    case PIPE_FACE_NONE:
  1147.       setup->triangle = triangle_both;
  1148.       break;
  1149.    case PIPE_FACE_BACK:
  1150.       setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
  1151.       break;
  1152.    case PIPE_FACE_FRONT:
  1153.       setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
  1154.       break;
  1155.    default:
  1156.       setup->triangle = triangle_nop;
  1157.       break;
  1158.    }
  1159. }
  1160.