Subversion Repositories Kolibri OS

Rev

Rev 3770 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30.  *
  31.  * \author  Keith Whitwell <keith@tungstengraphics.com>
  32.  * \author  Brian Paul
  33.  */
  34.  
  35. #include "sp_context.h"
  36. #include "sp_quad.h"
  37. #include "sp_quad_pipe.h"
  38. #include "sp_setup.h"
  39. #include "sp_state.h"
  40. #include "draw/draw_context.h"
  41. #include "draw/draw_vertex.h"
  42. #include "pipe/p_shader_tokens.h"
  43. #include "util/u_math.h"
  44. #include "util/u_memory.h"
  45.  
  46.  
  /* Compile-time debug switches.  Set either one to 1 to compile in the
   * code this file guards with the matching "#if".
   */
  #define DEBUG_VERTS 0   /* vertex dumps (print_vertex) */
  #define DEBUG_FRAGS 0   /* per-primitive fragment counters */
  49.  
  50.  
  /**
   * Per-edge data used while rasterizing a triangle.
   */
  struct edge {
     float dx;            /**< x(v1) - x(v0); only needed during setup */
     float dy;            /**< y(v1) - y(v0); only needed during setup */
     float dxdy;          /**< slope, dx / dy */
     float sx;            /**< x coordinate of the first sample point */
     float sy;            /**< y coordinate of the first sample point */
     int lines;           /**< how many scanlines this edge spans */
  };
  61.  
  62.  
  /**
   * Upper bound on the number of quads (2x2 pixel blocks) handled per batch.
   * It cannot be raised freely: some 32-bit bitmasks hold two bits per
   * quad, which limits how large this value may become.
   */
  #define MAX_QUADS 16
  69.  
  70.  
  71. /**
  72.  * Triangle setup info.
  73.  * Also used for line drawing (taking some liberties).
  74.  */
  75. struct setup_context {
  76.    struct softpipe_context *softpipe;
  77.  
  78.    /* Vertices are just an array of floats making up each attribute in
  79.     * turn.  Currently fixed at 4 floats, but should change in time.
  80.     * Codegen will help cope with this.
  81.     */
  82.    const float (*vmax)[4];
  83.    const float (*vmid)[4];
  84.    const float (*vmin)[4];
  85.    const float (*vprovoke)[4];
  86.  
  87.    struct edge ebot;
  88.    struct edge etop;
  89.    struct edge emaj;
  90.  
  91.    float oneoverarea;
  92.    int facing;
  93.  
  94.    float pixel_offset;
  95.  
  96.    struct quad_header quad[MAX_QUADS];
  97.    struct quad_header *quad_ptrs[MAX_QUADS];
  98.    unsigned count;
  99.  
  100.    struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
  101.    struct tgsi_interp_coef posCoef;  /* For Z, W */
  102.  
  103.    struct {
  104.       int left[2];   /**< [0] = row0, [1] = row1 */
  105.       int right[2];
  106.       int y;
  107.    } span;
  108.  
  109. #if DEBUG_FRAGS
  110.    uint numFragsEmitted;  /**< per primitive */
  111.    uint numFragsWritten;  /**< per primitive */
  112. #endif
  113.  
  114.    unsigned cull_face;          /* which faces cull */
  115.    unsigned nr_vertex_attrs;
  116. };
  117.  
  118.  
  119.  
  120.  
  121.  
  122.  
  123.  
  124. /**
  125.  * Clip setup->quad against the scissor/surface bounds.
  126.  */
  127. static INLINE void
  128. quad_clip(struct setup_context *setup, struct quad_header *quad)
  129. {
  130.    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
  131.    const int minx = (int) cliprect->minx;
  132.    const int maxx = (int) cliprect->maxx;
  133.    const int miny = (int) cliprect->miny;
  134.    const int maxy = (int) cliprect->maxy;
  135.  
  136.    if (quad->input.x0 >= maxx ||
  137.        quad->input.y0 >= maxy ||
  138.        quad->input.x0 + 1 < minx ||
  139.        quad->input.y0 + 1 < miny) {
  140.       /* totally clipped */
  141.       quad->inout.mask = 0x0;
  142.       return;
  143.    }
  144.    if (quad->input.x0 < minx)
  145.       quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
  146.    if (quad->input.y0 < miny)
  147.       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
  148.    if (quad->input.x0 == maxx - 1)
  149.       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
  150.    if (quad->input.y0 == maxy - 1)
  151.       quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
  152. }
  153.  
  154.  
  155. /**
  156.  * Emit a quad (pass to next stage) with clipping.
  157.  */
  158. static INLINE void
  159. clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
  160. {
  161.    quad_clip( setup, quad );
  162.  
  163.    if (quad->inout.mask) {
  164.       struct softpipe_context *sp = setup->softpipe;
  165.  
  166. #if DEBUG_FRAGS
  167.       setup->numFragsEmitted += util_bitcount(quad->inout.mask);
  168. #endif
  169.  
  170.       sp->quad.first->run( sp->quad.first, &quad, 1 );
  171.    }
  172. }
  173.  
  174.  
  175.  
  176. /**
  177.  * Given an X or Y coordinate, return the block/quad coordinate that it
  178.  * belongs to.
  179.  */
  180. static INLINE int
  181. block(int x)
  182. {
  183.    return x & ~(2-1);
  184. }
  185.  
  186.  
  187. static INLINE int
  188. block_x(int x)
  189. {
  190.    return x & ~(16-1);
  191. }
  192.  
  193.  
  194. /**
  195.  * Render a horizontal span of quads
  196.  */
  197. static void
  198. flush_spans(struct setup_context *setup)
  199. {
  200.    const int step = MAX_QUADS;
  201.    const int xleft0 = setup->span.left[0];
  202.    const int xleft1 = setup->span.left[1];
  203.    const int xright0 = setup->span.right[0];
  204.    const int xright1 = setup->span.right[1];
  205.    struct quad_stage *pipe = setup->softpipe->quad.first;
  206.  
  207.    const int minleft = block_x(MIN2(xleft0, xleft1));
  208.    const int maxright = MAX2(xright0, xright1);
  209.    int x;
  210.  
  211.    /* process quads in horizontal chunks of 16 */
  212.    for (x = minleft; x < maxright; x += step) {
  213.       unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
  214.       unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
  215.       unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
  216.       unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
  217.       unsigned lx = x;
  218.       unsigned q = 0;
  219.  
  220.       unsigned skipmask_left0 = (1U << skip_left0) - 1U;
  221.       unsigned skipmask_left1 = (1U << skip_left1) - 1U;
  222.  
  223.       /* These calculations fail when step == 32 and skip_right == 0.
  224.        */
  225.       unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
  226.       unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
  227.  
  228.       unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
  229.       unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
  230.  
  231.       if (mask0 | mask1) {
  232.          do {
  233.             unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
  234.             if (quadmask) {
  235.                setup->quad[q].input.x0 = lx;
  236.                setup->quad[q].input.y0 = setup->span.y;
  237.                setup->quad[q].input.facing = setup->facing;
  238.                setup->quad[q].inout.mask = quadmask;
  239.                setup->quad_ptrs[q] = &setup->quad[q];
  240.                q++;
  241. #if DEBUG_FRAGS
  242.                setup->numFragsEmitted += util_bitcount(quadmask);
  243. #endif
  244.             }
  245.             mask0 >>= 2;
  246.             mask1 >>= 2;
  247.             lx += 2;
  248.          } while (mask0 | mask1);
  249.  
  250.          pipe->run( pipe, setup->quad_ptrs, q );
  251.       }
  252.    }
  253.  
  254.  
  255.    setup->span.y = 0;
  256.    setup->span.right[0] = 0;
  257.    setup->span.right[1] = 0;
  258.    setup->span.left[0] = 1000000;     /* greater than right[0] */
  259.    setup->span.left[1] = 1000000;     /* greater than right[1] */
  260. }
  261.  
  262.  
  #if DEBUG_VERTS
  /**
   * Debug helper: print every attribute of a vertex (four floats each) and
   * flag attributes that contain an inf/NaN in any of their components.
   * \param setup  supplies the attribute count (nr_vertex_attrs)
   * \param v  the vertex, an array of float[4] attributes
   */
  static void
  print_vertex(const struct setup_context *setup,
               const float (*v)[4])
  {
     /* unsigned to match setup->nr_vertex_attrs */
     unsigned i;

     debug_printf("   Vertex: (%p)\n", (const void *) v);
     for (i = 0; i < setup->nr_vertex_attrs; i++) {
        debug_printf("     %u: %f %f %f %f\n",  i,
                v[i][0], v[i][1], v[i][2], v[i][3]);
        /* look at all four components, not only the first one */
        if (util_is_inf_or_nan(v[i][0]) ||
            util_is_inf_or_nan(v[i][1]) ||
            util_is_inf_or_nan(v[i][2]) ||
            util_is_inf_or_nan(v[i][3])) {
           debug_printf("   NaN!\n");
        }
     }
  }
  #endif
  279.  
  280.  
  281. /**
  282.  * Sort the vertices from top to bottom order, setting up the triangle
  283.  * edge fields (ebot, emaj, etop).
  284.  * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
  285.  */
  286. static boolean
  287. setup_sort_vertices(struct setup_context *setup,
  288.                     float det,
  289.                     const float (*v0)[4],
  290.                     const float (*v1)[4],
  291.                     const float (*v2)[4])
  292. {
  293.    if (setup->softpipe->rasterizer->flatshade_first)
  294.       setup->vprovoke = v0;
  295.    else
  296.       setup->vprovoke = v2;
  297.  
  298.    /* determine bottom to top order of vertices */
  299.    {
  300.       float y0 = v0[0][1];
  301.       float y1 = v1[0][1];
  302.       float y2 = v2[0][1];
  303.       if (y0 <= y1) {
  304.          if (y1 <= y2) {
  305.             /* y0<=y1<=y2 */
  306.             setup->vmin = v0;
  307.             setup->vmid = v1;
  308.             setup->vmax = v2;
  309.          }
  310.          else if (y2 <= y0) {
  311.             /* y2<=y0<=y1 */
  312.             setup->vmin = v2;
  313.             setup->vmid = v0;
  314.             setup->vmax = v1;
  315.          }
  316.          else {
  317.             /* y0<=y2<=y1 */
  318.             setup->vmin = v0;
  319.             setup->vmid = v2;
  320.             setup->vmax = v1;
  321.          }
  322.       }
  323.       else {
  324.          if (y0 <= y2) {
  325.             /* y1<=y0<=y2 */
  326.             setup->vmin = v1;
  327.             setup->vmid = v0;
  328.             setup->vmax = v2;
  329.          }
  330.          else if (y2 <= y1) {
  331.             /* y2<=y1<=y0 */
  332.             setup->vmin = v2;
  333.             setup->vmid = v1;
  334.             setup->vmax = v0;
  335.          }
  336.          else {
  337.             /* y1<=y2<=y0 */
  338.             setup->vmin = v1;
  339.             setup->vmid = v2;
  340.             setup->vmax = v0;
  341.          }
  342.       }
  343.    }
  344.  
  345.    setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
  346.    setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
  347.    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
  348.    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
  349.    setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
  350.    setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
  351.  
  352.    /*
  353.     * Compute triangle's area.  Use 1/area to compute partial
  354.     * derivatives of attributes later.
  355.     *
  356.     * The area will be the same as prim->det, but the sign may be
  357.     * different depending on how the vertices get sorted above.
  358.     *
  359.     * To determine whether the primitive is front or back facing we
  360.     * use the prim->det value because its sign is correct.
  361.     */
  362.    {
  363.       const float area = (setup->emaj.dx * setup->ebot.dy -
  364.                             setup->ebot.dx * setup->emaj.dy);
  365.  
  366.       setup->oneoverarea = 1.0f / area;
  367.  
  368.       /*
  369.       debug_printf("%s one-over-area %f  area %f  det %f\n",
  370.                    __FUNCTION__, setup->oneoverarea, area, det );
  371.       */
  372.       if (util_is_inf_or_nan(setup->oneoverarea))
  373.          return FALSE;
  374.    }
  375.  
  376.    /* We need to know if this is a front or back-facing triangle for:
  377.     *  - the GLSL gl_FrontFacing fragment attribute (bool)
  378.     *  - two-sided stencil test
  379.     * 0 = front-facing, 1 = back-facing
  380.     */
  381.    setup->facing =
  382.       ((det < 0.0) ^
  383.        (setup->softpipe->rasterizer->front_ccw));
  384.  
  385.    {
  386.       unsigned face = setup->facing == 0 ? PIPE_FACE_FRONT : PIPE_FACE_BACK;
  387.  
  388.       if (face & setup->cull_face)
  389.          return FALSE;
  390.    }
  391.  
  392.  
  393.    /* Prepare pixel offset for rasterisation:
  394.     *  - pixel center (0.5, 0.5) for GL, or
  395.     *  - assume (0.0, 0.0) for other APIs.
  396.     */
  397.    if (setup->softpipe->rasterizer->half_pixel_center) {
  398.       setup->pixel_offset = 0.5f;
  399.    } else {
  400.       setup->pixel_offset = 0.0f;
  401.    }
  402.  
  403.    return TRUE;
  404. }
  405.  
  406.  
  407. /* Apply cylindrical wrapping to v0, v1, v2 coordinates, if enabled.
  408.  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
  409.  * Some combinations of coordinates produce invalid results,
  410.  * but this behaviour is acceptable.
  411.  */
  412. static void
  413. tri_apply_cylindrical_wrap(float v0,
  414.                            float v1,
  415.                            float v2,
  416.                            uint cylindrical_wrap,
  417.                            float output[3])
  418. {
  419.    if (cylindrical_wrap) {
  420.       float delta;
  421.  
  422.       delta = v1 - v0;
  423.       if (delta > 0.5f) {
  424.          v0 += 1.0f;
  425.       }
  426.       else if (delta < -0.5f) {
  427.          v1 += 1.0f;
  428.       }
  429.  
  430.       delta = v2 - v1;
  431.       if (delta > 0.5f) {
  432.          v1 += 1.0f;
  433.       }
  434.       else if (delta < -0.5f) {
  435.          v2 += 1.0f;
  436.       }
  437.  
  438.       delta = v0 - v2;
  439.       if (delta > 0.5f) {
  440.          v2 += 1.0f;
  441.       }
  442.       else if (delta < -0.5f) {
  443.          v0 += 1.0f;
  444.       }
  445.    }
  446.  
  447.    output[0] = v0;
  448.    output[1] = v1;
  449.    output[2] = v2;
  450. }
  451.  
  452.  
  453. /**
  454.  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  455.  * The value value comes from vertex[slot][i].
  456.  * The result will be put into setup->coef[slot].a0[i].
  457.  * \param slot  which attribute slot
  458.  * \param i  which component of the slot (0..3)
  459.  */
  460. static void
  461. const_coeff(struct setup_context *setup,
  462.             struct tgsi_interp_coef *coef,
  463.             uint vertSlot, uint i)
  464. {
  465.    assert(i <= 3);
  466.  
  467.    coef->dadx[i] = 0;
  468.    coef->dady[i] = 0;
  469.  
  470.    /* need provoking vertex info!
  471.     */
  472.    coef->a0[i] = setup->vprovoke[vertSlot][i];
  473. }
  474.  
  475.  
  476. /**
  477.  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  478.  * for a triangle.
  479.  * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively.
  480.  */
  481. static void
  482. tri_linear_coeff(struct setup_context *setup,
  483.                  struct tgsi_interp_coef *coef,
  484.                  uint i,
  485.                  const float v[3])
  486. {
  487.    float botda = v[1] - v[0];
  488.    float majda = v[2] - v[0];
  489.    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
  490.    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
  491.    float dadx = a * setup->oneoverarea;
  492.    float dady = b * setup->oneoverarea;
  493.  
  494.    assert(i <= 3);
  495.  
  496.    coef->dadx[i] = dadx;
  497.    coef->dady[i] = dady;
  498.  
  499.    /* calculate a0 as the value which would be sampled for the
  500.     * fragment at (0,0), taking into account that we want to sample at
  501.     * pixel centers, in other words (pixel_offset, pixel_offset).
  502.     *
  503.     * this is neat but unfortunately not a good way to do things for
  504.     * triangles with very large values of dadx or dady as it will
  505.     * result in the subtraction and re-addition from a0 of a very
  506.     * large number, which means we'll end up loosing a lot of the
  507.     * fractional bits and precision from a0.  the way to fix this is
  508.     * to define a0 as the sample at a pixel center somewhere near vmin
  509.     * instead - i'll switch to this later.
  510.     */
  511.    coef->a0[i] = (v[0] -
  512.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  513.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  514.  
  515.    /*
  516.    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
  517.                 slot, "xyzw"[i],
  518.                 setup->coef[slot].a0[i],
  519.                 setup->coef[slot].dadx[i],
  520.                 setup->coef[slot].dady[i]);
  521.    */
  522. }
  523.  
  524.  
  525. /**
  526.  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  527.  * for a triangle.
  528.  * We basically multiply the vertex value by 1/w before computing
  529.  * the plane coefficients (a0, dadx, dady).
  530.  * Later, when we compute the value at a particular fragment position we'll
  531.  * divide the interpolated value by the interpolated W at that fragment.
  532.  * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively.
  533.  */
  534. static void
  535. tri_persp_coeff(struct setup_context *setup,
  536.                 struct tgsi_interp_coef *coef,
  537.                 uint i,
  538.                 const float v[3])
  539. {
  540.    /* premultiply by 1/w  (v[0][3] is always W):
  541.     */
  542.    float mina = v[0] * setup->vmin[0][3];
  543.    float mida = v[1] * setup->vmid[0][3];
  544.    float maxa = v[2] * setup->vmax[0][3];
  545.    float botda = mida - mina;
  546.    float majda = maxa - mina;
  547.    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
  548.    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
  549.    float dadx = a * setup->oneoverarea;
  550.    float dady = b * setup->oneoverarea;
  551.  
  552.    /*
  553.    debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
  554.                 setup->vmin[vertSlot][i],
  555.                 setup->vmid[vertSlot][i],
  556.                 setup->vmax[vertSlot][i]
  557.           );
  558.    */
  559.    assert(i <= 3);
  560.  
  561.    coef->dadx[i] = dadx;
  562.    coef->dady[i] = dady;
  563.    coef->a0[i] = (mina -
  564.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  565.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  566. }
  567.  
  568.  
  569. /**
  570.  * Special coefficient setup for gl_FragCoord.
  571.  * X and Y are trivial, though Y may have to be inverted for OpenGL.
  572.  * Z and W are copied from posCoef which should have already been computed.
  573.  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
  574.  */
  575. static void
  576. setup_fragcoord_coeff(struct setup_context *setup, uint slot)
  577. {
  578.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  579.  
  580.    /*X*/
  581.    setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0f : 0.5f;
  582.    setup->coef[slot].dadx[0] = 1.0f;
  583.    setup->coef[slot].dady[0] = 0.0f;
  584.    /*Y*/
  585.    setup->coef[slot].a0[1] =
  586.                    (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
  587.                    + (fsInfo->pixel_center_integer ? 0.0f : 0.5f);
  588.    setup->coef[slot].dadx[1] = 0.0f;
  589.    setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0f : 1.0f;
  590.    /*Z*/
  591.    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
  592.    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
  593.    setup->coef[slot].dady[2] = setup->posCoef.dady[2];
  594.    /*W*/
  595.    setup->coef[slot].a0[3] = setup->posCoef.a0[3];
  596.    setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
  597.    setup->coef[slot].dady[3] = setup->posCoef.dady[3];
  598. }
  599.  
  600.  
  601.  
  602. /**
  603.  * Compute the setup->coef[] array dadx, dady, a0 values.
  604.  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
  605.  */
  606. static void
  607. setup_tri_coefficients(struct setup_context *setup)
  608. {
  609.    struct softpipe_context *softpipe = setup->softpipe;
  610.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  611.    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
  612.    uint fragSlot;
  613.    float v[3];
  614.  
  615.    /* z and w are done by linear interpolation:
  616.     */
  617.    v[0] = setup->vmin[0][2];
  618.    v[1] = setup->vmid[0][2];
  619.    v[2] = setup->vmax[0][2];
  620.    tri_linear_coeff(setup, &setup->posCoef, 2, v);
  621.  
  622.    v[0] = setup->vmin[0][3];
  623.    v[1] = setup->vmid[0][3];
  624.    v[2] = setup->vmax[0][3];
  625.    tri_linear_coeff(setup, &setup->posCoef, 3, v);
  626.  
  627.    /* setup interpolation for all the remaining attributes:
  628.     */
  629.    for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
  630.       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
  631.       uint j;
  632.  
  633.       switch (vinfo->attrib[fragSlot].interp_mode) {
  634.       case INTERP_CONSTANT:
  635.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  636.             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
  637.          break;
  638.       case INTERP_LINEAR:
  639.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  640.             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  641.                                        setup->vmid[vertSlot][j],
  642.                                        setup->vmax[vertSlot][j],
  643.                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  644.                                        v);
  645.             tri_linear_coeff(setup, &setup->coef[fragSlot], j, v);
  646.          }
  647.          break;
  648.       case INTERP_PERSPECTIVE:
  649.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  650.             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  651.                                        setup->vmid[vertSlot][j],
  652.                                        setup->vmax[vertSlot][j],
  653.                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  654.                                        v);
  655.             tri_persp_coeff(setup, &setup->coef[fragSlot], j, v);
  656.          }
  657.          break;
  658.       case INTERP_POS:
  659.          setup_fragcoord_coeff(setup, fragSlot);
  660.          break;
  661.       default:
  662.          assert(0);
  663.       }
  664.  
  665.       if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
  666.          /* convert 0 to 1.0 and 1 to -1.0 */
  667.          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
  668.          setup->coef[fragSlot].dadx[0] = 0.0;
  669.          setup->coef[fragSlot].dady[0] = 0.0;
  670.       }
  671.    }
  672. }
  673.  
  674.  
  675. static void
  676. setup_tri_edges(struct setup_context *setup)
  677. {
  678.    float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
  679.    float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
  680.  
  681.    float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
  682.    float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
  683.    float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
  684.  
  685.    setup->emaj.sy = ceilf(vmin_y);
  686.    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
  687.    setup->emaj.dxdy = setup->emaj.dy ? setup->emaj.dx / setup->emaj.dy : .0f;
  688.    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
  689.  
  690.    setup->etop.sy = ceilf(vmid_y);
  691.    setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
  692.    setup->etop.dxdy = setup->etop.dy ? setup->etop.dx / setup->etop.dy : .0f;
  693.    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
  694.  
  695.    setup->ebot.sy = ceilf(vmin_y);
  696.    setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
  697.    setup->ebot.dxdy = setup->ebot.dy ? setup->ebot.dx / setup->ebot.dy : .0f;
  698.    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
  699. }
  700.  
  701.  
  702. /**
  703.  * Render the upper or lower half of a triangle.
  704.  * Scissoring/cliprect is applied here too.
  705.  */
  706. static void
  707. subtriangle(struct setup_context *setup,
  708.             struct edge *eleft,
  709.             struct edge *eright,
  710.             int lines)
  711. {
  712.    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
  713.    const int minx = (int) cliprect->minx;
  714.    const int maxx = (int) cliprect->maxx;
  715.    const int miny = (int) cliprect->miny;
  716.    const int maxy = (int) cliprect->maxy;
  717.    int y, start_y, finish_y;
  718.    int sy = (int)eleft->sy;
  719.  
  720.    assert((int)eleft->sy == (int) eright->sy);
  721.    assert(lines >= 0);
  722.  
  723.    /* clip top/bottom */
  724.    start_y = sy;
  725.    if (start_y < miny)
  726.       start_y = miny;
  727.  
  728.    finish_y = sy + lines;
  729.    if (finish_y > maxy)
  730.       finish_y = maxy;
  731.  
  732.    start_y -= sy;
  733.    finish_y -= sy;
  734.  
  735.    /*
  736.    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
  737.    */
  738.  
  739.    for (y = start_y; y < finish_y; y++) {
  740.  
  741.       /* avoid accumulating adds as floats don't have the precision to
  742.        * accurately iterate large triangle edges that way.  luckily we
  743.        * can just multiply these days.
  744.        *
  745.        * this is all drowned out by the attribute interpolation anyway.
  746.        */
  747.       int left = (int)(eleft->sx + y * eleft->dxdy);
  748.       int right = (int)(eright->sx + y * eright->dxdy);
  749.  
  750.       /* clip left/right */
  751.       if (left < minx)
  752.          left = minx;
  753.       if (right > maxx)
  754.          right = maxx;
  755.  
  756.       if (left < right) {
  757.          int _y = sy + y;
  758.          if (block(_y) != setup->span.y) {
  759.             flush_spans(setup);
  760.             setup->span.y = block(_y);
  761.          }
  762.  
  763.          setup->span.left[_y&1] = left;
  764.          setup->span.right[_y&1] = right;
  765.       }
  766.    }
  767.  
  768.  
  769.    /* save the values so that emaj can be restarted:
  770.     */
  771.    eleft->sx += lines * eleft->dxdy;
  772.    eright->sx += lines * eright->dxdy;
  773.    eleft->sy += lines;
  774.    eright->sy += lines;
  775. }
  776.  
  777.  
  /**
   * Recompute the primitive's determinant from the X/Y of its three
   * vertices.  It has to be calculated here because it does not arrive
   * through the vbuf_render interface.
   * \return z component of cross(v0 - v2, v1 - v2)
   */
  static float
  calc_det(const float (*v0)[4],
           const float (*v1)[4],
           const float (*v2)[4])
  {
     /* first edge vector: v0 - v2 */
     const float e_x = v0[0][0] - v2[0][0];
     const float e_y = v0[0][1] - v2[0][1];
     /* second edge vector: v1 - v2 */
     const float f_x = v1[0][0] - v2[0][0];
     const float f_y = v1[0][1] - v2[0][1];

     return e_x * f_y - e_y * f_x;
  }
  797.  
  798.  
  799. /**
  800.  * Do setup for triangle rasterization, then render the triangle.
  801.  */
  802. void
  803. sp_setup_tri(struct setup_context *setup,
  804.              const float (*v0)[4],
  805.              const float (*v1)[4],
  806.              const float (*v2)[4])
  807. {
  808.    float det;
  809.  
  810. #if DEBUG_VERTS
  811.    debug_printf("Setup triangle:\n");
  812.    print_vertex(setup, v0);
  813.    print_vertex(setup, v1);
  814.    print_vertex(setup, v2);
  815. #endif
  816.  
  817.    if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
  818.       return;
  819.    
  820.    det = calc_det(v0, v1, v2);
  821.    /*
  822.    debug_printf("%s\n", __FUNCTION__ );
  823.    */
  824.  
  825. #if DEBUG_FRAGS
  826.    setup->numFragsEmitted = 0;
  827.    setup->numFragsWritten = 0;
  828. #endif
  829.  
  830.    if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
  831.       return;
  832.  
  833.    setup_tri_coefficients( setup );
  834.    setup_tri_edges( setup );
  835.  
  836.    assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
  837.  
  838.    setup->span.y = 0;
  839.    setup->span.right[0] = 0;
  840.    setup->span.right[1] = 0;
  841.    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
  842.  
  843.    /*   init_constant_attribs( setup ); */
  844.  
  845.    if (setup->oneoverarea < 0.0) {
  846.       /* emaj on left:
  847.        */
  848.       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
  849.       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
  850.    }
  851.    else {
  852.       /* emaj on right:
  853.        */
  854.       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
  855.       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
  856.    }
  857.  
  858.    flush_spans( setup );
  859.  
  860.    if (setup->softpipe->active_statistics_queries) {
  861.       setup->softpipe->pipeline_statistics.c_primitives++;
  862.    }
  863.  
  864. #if DEBUG_FRAGS
  865.    printf("Tri: %u frags emitted, %u written\n",
  866.           setup->numFragsEmitted,
  867.           setup->numFragsWritten);
  868. #endif
  869. }
  870.  
  871.  
  872. /* Apply cylindrical wrapping to v0, v1 coordinates, if enabled.
  873.  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
  874.  */
  875. static void
  876. line_apply_cylindrical_wrap(float v0,
  877.                             float v1,
  878.                             uint cylindrical_wrap,
  879.                             float output[2])
  880. {
  881.    if (cylindrical_wrap) {
  882.       float delta;
  883.  
  884.       delta = v1 - v0;
  885.       if (delta > 0.5f) {
  886.          v0 += 1.0f;
  887.       }
  888.       else if (delta < -0.5f) {
  889.          v1 += 1.0f;
  890.       }
  891.    }
  892.  
  893.    output[0] = v0;
  894.    output[1] = v1;
  895. }
  896.  
  897.  
  898. /**
  899.  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  900.  * for a line.
  901.  * v[0] and v[1] are vmin and vmax, respectively.
  902.  */
  903. static void
  904. line_linear_coeff(const struct setup_context *setup,
  905.                   struct tgsi_interp_coef *coef,
  906.                   uint i,
  907.                   const float v[2])
  908. {
  909.    const float da = v[1] - v[0];
  910.    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
  911.    const float dady = da * setup->emaj.dy * setup->oneoverarea;
  912.    coef->dadx[i] = dadx;
  913.    coef->dady[i] = dady;
  914.    coef->a0[i] = (v[0] -
  915.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  916.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  917. }
  918.  
  919.  
  920. /**
  921.  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  922.  * for a line.
  923.  * v[0] and v[1] are vmin and vmax, respectively.
  924.  */
  925. static void
  926. line_persp_coeff(const struct setup_context *setup,
  927.                  struct tgsi_interp_coef *coef,
  928.                  uint i,
  929.                  const float v[2])
  930. {
  931.    const float a0 = v[0] * setup->vmin[0][3];
  932.    const float a1 = v[1] * setup->vmax[0][3];
  933.    const float da = a1 - a0;
  934.    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
  935.    const float dady = da * setup->emaj.dy * setup->oneoverarea;
  936.    coef->dadx[i] = dadx;
  937.    coef->dady[i] = dady;
  938.    coef->a0[i] = (a0 -
  939.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  940.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  941. }
  942.  
  943.  
  944. /**
  945.  * Compute the setup->coef[] array dadx, dady, a0 values.
  946.  * Must be called after setup->vmin,vmax are initialized.
  947.  */
  948. static boolean
  949. setup_line_coefficients(struct setup_context *setup,
  950.                         const float (*v0)[4],
  951.                         const float (*v1)[4])
  952. {
  953.    struct softpipe_context *softpipe = setup->softpipe;
  954.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  955.    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
  956.    uint fragSlot;
  957.    float area;
  958.    float v[2];
  959.  
  960.    /* use setup->vmin, vmax to point to vertices */
  961.    if (softpipe->rasterizer->flatshade_first)
  962.       setup->vprovoke = v0;
  963.    else
  964.       setup->vprovoke = v1;
  965.    setup->vmin = v0;
  966.    setup->vmax = v1;
  967.  
  968.    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
  969.    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
  970.  
  971.    /* NOTE: this is not really area but something proportional to it */
  972.    area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
  973.    if (area == 0.0f || util_is_inf_or_nan(area))
  974.       return FALSE;
  975.    setup->oneoverarea = 1.0f / area;
  976.  
  977.    /* z and w are done by linear interpolation:
  978.     */
  979.    v[0] = setup->vmin[0][2];
  980.    v[1] = setup->vmax[0][2];
  981.    line_linear_coeff(setup, &setup->posCoef, 2, v);
  982.  
  983.    v[0] = setup->vmin[0][3];
  984.    v[1] = setup->vmax[0][3];
  985.    line_linear_coeff(setup, &setup->posCoef, 3, v);
  986.  
  987.    /* setup interpolation for all the remaining attributes:
  988.     */
  989.    for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
  990.       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
  991.       uint j;
  992.  
  993.       switch (vinfo->attrib[fragSlot].interp_mode) {
  994.       case INTERP_CONSTANT:
  995.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  996.             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
  997.          break;
  998.       case INTERP_LINEAR:
  999.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  1000.             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  1001.                                         setup->vmax[vertSlot][j],
  1002.                                         fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  1003.                                         v);
  1004.             line_linear_coeff(setup, &setup->coef[fragSlot], j, v);
  1005.          }
  1006.          break;
  1007.       case INTERP_PERSPECTIVE:
  1008.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  1009.             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  1010.                                         setup->vmax[vertSlot][j],
  1011.                                         fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  1012.                                         v);
  1013.             line_persp_coeff(setup, &setup->coef[fragSlot], j, v);
  1014.          }
  1015.          break;
  1016.       case INTERP_POS:
  1017.          setup_fragcoord_coeff(setup, fragSlot);
  1018.          break;
  1019.       default:
  1020.          assert(0);
  1021.       }
  1022.  
  1023.       if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
  1024.          /* convert 0 to 1.0 and 1 to -1.0 */
  1025.          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
  1026.          setup->coef[fragSlot].dadx[0] = 0.0;
  1027.          setup->coef[fragSlot].dady[0] = 0.0;
  1028.       }
  1029.    }
  1030.    return TRUE;
  1031. }
  1032.  
  1033.  
  1034. /**
  1035.  * Plot a pixel in a line segment.
  1036.  */
  1037. static INLINE void
  1038. plot(struct setup_context *setup, int x, int y)
  1039. {
  1040.    const int iy = y & 1;
  1041.    const int ix = x & 1;
  1042.    const int quadX = x - ix;
  1043.    const int quadY = y - iy;
  1044.    const int mask = (1 << ix) << (2 * iy);
  1045.  
  1046.    if (quadX != setup->quad[0].input.x0 ||
  1047.        quadY != setup->quad[0].input.y0)
  1048.    {
  1049.       /* flush prev quad, start new quad */
  1050.  
  1051.       if (setup->quad[0].input.x0 != -1)
  1052.          clip_emit_quad( setup, &setup->quad[0] );
  1053.  
  1054.       setup->quad[0].input.x0 = quadX;
  1055.       setup->quad[0].input.y0 = quadY;
  1056.       setup->quad[0].inout.mask = 0x0;
  1057.    }
  1058.  
  1059.    setup->quad[0].inout.mask |= mask;
  1060. }
  1061.  
  1062.  
  1063. /**
  1064.  * Do setup for line rasterization, then render the line.
  1065.  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
  1066.  * to handle stippling and wide lines.
  1067.  */
  1068. void
  1069. sp_setup_line(struct setup_context *setup,
  1070.               const float (*v0)[4],
  1071.               const float (*v1)[4])
  1072. {
  1073.    int x0 = (int) v0[0][0];
  1074.    int x1 = (int) v1[0][0];
  1075.    int y0 = (int) v0[0][1];
  1076.    int y1 = (int) v1[0][1];
  1077.    int dx = x1 - x0;
  1078.    int dy = y1 - y0;
  1079.    int xstep, ystep;
  1080.  
  1081. #if DEBUG_VERTS
  1082.    debug_printf("Setup line:\n");
  1083.    print_vertex(setup, v0);
  1084.    print_vertex(setup, v1);
  1085. #endif
  1086.  
  1087.    if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
  1088.       return;
  1089.  
  1090.    if (dx == 0 && dy == 0)
  1091.       return;
  1092.  
  1093.    if (!setup_line_coefficients(setup, v0, v1))
  1094.       return;
  1095.  
  1096.    assert(v0[0][0] < 1.0e9);
  1097.    assert(v0[0][1] < 1.0e9);
  1098.    assert(v1[0][0] < 1.0e9);
  1099.    assert(v1[0][1] < 1.0e9);
  1100.  
  1101.    if (dx < 0) {
  1102.       dx = -dx;   /* make positive */
  1103.       xstep = -1;
  1104.    }
  1105.    else {
  1106.       xstep = 1;
  1107.    }
  1108.  
  1109.    if (dy < 0) {
  1110.       dy = -dy;   /* make positive */
  1111.       ystep = -1;
  1112.    }
  1113.    else {
  1114.       ystep = 1;
  1115.    }
  1116.  
  1117.    assert(dx >= 0);
  1118.    assert(dy >= 0);
  1119.    assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES);
  1120.  
  1121.    setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
  1122.    setup->quad[0].inout.mask = 0x0;
  1123.  
  1124.    /* XXX temporary: set coverage to 1.0 so the line appears
  1125.     * if AA mode happens to be enabled.
  1126.     */
  1127.    setup->quad[0].input.coverage[0] =
  1128.    setup->quad[0].input.coverage[1] =
  1129.    setup->quad[0].input.coverage[2] =
  1130.    setup->quad[0].input.coverage[3] = 1.0;
  1131.  
  1132.    if (dx > dy) {
  1133.       /*** X-major line ***/
  1134.       int i;
  1135.       const int errorInc = dy + dy;
  1136.       int error = errorInc - dx;
  1137.       const int errorDec = error - dx;
  1138.  
  1139.       for (i = 0; i < dx; i++) {
  1140.          plot(setup, x0, y0);
  1141.  
  1142.          x0 += xstep;
  1143.          if (error < 0) {
  1144.             error += errorInc;
  1145.          }
  1146.          else {
  1147.             error += errorDec;
  1148.             y0 += ystep;
  1149.          }
  1150.       }
  1151.    }
  1152.    else {
  1153.       /*** Y-major line ***/
  1154.       int i;
  1155.       const int errorInc = dx + dx;
  1156.       int error = errorInc - dy;
  1157.       const int errorDec = error - dy;
  1158.  
  1159.       for (i = 0; i < dy; i++) {
  1160.          plot(setup, x0, y0);
  1161.  
  1162.          y0 += ystep;
  1163.          if (error < 0) {
  1164.             error += errorInc;
  1165.          }
  1166.          else {
  1167.             error += errorDec;
  1168.             x0 += xstep;
  1169.          }
  1170.       }
  1171.    }
  1172.  
  1173.    /* draw final quad */
  1174.    if (setup->quad[0].inout.mask) {
  1175.       clip_emit_quad( setup, &setup->quad[0] );
  1176.    }
  1177. }
  1178.  
  1179.  
  1180. static void
  1181. point_persp_coeff(const struct setup_context *setup,
  1182.                   const float (*vert)[4],
  1183.                   struct tgsi_interp_coef *coef,
  1184.                   uint vertSlot, uint i)
  1185. {
  1186.    assert(i <= 3);
  1187.    coef->dadx[i] = 0.0F;
  1188.    coef->dady[i] = 0.0F;
  1189.    coef->a0[i] = vert[vertSlot][i] * vert[0][3];
  1190. }
  1191.  
  1192.  
  1193. /**
  1194.  * Do setup for point rasterization, then render the point.
  1195.  * Round or square points...
  1196.  * XXX could optimize a lot for 1-pixel points.
  1197.  */
  1198. void
  1199. sp_setup_point(struct setup_context *setup,
  1200.                const float (*v0)[4])
  1201. {
  1202.    struct softpipe_context *softpipe = setup->softpipe;
  1203.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  1204.    const int sizeAttr = setup->softpipe->psize_slot;
  1205.    const float size
  1206.       = sizeAttr > 0 ? v0[sizeAttr][0]
  1207.       : setup->softpipe->rasterizer->point_size;
  1208.    const float halfSize = 0.5F * size;
  1209.    const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
  1210.    const float x = v0[0][0];  /* Note: data[0] is always position */
  1211.    const float y = v0[0][1];
  1212.    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
  1213.    uint fragSlot;
  1214.  
  1215. #if DEBUG_VERTS
  1216.    debug_printf("Setup point:\n");
  1217.    print_vertex(setup, v0);
  1218. #endif
  1219.  
  1220.    if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
  1221.       return;
  1222.  
  1223.    assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS);
  1224.  
  1225.    /* For points, all interpolants are constant-valued.
  1226.     * However, for point sprites, we'll need to setup texcoords appropriately.
  1227.     * XXX: which coefficients are the texcoords???
  1228.     * We may do point sprites as textured quads...
  1229.     *
  1230.     * KW: We don't know which coefficients are texcoords - ultimately
  1231.     * the choice of what interpolation mode to use for each attribute
  1232.     * should be determined by the fragment program, using
  1233.     * per-attribute declaration statements that include interpolation
  1234.     * mode as a parameter.  So either the fragment program will have
  1235.     * to be adjusted for pointsprite vs normal point behaviour, or
  1236.     * otherwise a special interpolation mode will have to be defined
  1237.     * which matches the required behaviour for point sprites.  But -
  1238.     * the latter is not a feature of normal hardware, and as such
  1239.     * probably should be ruled out on that basis.
  1240.     */
  1241.    setup->vprovoke = v0;
  1242.  
  1243.    /* setup Z, W */
  1244.    const_coeff(setup, &setup->posCoef, 0, 2);
  1245.    const_coeff(setup, &setup->posCoef, 0, 3);
  1246.  
  1247.    for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
  1248.       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
  1249.       uint j;
  1250.  
  1251.       switch (vinfo->attrib[fragSlot].interp_mode) {
  1252.       case INTERP_CONSTANT:
  1253.          /* fall-through */
  1254.       case INTERP_LINEAR:
  1255.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  1256.             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
  1257.          break;
  1258.       case INTERP_PERSPECTIVE:
  1259.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  1260.             point_persp_coeff(setup, setup->vprovoke,
  1261.                               &setup->coef[fragSlot], vertSlot, j);
  1262.          break;
  1263.       case INTERP_POS:
  1264.          setup_fragcoord_coeff(setup, fragSlot);
  1265.          break;
  1266.       default:
  1267.          assert(0);
  1268.       }
  1269.  
  1270.       if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
  1271.          /* convert 0 to 1.0 and 1 to -1.0 */
  1272.          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
  1273.          setup->coef[fragSlot].dadx[0] = 0.0;
  1274.          setup->coef[fragSlot].dady[0] = 0.0;
  1275.       }
  1276.    }
  1277.  
  1278.  
  1279.    if (halfSize <= 0.5 && !round) {
  1280.       /* special case for 1-pixel points */
  1281.       const int ix = ((int) x) & 1;
  1282.       const int iy = ((int) y) & 1;
  1283.       setup->quad[0].input.x0 = (int) x - ix;
  1284.       setup->quad[0].input.y0 = (int) y - iy;
  1285.       setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
  1286.       clip_emit_quad( setup, &setup->quad[0] );
  1287.    }
  1288.    else {
  1289.       if (round) {
  1290.          /* rounded points */
  1291.          const int ixmin = block((int) (x - halfSize));
  1292.          const int ixmax = block((int) (x + halfSize));
  1293.          const int iymin = block((int) (y - halfSize));
  1294.          const int iymax = block((int) (y + halfSize));
  1295.          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
  1296.          const float rmax = halfSize + 0.7071F;
  1297.          const float rmin2 = MAX2(0.0F, rmin * rmin);
  1298.          const float rmax2 = rmax * rmax;
  1299.          const float cscale = 1.0F / (rmax2 - rmin2);
  1300.          int ix, iy;
  1301.  
  1302.          for (iy = iymin; iy <= iymax; iy += 2) {
  1303.             for (ix = ixmin; ix <= ixmax; ix += 2) {
  1304.                float dx, dy, dist2, cover;
  1305.  
  1306.                setup->quad[0].inout.mask = 0x0;
  1307.  
  1308.                dx = (ix + 0.5f) - x;
  1309.                dy = (iy + 0.5f) - y;
  1310.                dist2 = dx * dx + dy * dy;
  1311.                if (dist2 <= rmax2) {
  1312.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1313.                   setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
  1314.                   setup->quad[0].inout.mask |= MASK_TOP_LEFT;
  1315.                }
  1316.  
  1317.                dx = (ix + 1.5f) - x;
  1318.                dy = (iy + 0.5f) - y;
  1319.                dist2 = dx * dx + dy * dy;
  1320.                if (dist2 <= rmax2) {
  1321.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1322.                   setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
  1323.                   setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
  1324.                }
  1325.  
  1326.                dx = (ix + 0.5f) - x;
  1327.                dy = (iy + 1.5f) - y;
  1328.                dist2 = dx * dx + dy * dy;
  1329.                if (dist2 <= rmax2) {
  1330.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1331.                   setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
  1332.                   setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
  1333.                }
  1334.  
  1335.                dx = (ix + 1.5f) - x;
  1336.                dy = (iy + 1.5f) - y;
  1337.                dist2 = dx * dx + dy * dy;
  1338.                if (dist2 <= rmax2) {
  1339.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1340.                   setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
  1341.                   setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
  1342.                }
  1343.  
  1344.                if (setup->quad[0].inout.mask) {
  1345.                   setup->quad[0].input.x0 = ix;
  1346.                   setup->quad[0].input.y0 = iy;
  1347.                   clip_emit_quad( setup, &setup->quad[0] );
  1348.                }
  1349.             }
  1350.          }
  1351.       }
  1352.       else {
  1353.          /* square points */
  1354.          const int xmin = (int) (x + 0.75 - halfSize);
  1355.          const int ymin = (int) (y + 0.25 - halfSize);
  1356.          const int xmax = xmin + (int) size;
  1357.          const int ymax = ymin + (int) size;
  1358.          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
  1359.          const int ixmin = block(xmin);
  1360.          const int ixmax = block(xmax - 1);
  1361.          const int iymin = block(ymin);
  1362.          const int iymax = block(ymax - 1);
  1363.          int ix, iy;
  1364.  
  1365.          /*
  1366.          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
  1367.          */
  1368.          for (iy = iymin; iy <= iymax; iy += 2) {
  1369.             uint rowMask = 0xf;
  1370.             if (iy < ymin) {
  1371.                /* above the top edge */
  1372.                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
  1373.             }
  1374.             if (iy + 1 >= ymax) {
  1375.                /* below the bottom edge */
  1376.                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
  1377.             }
  1378.  
  1379.             for (ix = ixmin; ix <= ixmax; ix += 2) {
  1380.                uint mask = rowMask;
  1381.  
  1382.                if (ix < xmin) {
  1383.                   /* fragment is past left edge of point, turn off left bits */
  1384.                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
  1385.                }
  1386.                if (ix + 1 >= xmax) {
  1387.                   /* past the right edge */
  1388.                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
  1389.                }
  1390.  
  1391.                setup->quad[0].inout.mask = mask;
  1392.                setup->quad[0].input.x0 = ix;
  1393.                setup->quad[0].input.y0 = iy;
  1394.                clip_emit_quad( setup, &setup->quad[0] );
  1395.             }
  1396.          }
  1397.       }
  1398.    }
  1399. }
  1400.  
  1401.  
  1402. /**
  1403.  * Called by vbuf code just before we start buffering primitives.
  1404.  */
  1405. void
  1406. sp_setup_prepare(struct setup_context *setup)
  1407. {
  1408.    struct softpipe_context *sp = setup->softpipe;
  1409.  
  1410.    if (sp->dirty) {
  1411.       softpipe_update_derived(sp, sp->reduced_api_prim);
  1412.    }
  1413.  
  1414.    /* Note: nr_attrs is only used for debugging (vertex printing) */
  1415.    setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
  1416.  
  1417.    sp->quad.first->begin( sp->quad.first );
  1418.  
  1419.    if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
  1420.        sp->rasterizer->fill_front == PIPE_POLYGON_MODE_FILL &&
  1421.        sp->rasterizer->fill_back == PIPE_POLYGON_MODE_FILL) {
  1422.       /* we'll do culling */
  1423.       setup->cull_face = sp->rasterizer->cull_face;
  1424.    }
  1425.    else {
  1426.       /* 'draw' will do culling */
  1427.       setup->cull_face = PIPE_FACE_NONE;
  1428.    }
  1429. }
  1430.  
  1431.  
  1432. void
  1433. sp_setup_destroy_context(struct setup_context *setup)
  1434. {
  1435.    FREE( setup );
  1436. }
  1437.  
  1438.  
  1439. /**
  1440.  * Create a new primitive setup/render stage.
  1441.  */
  1442. struct setup_context *
  1443. sp_setup_create_context(struct softpipe_context *softpipe)
  1444. {
  1445.    struct setup_context *setup = CALLOC_STRUCT(setup_context);
  1446.    unsigned i;
  1447.  
  1448.    setup->softpipe = softpipe;
  1449.  
  1450.    for (i = 0; i < MAX_QUADS; i++) {
  1451.       setup->quad[i].coef = setup->coef;
  1452.       setup->quad[i].posCoef = &setup->posCoef;
  1453.    }
  1454.  
  1455.    setup->span.left[0] = 1000000;     /* greater than right[0] */
  1456.    setup->span.left[1] = 1000000;     /* greater than right[1] */
  1457.  
  1458.    return setup;
  1459. }
  1460.