Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30.  *
  31.  * \author  Keith Whitwell <keithw@vmware.com>
  32.  * \author  Brian Paul
  33.  */
  34.  
  35. #include "sp_context.h"
  36. #include "sp_quad.h"
  37. #include "sp_quad_pipe.h"
  38. #include "sp_setup.h"
  39. #include "sp_state.h"
  40. #include "draw/draw_context.h"
  41. #include "draw/draw_vertex.h"
  42. #include "pipe/p_shader_tokens.h"
  43. #include "util/u_math.h"
  44. #include "util/u_memory.h"
  45.  
  46.  
/* Compile-time debug switches, tested with #if further down in this file.
 * DEBUG_VERTS guards the vertex dump helper (print_vertex) and its calls;
 * DEBUG_FRAGS guards the per-primitive fragment counters and their report.
 */
#define DEBUG_VERTS 0
#define DEBUG_FRAGS 0
  49.  
  50.  
/**
 * Triangle edge info.
 *
 * dx/dy are written by setup_sort_vertices(); dxdy, sx, sy and lines are
 * written by setup_tri_edges(), and sx/sy are advanced by subtriangle()
 * after it has walked the edge.
 */
struct edge {
   float dx;            /**< X(v1) - X(v0), used only during setup */
   float dy;            /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;          /**< dx/dy (set to 0 when dy is 0) */
   float sx, sy;        /**< first sample point coord */
   int lines;           /**< number of lines on this edge */
};
  61.  
  62.  
/**
 * Max number of quads (2x2 pixel blocks) to process per batch.
 * This can't be arbitrarily increased since we depend on some 32-bit
 * bitmasks (two bits per quad).
 *
 * It sizes the quad[] and quad_ptrs[] arrays in struct setup_context, and
 * flush_spans() also uses the same value as its horizontal step in x.
 */
#define MAX_QUADS 16
  69.  
  70.  
/**
 * Triangle setup info.
 * Also used for line drawing (taking some liberties).
 *
 * Holds the per-primitive state shared by the setup helpers in this file:
 * the sorted vertices, the three edges, interpolation coefficients and the
 * pending two-scanline span that flush_spans() turns into quads.
 */
struct setup_context {
   struct softpipe_context *softpipe;  /**< context whose rasterizer, cliprect
                                        *   and quad pipeline are used */

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    *
    * vmin/vmid/vmax are the input vertices ordered by Y in
    * setup_sort_vertices(); vprovoke is the first or last input vertex,
    * chosen there from the rasterizer's flatshade_first flag.
    */
   const float (*vmax)[4];
   const float (*vmid)[4];
   const float (*vmin)[4];
   const float (*vprovoke)[4];

   struct edge ebot;    /**< edge vmin -> vmid */
   struct edge etop;    /**< edge vmid -> vmax */
   struct edge emaj;    /**< edge vmin -> vmax */

   float oneoverarea;   /**< 1 / area of the sorted triangle (signed) */
   int facing;          /**< 0 = front-facing, 1 = back-facing */

   float pixel_offset;  /**< 0.5 if half_pixel_center is set, else 0.0 */
   unsigned max_layer;  /**< upper clamp applied to the layer index */

   struct quad_header quad[MAX_QUADS];        /**< quad storage for one batch */
   struct quad_header *quad_ptrs[MAX_QUADS];  /**< pointers into quad[] passed
                                               *   to the quad stage */
   unsigned count;      /* NOTE(review): not referenced in the visible part of
                         * this file - purpose to be confirmed */

   struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];  /**< one per
                                                           *   fragment input */
   struct tgsi_interp_coef posCoef;  /* For Z, W */

   /* Pending scanlines of the current quad row, consumed by flush_spans(). */
   struct {
      int left[2];   /**< [0] = row0, [1] = row1 */
      int right[2];
      int y;         /**< even Y of the quad row (see block()) */
   } span;

#if DEBUG_FRAGS
   uint numFragsEmitted;  /**< per primitive */
   uint numFragsWritten;  /**< per primitive */
#endif

   unsigned cull_face;          /* which faces cull */
   unsigned nr_vertex_attrs;    /* attribute count per vertex, as iterated
                                 * by the debug vertex dump */
};
  118.  
  119.  
  120.  
  121.  
  122.  
  123.  
  124.  
  125. /**
  126.  * Clip setup->quad against the scissor/surface bounds.
  127.  */
  128. static INLINE void
  129. quad_clip(struct setup_context *setup, struct quad_header *quad)
  130. {
  131.    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
  132.    const int minx = (int) cliprect->minx;
  133.    const int maxx = (int) cliprect->maxx;
  134.    const int miny = (int) cliprect->miny;
  135.    const int maxy = (int) cliprect->maxy;
  136.  
  137.    if (quad->input.x0 >= maxx ||
  138.        quad->input.y0 >= maxy ||
  139.        quad->input.x0 + 1 < minx ||
  140.        quad->input.y0 + 1 < miny) {
  141.       /* totally clipped */
  142.       quad->inout.mask = 0x0;
  143.       return;
  144.    }
  145.    if (quad->input.x0 < minx)
  146.       quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
  147.    if (quad->input.y0 < miny)
  148.       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
  149.    if (quad->input.x0 == maxx - 1)
  150.       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
  151.    if (quad->input.y0 == maxy - 1)
  152.       quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
  153. }
  154.  
  155.  
  156. /**
  157.  * Emit a quad (pass to next stage) with clipping.
  158.  */
  159. static INLINE void
  160. clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
  161. {
  162.    quad_clip( setup, quad );
  163.  
  164.    if (quad->inout.mask) {
  165.       struct softpipe_context *sp = setup->softpipe;
  166.  
  167. #if DEBUG_FRAGS
  168.       setup->numFragsEmitted += util_bitcount(quad->inout.mask);
  169. #endif
  170.  
  171.       sp->quad.first->run( sp->quad.first, &quad, 1 );
  172.    }
  173. }
  174.  
  175.  
  176.  
  177. /**
  178.  * Given an X or Y coordinate, return the block/quad coordinate that it
  179.  * belongs to.
  180.  */
  181. static INLINE int
  182. block(int x)
  183. {
  184.    return x & ~(2-1);
  185. }
  186.  
  187.  
  188. static INLINE int
  189. block_x(int x)
  190. {
  191.    return x & ~(16-1);
  192. }
  193.  
  194.  
/**
 * Render a horizontal span of quads.
 *
 * Takes the two pending scanlines stored in setup->span (row0 and row1 of
 * one quad row at y = setup->span.y), cuts them into 2x2 quads and hands
 * the quads to the first quad stage in batches.  On return setup->span is
 * reset to an empty state (left > right for both rows).
 */
static void
flush_spans(struct setup_context *setup)
{
   const int step = MAX_QUADS;
   const int xleft0 = setup->span.left[0];
   const int xleft1 = setup->span.left[1];
   const int xright0 = setup->span.right[0];
   const int xright1 = setup->span.right[1];
   struct quad_stage *pipe = setup->softpipe->quad.first;

   /* Horizontal extent covering both rows; the start is aligned down to 16. */
   const int minleft = block_x(MIN2(xleft0, xleft1));
   const int maxright = MAX2(xright0, xright1);
   int x;

   /* process quads in horizontal chunks of 16 */
   for (x = minleft; x < maxright; x += step) {
      /* Number of positions, clamped to [0, step], that lie in this chunk
       * before each row's left edge / at or after each row's right edge.
       */
      unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
      unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
      unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
      unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
      unsigned lx = x;
      unsigned q = 0;

      /* Low skip_left bits set: positions left of the span. */
      unsigned skipmask_left0 = (1U << skip_left0) - 1U;
      unsigned skipmask_left1 = (1U << skip_left1) - 1U;

      /* These calculations fail when step == 32 and skip_right == 0.
       */
      unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
      unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);

      /* Per-row coverage: bit n is set when x + n is inside [left, right). */
      unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
      unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;

      if (mask0 | mask1) {
         do {
            /* Two bits from row0 go to bits 0-1, two from row1 to bits 2-3. */
            unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
            if (quadmask) {
               setup->quad[q].input.x0 = lx;
               setup->quad[q].input.y0 = setup->span.y;
               setup->quad[q].input.facing = setup->facing;
               setup->quad[q].inout.mask = quadmask;
               setup->quad_ptrs[q] = &setup->quad[q];
               q++;
#if DEBUG_FRAGS
               setup->numFragsEmitted += util_bitcount(quadmask);
#endif
            }
            /* Advance to the next quad: two coverage bits, two in x. */
            mask0 >>= 2;
            mask1 >>= 2;
            lx += 2;
         } while (mask0 | mask1);

         /* Submit the quads collected for this chunk in one call. */
         pipe->run( pipe, setup->quad_ptrs, q );
      }
   }


   /* Mark both rows empty so stale values are not rendered again. */
   setup->span.y = 0;
   setup->span.right[0] = 0;
   setup->span.right[1] = 0;
   setup->span.left[0] = 1000000;     /* greater than right[0] */
   setup->span.left[1] = 1000000;     /* greater than right[1] */
}
  262.  
  263.  
  264. #if DEBUG_VERTS
  265. static void
  266. print_vertex(const struct setup_context *setup,
  267.              const float (*v)[4])
  268. {
  269.    int i;
  270.    debug_printf("   Vertex: (%p)\n", (void *) v);
  271.    for (i = 0; i < setup->nr_vertex_attrs; i++) {
  272.       debug_printf("     %d: %f %f %f %f\n",  i,
  273.               v[i][0], v[i][1], v[i][2], v[i][3]);
  274.       if (util_is_inf_or_nan(v[i][0])) {
  275.          debug_printf("   NaN!\n");
  276.       }
  277.    }
  278. }
  279. #endif
  280.  
  281.  
  282. /**
  283.  * Sort the vertices from top to bottom order, setting up the triangle
  284.  * edge fields (ebot, emaj, etop).
  285.  * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
  286.  */
  287. static boolean
  288. setup_sort_vertices(struct setup_context *setup,
  289.                     float det,
  290.                     const float (*v0)[4],
  291.                     const float (*v1)[4],
  292.                     const float (*v2)[4])
  293. {
  294.    if (setup->softpipe->rasterizer->flatshade_first)
  295.       setup->vprovoke = v0;
  296.    else
  297.       setup->vprovoke = v2;
  298.  
  299.    /* determine bottom to top order of vertices */
  300.    {
  301.       float y0 = v0[0][1];
  302.       float y1 = v1[0][1];
  303.       float y2 = v2[0][1];
  304.       if (y0 <= y1) {
  305.          if (y1 <= y2) {
  306.             /* y0<=y1<=y2 */
  307.             setup->vmin = v0;
  308.             setup->vmid = v1;
  309.             setup->vmax = v2;
  310.          }
  311.          else if (y2 <= y0) {
  312.             /* y2<=y0<=y1 */
  313.             setup->vmin = v2;
  314.             setup->vmid = v0;
  315.             setup->vmax = v1;
  316.          }
  317.          else {
  318.             /* y0<=y2<=y1 */
  319.             setup->vmin = v0;
  320.             setup->vmid = v2;
  321.             setup->vmax = v1;
  322.          }
  323.       }
  324.       else {
  325.          if (y0 <= y2) {
  326.             /* y1<=y0<=y2 */
  327.             setup->vmin = v1;
  328.             setup->vmid = v0;
  329.             setup->vmax = v2;
  330.          }
  331.          else if (y2 <= y1) {
  332.             /* y2<=y1<=y0 */
  333.             setup->vmin = v2;
  334.             setup->vmid = v1;
  335.             setup->vmax = v0;
  336.          }
  337.          else {
  338.             /* y1<=y2<=y0 */
  339.             setup->vmin = v1;
  340.             setup->vmid = v2;
  341.             setup->vmax = v0;
  342.          }
  343.       }
  344.    }
  345.  
  346.    setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
  347.    setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
  348.    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
  349.    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
  350.    setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
  351.    setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
  352.  
  353.    /*
  354.     * Compute triangle's area.  Use 1/area to compute partial
  355.     * derivatives of attributes later.
  356.     *
  357.     * The area will be the same as prim->det, but the sign may be
  358.     * different depending on how the vertices get sorted above.
  359.     *
  360.     * To determine whether the primitive is front or back facing we
  361.     * use the prim->det value because its sign is correct.
  362.     */
  363.    {
  364.       const float area = (setup->emaj.dx * setup->ebot.dy -
  365.                             setup->ebot.dx * setup->emaj.dy);
  366.  
  367.       setup->oneoverarea = 1.0f / area;
  368.  
  369.       /*
  370.       debug_printf("%s one-over-area %f  area %f  det %f\n",
  371.                    __FUNCTION__, setup->oneoverarea, area, det );
  372.       */
  373.       if (util_is_inf_or_nan(setup->oneoverarea))
  374.          return FALSE;
  375.    }
  376.  
  377.    /* We need to know if this is a front or back-facing triangle for:
  378.     *  - the GLSL gl_FrontFacing fragment attribute (bool)
  379.     *  - two-sided stencil test
  380.     * 0 = front-facing, 1 = back-facing
  381.     */
  382.    setup->facing =
  383.       ((det < 0.0) ^
  384.        (setup->softpipe->rasterizer->front_ccw));
  385.  
  386.    {
  387.       unsigned face = setup->facing == 0 ? PIPE_FACE_FRONT : PIPE_FACE_BACK;
  388.  
  389.       if (face & setup->cull_face)
  390.          return FALSE;
  391.    }
  392.  
  393.  
  394.    /* Prepare pixel offset for rasterisation:
  395.     *  - pixel center (0.5, 0.5) for GL, or
  396.     *  - assume (0.0, 0.0) for other APIs.
  397.     */
  398.    if (setup->softpipe->rasterizer->half_pixel_center) {
  399.       setup->pixel_offset = 0.5f;
  400.    } else {
  401.       setup->pixel_offset = 0.0f;
  402.    }
  403.  
  404.    return TRUE;
  405. }
  406.  
  407.  
  408. /* Apply cylindrical wrapping to v0, v1, v2 coordinates, if enabled.
  409.  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
  410.  * Some combinations of coordinates produce invalid results,
  411.  * but this behaviour is acceptable.
  412.  */
  413. static void
  414. tri_apply_cylindrical_wrap(float v0,
  415.                            float v1,
  416.                            float v2,
  417.                            uint cylindrical_wrap,
  418.                            float output[3])
  419. {
  420.    if (cylindrical_wrap) {
  421.       float delta;
  422.  
  423.       delta = v1 - v0;
  424.       if (delta > 0.5f) {
  425.          v0 += 1.0f;
  426.       }
  427.       else if (delta < -0.5f) {
  428.          v1 += 1.0f;
  429.       }
  430.  
  431.       delta = v2 - v1;
  432.       if (delta > 0.5f) {
  433.          v1 += 1.0f;
  434.       }
  435.       else if (delta < -0.5f) {
  436.          v2 += 1.0f;
  437.       }
  438.  
  439.       delta = v0 - v2;
  440.       if (delta > 0.5f) {
  441.          v2 += 1.0f;
  442.       }
  443.       else if (delta < -0.5f) {
  444.          v0 += 1.0f;
  445.       }
  446.    }
  447.  
  448.    output[0] = v0;
  449.    output[1] = v1;
  450.    output[2] = v2;
  451. }
  452.  
  453.  
  454. /**
  455.  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  456.  * The value value comes from vertex[slot][i].
  457.  * The result will be put into setup->coef[slot].a0[i].
  458.  * \param slot  which attribute slot
  459.  * \param i  which component of the slot (0..3)
  460.  */
  461. static void
  462. const_coeff(struct setup_context *setup,
  463.             struct tgsi_interp_coef *coef,
  464.             uint vertSlot, uint i)
  465. {
  466.    assert(i <= 3);
  467.  
  468.    coef->dadx[i] = 0;
  469.    coef->dady[i] = 0;
  470.  
  471.    /* need provoking vertex info!
  472.     */
  473.    coef->a0[i] = setup->vprovoke[vertSlot][i];
  474. }
  475.  
  476.  
  477. /**
  478.  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  479.  * for a triangle.
  480.  * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively.
  481.  */
  482. static void
  483. tri_linear_coeff(struct setup_context *setup,
  484.                  struct tgsi_interp_coef *coef,
  485.                  uint i,
  486.                  const float v[3])
  487. {
  488.    float botda = v[1] - v[0];
  489.    float majda = v[2] - v[0];
  490.    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
  491.    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
  492.    float dadx = a * setup->oneoverarea;
  493.    float dady = b * setup->oneoverarea;
  494.  
  495.    assert(i <= 3);
  496.  
  497.    coef->dadx[i] = dadx;
  498.    coef->dady[i] = dady;
  499.  
  500.    /* calculate a0 as the value which would be sampled for the
  501.     * fragment at (0,0), taking into account that we want to sample at
  502.     * pixel centers, in other words (pixel_offset, pixel_offset).
  503.     *
  504.     * this is neat but unfortunately not a good way to do things for
  505.     * triangles with very large values of dadx or dady as it will
  506.     * result in the subtraction and re-addition from a0 of a very
  507.     * large number, which means we'll end up loosing a lot of the
  508.     * fractional bits and precision from a0.  the way to fix this is
  509.     * to define a0 as the sample at a pixel center somewhere near vmin
  510.     * instead - i'll switch to this later.
  511.     */
  512.    coef->a0[i] = (v[0] -
  513.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  514.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  515. }
  516.  
  517.  
  518. /**
  519.  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  520.  * for a triangle.
  521.  * We basically multiply the vertex value by 1/w before computing
  522.  * the plane coefficients (a0, dadx, dady).
  523.  * Later, when we compute the value at a particular fragment position we'll
  524.  * divide the interpolated value by the interpolated W at that fragment.
  525.  * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively.
  526.  */
  527. static void
  528. tri_persp_coeff(struct setup_context *setup,
  529.                 struct tgsi_interp_coef *coef,
  530.                 uint i,
  531.                 const float v[3])
  532. {
  533.    /* premultiply by 1/w  (v[0][3] is always W):
  534.     */
  535.    float mina = v[0] * setup->vmin[0][3];
  536.    float mida = v[1] * setup->vmid[0][3];
  537.    float maxa = v[2] * setup->vmax[0][3];
  538.    float botda = mida - mina;
  539.    float majda = maxa - mina;
  540.    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
  541.    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
  542.    float dadx = a * setup->oneoverarea;
  543.    float dady = b * setup->oneoverarea;
  544.  
  545.    assert(i <= 3);
  546.  
  547.    coef->dadx[i] = dadx;
  548.    coef->dady[i] = dady;
  549.    coef->a0[i] = (mina -
  550.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  551.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  552. }
  553.  
  554.  
  555. /**
  556.  * Special coefficient setup for gl_FragCoord.
  557.  * X and Y are trivial, though Y may have to be inverted for OpenGL.
  558.  * Z and W are copied from posCoef which should have already been computed.
  559.  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
  560.  */
  561. static void
  562. setup_fragcoord_coeff(struct setup_context *setup, uint slot)
  563. {
  564.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  565.    boolean origin_lower_left =
  566.          fsInfo->properties[TGSI_PROPERTY_FS_COORD_ORIGIN];
  567.    boolean pixel_center_integer =
  568.          fsInfo->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
  569.  
  570.    /*X*/
  571.    setup->coef[slot].a0[0] = pixel_center_integer ? 0.0f : 0.5f;
  572.    setup->coef[slot].dadx[0] = 1.0f;
  573.    setup->coef[slot].dady[0] = 0.0f;
  574.    /*Y*/
  575.    setup->coef[slot].a0[1] =
  576.                    (origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
  577.                    + (pixel_center_integer ? 0.0f : 0.5f);
  578.    setup->coef[slot].dadx[1] = 0.0f;
  579.    setup->coef[slot].dady[1] = origin_lower_left ? -1.0f : 1.0f;
  580.    /*Z*/
  581.    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
  582.    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
  583.    setup->coef[slot].dady[2] = setup->posCoef.dady[2];
  584.    /*W*/
  585.    setup->coef[slot].a0[3] = setup->posCoef.a0[3];
  586.    setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
  587.    setup->coef[slot].dady[3] = setup->posCoef.dady[3];
  588. }
  589.  
  590.  
  591.  
  592. /**
  593.  * Compute the setup->coef[] array dadx, dady, a0 values.
  594.  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
  595.  */
  596. static void
  597. setup_tri_coefficients(struct setup_context *setup)
  598. {
  599.    struct softpipe_context *softpipe = setup->softpipe;
  600.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  601.    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
  602.    uint fragSlot;
  603.    float v[3];
  604.  
  605.    /* z and w are done by linear interpolation:
  606.     */
  607.    v[0] = setup->vmin[0][2];
  608.    v[1] = setup->vmid[0][2];
  609.    v[2] = setup->vmax[0][2];
  610.    tri_linear_coeff(setup, &setup->posCoef, 2, v);
  611.  
  612.    v[0] = setup->vmin[0][3];
  613.    v[1] = setup->vmid[0][3];
  614.    v[2] = setup->vmax[0][3];
  615.    tri_linear_coeff(setup, &setup->posCoef, 3, v);
  616.  
  617.    /* setup interpolation for all the remaining attributes:
  618.     */
  619.    for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
  620.       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
  621.       uint j;
  622.  
  623.       switch (vinfo->attrib[fragSlot].interp_mode) {
  624.       case INTERP_CONSTANT:
  625.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  626.             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
  627.          break;
  628.       case INTERP_LINEAR:
  629.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  630.             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  631.                                        setup->vmid[vertSlot][j],
  632.                                        setup->vmax[vertSlot][j],
  633.                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  634.                                        v);
  635.             tri_linear_coeff(setup, &setup->coef[fragSlot], j, v);
  636.          }
  637.          break;
  638.       case INTERP_PERSPECTIVE:
  639.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  640.             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  641.                                        setup->vmid[vertSlot][j],
  642.                                        setup->vmax[vertSlot][j],
  643.                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  644.                                        v);
  645.             tri_persp_coeff(setup, &setup->coef[fragSlot], j, v);
  646.          }
  647.          break;
  648.       case INTERP_POS:
  649.          setup_fragcoord_coeff(setup, fragSlot);
  650.          break;
  651.       default:
  652.          assert(0);
  653.       }
  654.  
  655.       if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
  656.          /* convert 0 to 1.0 and 1 to -1.0 */
  657.          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
  658.          setup->coef[fragSlot].dadx[0] = 0.0;
  659.          setup->coef[fragSlot].dady[0] = 0.0;
  660.       }
  661.  
  662.       if (0) {
  663.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  664.             debug_printf("attr[%d].%c: a0:%f dx:%f dy:%f\n",
  665.                          fragSlot, "xyzw"[j],
  666.                          setup->coef[fragSlot].a0[j],
  667.                          setup->coef[fragSlot].dadx[j],
  668.                          setup->coef[fragSlot].dady[j]);
  669.          }
  670.       }
  671.    }
  672. }
  673.  
  674.  
  675. static void
  676. setup_tri_edges(struct setup_context *setup)
  677. {
  678.    float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
  679.    float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
  680.  
  681.    float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
  682.    float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
  683.    float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
  684.  
  685.    setup->emaj.sy = ceilf(vmin_y);
  686.    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
  687.    setup->emaj.dxdy = setup->emaj.dy ? setup->emaj.dx / setup->emaj.dy : .0f;
  688.    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
  689.  
  690.    setup->etop.sy = ceilf(vmid_y);
  691.    setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
  692.    setup->etop.dxdy = setup->etop.dy ? setup->etop.dx / setup->etop.dy : .0f;
  693.    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
  694.  
  695.    setup->ebot.sy = ceilf(vmin_y);
  696.    setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
  697.    setup->ebot.dxdy = setup->ebot.dy ? setup->ebot.dx / setup->ebot.dy : .0f;
  698.    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
  699. }
  700.  
  701.  
  702. /**
  703.  * Render the upper or lower half of a triangle.
  704.  * Scissoring/cliprect is applied here too.
  705.  */
  706. static void
  707. subtriangle(struct setup_context *setup,
  708.             struct edge *eleft,
  709.             struct edge *eright,
  710.             int lines)
  711. {
  712.    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
  713.    const int minx = (int) cliprect->minx;
  714.    const int maxx = (int) cliprect->maxx;
  715.    const int miny = (int) cliprect->miny;
  716.    const int maxy = (int) cliprect->maxy;
  717.    int y, start_y, finish_y;
  718.    int sy = (int)eleft->sy;
  719.  
  720.    assert((int)eleft->sy == (int) eright->sy);
  721.    assert(lines >= 0);
  722.  
  723.    /* clip top/bottom */
  724.    start_y = sy;
  725.    if (start_y < miny)
  726.       start_y = miny;
  727.  
  728.    finish_y = sy + lines;
  729.    if (finish_y > maxy)
  730.       finish_y = maxy;
  731.  
  732.    start_y -= sy;
  733.    finish_y -= sy;
  734.  
  735.    /*
  736.    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
  737.    */
  738.  
  739.    for (y = start_y; y < finish_y; y++) {
  740.  
  741.       /* avoid accumulating adds as floats don't have the precision to
  742.        * accurately iterate large triangle edges that way.  luckily we
  743.        * can just multiply these days.
  744.        *
  745.        * this is all drowned out by the attribute interpolation anyway.
  746.        */
  747.       int left = (int)(eleft->sx + y * eleft->dxdy);
  748.       int right = (int)(eright->sx + y * eright->dxdy);
  749.  
  750.       /* clip left/right */
  751.       if (left < minx)
  752.          left = minx;
  753.       if (right > maxx)
  754.          right = maxx;
  755.  
  756.       if (left < right) {
  757.          int _y = sy + y;
  758.          if (block(_y) != setup->span.y) {
  759.             flush_spans(setup);
  760.             setup->span.y = block(_y);
  761.          }
  762.  
  763.          setup->span.left[_y&1] = left;
  764.          setup->span.right[_y&1] = right;
  765.       }
  766.    }
  767.  
  768.  
  769.    /* save the values so that emaj can be restarted:
  770.     */
  771.    eleft->sx += lines * eleft->dxdy;
  772.    eright->sx += lines * eright->dxdy;
  773.    eleft->sy += lines;
  774.    eright->sy += lines;
  775. }
  776.  
  777.  
  778. /**
  779.  * Recalculate prim's determinant.  This is needed as we don't have
  780.  * get this information through the vbuf_render interface & we must
  781.  * calculate it here.
  782.  */
  783. static float
  784. calc_det(const float (*v0)[4],
  785.          const float (*v1)[4],
  786.          const float (*v2)[4])
  787. {
  788.    /* edge vectors e = v0 - v2, f = v1 - v2 */
  789.    const float ex = v0[0][0] - v2[0][0];
  790.    const float ey = v0[0][1] - v2[0][1];
  791.    const float fx = v1[0][0] - v2[0][0];
  792.    const float fy = v1[0][1] - v2[0][1];
  793.  
  794.    /* det = cross(e,f).z */
  795.    return ex * fy - ey * fx;
  796. }
  797.  
  798.  
  799. /**
  800.  * Do setup for triangle rasterization, then render the triangle.
  801.  */
  802. void
  803. sp_setup_tri(struct setup_context *setup,
  804.              const float (*v0)[4],
  805.              const float (*v1)[4],
  806.              const float (*v2)[4])
  807. {
  808.    float det;
  809.    uint layer = 0;
  810. #if DEBUG_VERTS
  811.    debug_printf("Setup triangle:\n");
  812.    print_vertex(setup, v0);
  813.    print_vertex(setup, v1);
  814.    print_vertex(setup, v2);
  815. #endif
  816.  
  817.    if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
  818.       return;
  819.    
  820.    det = calc_det(v0, v1, v2);
  821.    /*
  822.    debug_printf("%s\n", __FUNCTION__ );
  823.    */
  824.  
  825. #if DEBUG_FRAGS
  826.    setup->numFragsEmitted = 0;
  827.    setup->numFragsWritten = 0;
  828. #endif
  829.  
  830.    if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
  831.       return;
  832.  
  833.    setup_tri_coefficients( setup );
  834.    setup_tri_edges( setup );
  835.  
  836.    assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
  837.  
  838.    setup->span.y = 0;
  839.    setup->span.right[0] = 0;
  840.    setup->span.right[1] = 0;
  841.    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
  842.    if (setup->softpipe->layer_slot > 0) {
  843.       layer = *(unsigned *)v1[setup->softpipe->layer_slot];
  844.       layer = MIN2(layer, setup->max_layer);
  845.    }
  846.    setup->quad[0].input.layer = layer;
  847.  
  848.    /*   init_constant_attribs( setup ); */
  849.  
  850.    if (setup->oneoverarea < 0.0) {
  851.       /* emaj on left:
  852.        */
  853.       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
  854.       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
  855.    }
  856.    else {
  857.       /* emaj on right:
  858.        */
  859.       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
  860.       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
  861.    }
  862.  
  863.    flush_spans( setup );
  864.  
  865.    if (setup->softpipe->active_statistics_queries) {
  866.       setup->softpipe->pipeline_statistics.c_primitives++;
  867.    }
  868.  
  869. #if DEBUG_FRAGS
  870.    printf("Tri: %u frags emitted, %u written\n",
  871.           setup->numFragsEmitted,
  872.           setup->numFragsWritten);
  873. #endif
  874. }
  875.  
  876.  
  877. /* Apply cylindrical wrapping to v0, v1 coordinates, if enabled.
  878.  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
  879.  */
  880. static void
  881. line_apply_cylindrical_wrap(float v0,
  882.                             float v1,
  883.                             uint cylindrical_wrap,
  884.                             float output[2])
  885. {
  886.    if (cylindrical_wrap) {
  887.       float delta;
  888.  
  889.       delta = v1 - v0;
  890.       if (delta > 0.5f) {
  891.          v0 += 1.0f;
  892.       }
  893.       else if (delta < -0.5f) {
  894.          v1 += 1.0f;
  895.       }
  896.    }
  897.  
  898.    output[0] = v0;
  899.    output[1] = v1;
  900. }
  901.  
  902.  
  903. /**
  904.  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  905.  * for a line.
  906.  * v[0] and v[1] are vmin and vmax, respectively.
  907.  */
  908. static void
  909. line_linear_coeff(const struct setup_context *setup,
  910.                   struct tgsi_interp_coef *coef,
  911.                   uint i,
  912.                   const float v[2])
  913. {
  914.    const float da = v[1] - v[0];
  915.    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
  916.    const float dady = da * setup->emaj.dy * setup->oneoverarea;
  917.    coef->dadx[i] = dadx;
  918.    coef->dady[i] = dady;
  919.    coef->a0[i] = (v[0] -
  920.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  921.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  922. }
  923.  
  924.  
  925. /**
  926.  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  927.  * for a line.
  928.  * v[0] and v[1] are vmin and vmax, respectively.
  929.  */
  930. static void
  931. line_persp_coeff(const struct setup_context *setup,
  932.                  struct tgsi_interp_coef *coef,
  933.                  uint i,
  934.                  const float v[2])
  935. {
  936.    const float a0 = v[0] * setup->vmin[0][3];
  937.    const float a1 = v[1] * setup->vmax[0][3];
  938.    const float da = a1 - a0;
  939.    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
  940.    const float dady = da * setup->emaj.dy * setup->oneoverarea;
  941.    coef->dadx[i] = dadx;
  942.    coef->dady[i] = dady;
  943.    coef->a0[i] = (a0 -
  944.                   (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
  945.                    dady * (setup->vmin[0][1] - setup->pixel_offset)));
  946. }
  947.  
  948.  
  949. /**
  950.  * Compute the setup->coef[] array dadx, dady, a0 values.
  951.  * Must be called after setup->vmin,vmax are initialized.
  952.  */
  953. static boolean
  954. setup_line_coefficients(struct setup_context *setup,
  955.                         const float (*v0)[4],
  956.                         const float (*v1)[4])
  957. {
  958.    struct softpipe_context *softpipe = setup->softpipe;
  959.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  960.    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
  961.    uint fragSlot;
  962.    float area;
  963.    float v[2];
  964.  
  965.    /* use setup->vmin, vmax to point to vertices */
  966.    if (softpipe->rasterizer->flatshade_first)
  967.       setup->vprovoke = v0;
  968.    else
  969.       setup->vprovoke = v1;
  970.    setup->vmin = v0;
  971.    setup->vmax = v1;
  972.  
  973.    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
  974.    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
  975.  
  976.    /* NOTE: this is not really area but something proportional to it */
  977.    area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
  978.    if (area == 0.0f || util_is_inf_or_nan(area))
  979.       return FALSE;
  980.    setup->oneoverarea = 1.0f / area;
  981.  
  982.    /* z and w are done by linear interpolation:
  983.     */
  984.    v[0] = setup->vmin[0][2];
  985.    v[1] = setup->vmax[0][2];
  986.    line_linear_coeff(setup, &setup->posCoef, 2, v);
  987.  
  988.    v[0] = setup->vmin[0][3];
  989.    v[1] = setup->vmax[0][3];
  990.    line_linear_coeff(setup, &setup->posCoef, 3, v);
  991.  
  992.    /* setup interpolation for all the remaining attributes:
  993.     */
  994.    for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
  995.       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
  996.       uint j;
  997.  
  998.       switch (vinfo->attrib[fragSlot].interp_mode) {
  999.       case INTERP_CONSTANT:
  1000.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  1001.             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
  1002.          break;
  1003.       case INTERP_LINEAR:
  1004.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  1005.             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  1006.                                         setup->vmax[vertSlot][j],
  1007.                                         fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  1008.                                         v);
  1009.             line_linear_coeff(setup, &setup->coef[fragSlot], j, v);
  1010.          }
  1011.          break;
  1012.       case INTERP_PERSPECTIVE:
  1013.          for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  1014.             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
  1015.                                         setup->vmax[vertSlot][j],
  1016.                                         fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
  1017.                                         v);
  1018.             line_persp_coeff(setup, &setup->coef[fragSlot], j, v);
  1019.          }
  1020.          break;
  1021.       case INTERP_POS:
  1022.          setup_fragcoord_coeff(setup, fragSlot);
  1023.          break;
  1024.       default:
  1025.          assert(0);
  1026.       }
  1027.  
  1028.       if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
  1029.          /* convert 0 to 1.0 and 1 to -1.0 */
  1030.          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
  1031.          setup->coef[fragSlot].dadx[0] = 0.0;
  1032.          setup->coef[fragSlot].dady[0] = 0.0;
  1033.       }
  1034.    }
  1035.    return TRUE;
  1036. }
  1037.  
  1038.  
  1039. /**
  1040.  * Plot a pixel in a line segment.
  1041.  */
  1042. static INLINE void
  1043. plot(struct setup_context *setup, int x, int y)
  1044. {
  1045.    const int iy = y & 1;
  1046.    const int ix = x & 1;
  1047.    const int quadX = x - ix;
  1048.    const int quadY = y - iy;
  1049.    const int mask = (1 << ix) << (2 * iy);
  1050.  
  1051.    if (quadX != setup->quad[0].input.x0 ||
  1052.        quadY != setup->quad[0].input.y0)
  1053.    {
  1054.       /* flush prev quad, start new quad */
  1055.  
  1056.       if (setup->quad[0].input.x0 != -1)
  1057.          clip_emit_quad( setup, &setup->quad[0] );
  1058.  
  1059.       setup->quad[0].input.x0 = quadX;
  1060.       setup->quad[0].input.y0 = quadY;
  1061.       setup->quad[0].inout.mask = 0x0;
  1062.    }
  1063.  
  1064.    setup->quad[0].inout.mask |= mask;
  1065. }
  1066.  
  1067.  
  1068. /**
  1069.  * Do setup for line rasterization, then render the line.
  1070.  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
  1071.  * to handle stippling and wide lines.
  1072.  */
  1073. void
  1074. sp_setup_line(struct setup_context *setup,
  1075.               const float (*v0)[4],
  1076.               const float (*v1)[4])
  1077. {
  1078.    int x0 = (int) v0[0][0];
  1079.    int x1 = (int) v1[0][0];
  1080.    int y0 = (int) v0[0][1];
  1081.    int y1 = (int) v1[0][1];
  1082.    int dx = x1 - x0;
  1083.    int dy = y1 - y0;
  1084.    int xstep, ystep;
  1085.    uint layer = 0;
  1086.  
  1087. #if DEBUG_VERTS
  1088.    debug_printf("Setup line:\n");
  1089.    print_vertex(setup, v0);
  1090.    print_vertex(setup, v1);
  1091. #endif
  1092.  
  1093.    if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
  1094.       return;
  1095.  
  1096.    if (dx == 0 && dy == 0)
  1097.       return;
  1098.  
  1099.    if (!setup_line_coefficients(setup, v0, v1))
  1100.       return;
  1101.  
  1102.    assert(v0[0][0] < 1.0e9);
  1103.    assert(v0[0][1] < 1.0e9);
  1104.    assert(v1[0][0] < 1.0e9);
  1105.    assert(v1[0][1] < 1.0e9);
  1106.  
  1107.    if (dx < 0) {
  1108.       dx = -dx;   /* make positive */
  1109.       xstep = -1;
  1110.    }
  1111.    else {
  1112.       xstep = 1;
  1113.    }
  1114.  
  1115.    if (dy < 0) {
  1116.       dy = -dy;   /* make positive */
  1117.       ystep = -1;
  1118.    }
  1119.    else {
  1120.       ystep = 1;
  1121.    }
  1122.  
  1123.    assert(dx >= 0);
  1124.    assert(dy >= 0);
  1125.    assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES);
  1126.  
  1127.    setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
  1128.    setup->quad[0].inout.mask = 0x0;
  1129.    if (setup->softpipe->layer_slot > 0) {
  1130.       layer = *(unsigned *)v1[setup->softpipe->layer_slot];
  1131.       layer = MIN2(layer, setup->max_layer);
  1132.    }
  1133.    setup->quad[0].input.layer = layer;
  1134.  
  1135.    /* XXX temporary: set coverage to 1.0 so the line appears
  1136.     * if AA mode happens to be enabled.
  1137.     */
  1138.    setup->quad[0].input.coverage[0] =
  1139.    setup->quad[0].input.coverage[1] =
  1140.    setup->quad[0].input.coverage[2] =
  1141.    setup->quad[0].input.coverage[3] = 1.0;
  1142.  
  1143.    if (dx > dy) {
  1144.       /*** X-major line ***/
  1145.       int i;
  1146.       const int errorInc = dy + dy;
  1147.       int error = errorInc - dx;
  1148.       const int errorDec = error - dx;
  1149.  
  1150.       for (i = 0; i < dx; i++) {
  1151.          plot(setup, x0, y0);
  1152.  
  1153.          x0 += xstep;
  1154.          if (error < 0) {
  1155.             error += errorInc;
  1156.          }
  1157.          else {
  1158.             error += errorDec;
  1159.             y0 += ystep;
  1160.          }
  1161.       }
  1162.    }
  1163.    else {
  1164.       /*** Y-major line ***/
  1165.       int i;
  1166.       const int errorInc = dx + dx;
  1167.       int error = errorInc - dy;
  1168.       const int errorDec = error - dy;
  1169.  
  1170.       for (i = 0; i < dy; i++) {
  1171.          plot(setup, x0, y0);
  1172.  
  1173.          y0 += ystep;
  1174.          if (error < 0) {
  1175.             error += errorInc;
  1176.          }
  1177.          else {
  1178.             error += errorDec;
  1179.             x0 += xstep;
  1180.          }
  1181.       }
  1182.    }
  1183.  
  1184.    /* draw final quad */
  1185.    if (setup->quad[0].inout.mask) {
  1186.       clip_emit_quad( setup, &setup->quad[0] );
  1187.    }
  1188. }
  1189.  
  1190.  
  1191. static void
  1192. point_persp_coeff(const struct setup_context *setup,
  1193.                   const float (*vert)[4],
  1194.                   struct tgsi_interp_coef *coef,
  1195.                   uint vertSlot, uint i)
  1196. {
  1197.    assert(i <= 3);
  1198.    coef->dadx[i] = 0.0F;
  1199.    coef->dady[i] = 0.0F;
  1200.    coef->a0[i] = vert[vertSlot][i] * vert[0][3];
  1201. }
  1202.  
  1203.  
  1204. /**
  1205.  * Do setup for point rasterization, then render the point.
  1206.  * Round or square points...
  1207.  * XXX could optimize a lot for 1-pixel points.
  1208.  */
  1209. void
  1210. sp_setup_point(struct setup_context *setup,
  1211.                const float (*v0)[4])
  1212. {
  1213.    struct softpipe_context *softpipe = setup->softpipe;
  1214.    const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
  1215.    const int sizeAttr = setup->softpipe->psize_slot;
  1216.    const float size
  1217.       = sizeAttr > 0 ? v0[sizeAttr][0]
  1218.       : setup->softpipe->rasterizer->point_size;
  1219.    const float halfSize = 0.5F * size;
  1220.    const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
  1221.    const float x = v0[0][0];  /* Note: data[0] is always position */
  1222.    const float y = v0[0][1];
  1223.    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
  1224.    uint fragSlot;
  1225.    uint layer = 0;
  1226. #if DEBUG_VERTS
  1227.    debug_printf("Setup point:\n");
  1228.    print_vertex(setup, v0);
  1229. #endif
  1230.  
  1231.    if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
  1232.       return;
  1233.  
  1234.    assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS);
  1235.  
  1236.    if (setup->softpipe->layer_slot > 0) {
  1237.       layer = *(unsigned *)v0[setup->softpipe->layer_slot];
  1238.       layer = MIN2(layer, setup->max_layer);
  1239.    }
  1240.    setup->quad[0].input.layer = layer;
  1241.  
  1242.    /* For points, all interpolants are constant-valued.
  1243.     * However, for point sprites, we'll need to setup texcoords appropriately.
  1244.     * XXX: which coefficients are the texcoords???
  1245.     * We may do point sprites as textured quads...
  1246.     *
  1247.     * KW: We don't know which coefficients are texcoords - ultimately
  1248.     * the choice of what interpolation mode to use for each attribute
  1249.     * should be determined by the fragment program, using
  1250.     * per-attribute declaration statements that include interpolation
  1251.     * mode as a parameter.  So either the fragment program will have
  1252.     * to be adjusted for pointsprite vs normal point behaviour, or
  1253.     * otherwise a special interpolation mode will have to be defined
  1254.     * which matches the required behaviour for point sprites.  But -
  1255.     * the latter is not a feature of normal hardware, and as such
  1256.     * probably should be ruled out on that basis.
  1257.     */
  1258.    setup->vprovoke = v0;
  1259.  
  1260.    /* setup Z, W */
  1261.    const_coeff(setup, &setup->posCoef, 0, 2);
  1262.    const_coeff(setup, &setup->posCoef, 0, 3);
  1263.  
  1264.    for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
  1265.       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
  1266.       uint j;
  1267.  
  1268.       switch (vinfo->attrib[fragSlot].interp_mode) {
  1269.       case INTERP_CONSTANT:
  1270.          /* fall-through */
  1271.       case INTERP_LINEAR:
  1272.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  1273.             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
  1274.          break;
  1275.       case INTERP_PERSPECTIVE:
  1276.          for (j = 0; j < TGSI_NUM_CHANNELS; j++)
  1277.             point_persp_coeff(setup, setup->vprovoke,
  1278.                               &setup->coef[fragSlot], vertSlot, j);
  1279.          break;
  1280.       case INTERP_POS:
  1281.          setup_fragcoord_coeff(setup, fragSlot);
  1282.          break;
  1283.       default:
  1284.          assert(0);
  1285.       }
  1286.  
  1287.       if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
  1288.          /* convert 0 to 1.0 and 1 to -1.0 */
  1289.          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
  1290.          setup->coef[fragSlot].dadx[0] = 0.0;
  1291.          setup->coef[fragSlot].dady[0] = 0.0;
  1292.       }
  1293.    }
  1294.  
  1295.  
  1296.    if (halfSize <= 0.5 && !round) {
  1297.       /* special case for 1-pixel points */
  1298.       const int ix = ((int) x) & 1;
  1299.       const int iy = ((int) y) & 1;
  1300.       setup->quad[0].input.x0 = (int) x - ix;
  1301.       setup->quad[0].input.y0 = (int) y - iy;
  1302.       setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
  1303.       clip_emit_quad( setup, &setup->quad[0] );
  1304.    }
  1305.    else {
  1306.       if (round) {
  1307.          /* rounded points */
  1308.          const int ixmin = block((int) (x - halfSize));
  1309.          const int ixmax = block((int) (x + halfSize));
  1310.          const int iymin = block((int) (y - halfSize));
  1311.          const int iymax = block((int) (y + halfSize));
  1312.          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
  1313.          const float rmax = halfSize + 0.7071F;
  1314.          const float rmin2 = MAX2(0.0F, rmin * rmin);
  1315.          const float rmax2 = rmax * rmax;
  1316.          const float cscale = 1.0F / (rmax2 - rmin2);
  1317.          int ix, iy;
  1318.  
  1319.          for (iy = iymin; iy <= iymax; iy += 2) {
  1320.             for (ix = ixmin; ix <= ixmax; ix += 2) {
  1321.                float dx, dy, dist2, cover;
  1322.  
  1323.                setup->quad[0].inout.mask = 0x0;
  1324.  
  1325.                dx = (ix + 0.5f) - x;
  1326.                dy = (iy + 0.5f) - y;
  1327.                dist2 = dx * dx + dy * dy;
  1328.                if (dist2 <= rmax2) {
  1329.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1330.                   setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
  1331.                   setup->quad[0].inout.mask |= MASK_TOP_LEFT;
  1332.                }
  1333.  
  1334.                dx = (ix + 1.5f) - x;
  1335.                dy = (iy + 0.5f) - y;
  1336.                dist2 = dx * dx + dy * dy;
  1337.                if (dist2 <= rmax2) {
  1338.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1339.                   setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
  1340.                   setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
  1341.                }
  1342.  
  1343.                dx = (ix + 0.5f) - x;
  1344.                dy = (iy + 1.5f) - y;
  1345.                dist2 = dx * dx + dy * dy;
  1346.                if (dist2 <= rmax2) {
  1347.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1348.                   setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
  1349.                   setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
  1350.                }
  1351.  
  1352.                dx = (ix + 1.5f) - x;
  1353.                dy = (iy + 1.5f) - y;
  1354.                dist2 = dx * dx + dy * dy;
  1355.                if (dist2 <= rmax2) {
  1356.                   cover = 1.0F - (dist2 - rmin2) * cscale;
  1357.                   setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
  1358.                   setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
  1359.                }
  1360.  
  1361.                if (setup->quad[0].inout.mask) {
  1362.                   setup->quad[0].input.x0 = ix;
  1363.                   setup->quad[0].input.y0 = iy;
  1364.                   clip_emit_quad( setup, &setup->quad[0] );
  1365.                }
  1366.             }
  1367.          }
  1368.       }
  1369.       else {
  1370.          /* square points */
  1371.          const int xmin = (int) (x + 0.75 - halfSize);
  1372.          const int ymin = (int) (y + 0.25 - halfSize);
  1373.          const int xmax = xmin + (int) size;
  1374.          const int ymax = ymin + (int) size;
  1375.          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
  1376.          const int ixmin = block(xmin);
  1377.          const int ixmax = block(xmax - 1);
  1378.          const int iymin = block(ymin);
  1379.          const int iymax = block(ymax - 1);
  1380.          int ix, iy;
  1381.  
  1382.          /*
  1383.          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
  1384.          */
  1385.          for (iy = iymin; iy <= iymax; iy += 2) {
  1386.             uint rowMask = 0xf;
  1387.             if (iy < ymin) {
  1388.                /* above the top edge */
  1389.                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
  1390.             }
  1391.             if (iy + 1 >= ymax) {
  1392.                /* below the bottom edge */
  1393.                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
  1394.             }
  1395.  
  1396.             for (ix = ixmin; ix <= ixmax; ix += 2) {
  1397.                uint mask = rowMask;
  1398.  
  1399.                if (ix < xmin) {
  1400.                   /* fragment is past left edge of point, turn off left bits */
  1401.                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
  1402.                }
  1403.                if (ix + 1 >= xmax) {
  1404.                   /* past the right edge */
  1405.                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
  1406.                }
  1407.  
  1408.                setup->quad[0].inout.mask = mask;
  1409.                setup->quad[0].input.x0 = ix;
  1410.                setup->quad[0].input.y0 = iy;
  1411.                clip_emit_quad( setup, &setup->quad[0] );
  1412.             }
  1413.          }
  1414.       }
  1415.    }
  1416. }
  1417.  
  1418.  
  1419. /**
  1420.  * Called by vbuf code just before we start buffering primitives.
  1421.  */
  1422. void
  1423. sp_setup_prepare(struct setup_context *setup)
  1424. {
  1425.    struct softpipe_context *sp = setup->softpipe;
  1426.    int i;
  1427.    unsigned max_layer = ~0;
  1428.    if (sp->dirty) {
  1429.       softpipe_update_derived(sp, sp->reduced_api_prim);
  1430.    }
  1431.  
  1432.    /* Note: nr_attrs is only used for debugging (vertex printing) */
  1433.    setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
  1434.  
  1435.    /*
  1436.     * Determine how many layers the fb has (used for clamping layer value).
  1437.     * OpenGL (but not d3d10) permits different amount of layers per rt, however
  1438.     * results are undefined if layer exceeds the amount of layers of ANY
  1439.     * attachment hence don't need separate per cbuf and zsbuf max.
  1440.     */
  1441.    for (i = 0; i < setup->softpipe->framebuffer.nr_cbufs; i++) {
  1442.       struct pipe_surface *cbuf = setup->softpipe->framebuffer.cbufs[i];
  1443.       if (cbuf) {
  1444.          max_layer = MIN2(max_layer,
  1445.                           cbuf->u.tex.last_layer - cbuf->u.tex.first_layer);
  1446.  
  1447.       }
  1448.    }
  1449.  
  1450.    setup->max_layer = max_layer;
  1451.  
  1452.    sp->quad.first->begin( sp->quad.first );
  1453.  
  1454.    if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
  1455.        sp->rasterizer->fill_front == PIPE_POLYGON_MODE_FILL &&
  1456.        sp->rasterizer->fill_back == PIPE_POLYGON_MODE_FILL) {
  1457.       /* we'll do culling */
  1458.       setup->cull_face = sp->rasterizer->cull_face;
  1459.    }
  1460.    else {
  1461.       /* 'draw' will do culling */
  1462.       setup->cull_face = PIPE_FACE_NONE;
  1463.    }
  1464. }
  1465.  
  1466.  
  1467. void
  1468. sp_setup_destroy_context(struct setup_context *setup)
  1469. {
  1470.    FREE( setup );
  1471. }
  1472.  
  1473.  
  1474. /**
  1475.  * Create a new primitive setup/render stage.
  1476.  */
  1477. struct setup_context *
  1478. sp_setup_create_context(struct softpipe_context *softpipe)
  1479. {
  1480.    struct setup_context *setup = CALLOC_STRUCT(setup_context);
  1481.    unsigned i;
  1482.  
  1483.    setup->softpipe = softpipe;
  1484.  
  1485.    for (i = 0; i < MAX_QUADS; i++) {
  1486.       setup->quad[i].coef = setup->coef;
  1487.       setup->quad[i].posCoef = &setup->posCoef;
  1488.    }
  1489.  
  1490.    setup->span.left[0] = 1000000;     /* greater than right[0] */
  1491.    setup->span.left[1] = 1000000;     /* greater than right[1] */
  1492.  
  1493.    return setup;
  1494. }
  1495.