Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007-2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /*
  29.  * Rasterization for binned triangles within a tile
  30.  */
  31.  
  32. #include <limits.h>
  33. #include "util/u_math.h"
  34. #include "lp_debug.h"
  35. #include "lp_perf.h"
  36. #include "lp_rast_priv.h"
  37.  
/**
 * Shade all pixels in a 4x4 block that is fully covered by the triangle.
 */
static void
block_full_4(struct lp_rasterizer_task *task,
             const struct lp_rast_triangle *tri,
             int x, int y)
{
   /* No per-pixel coverage mask needed: every quad in the block is inside. */
   lp_rast_shade_quads_all(task, &tri->inputs, x, y);
}
  48.  
  49.  
  50. /**
  51.  * Shade all pixels in a 16x16 block.
  52.  */
  53. static void
  54. block_full_16(struct lp_rasterizer_task *task,
  55.               const struct lp_rast_triangle *tri,
  56.               int x, int y)
  57. {
  58.    unsigned ix, iy;
  59.    assert(x % 16 == 0);
  60.    assert(y % 16 == 0);
  61.    for (iy = 0; iy < 16; iy += 4)
  62.       for (ix = 0; ix < 16; ix += 4)
  63.          block_full_4(task, tri, x + ix, y + iy);
  64. }
  65.  
  66. static INLINE unsigned
  67. build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
  68. {
  69.    unsigned mask = 0;
  70.  
  71.    int64_t c0 = c;
  72.    int64_t c1 = c0 + dcdy;
  73.    int64_t c2 = c1 + dcdy;
  74.    int64_t c3 = c2 + dcdy;
  75.  
  76.    mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
  77.    mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
  78.    mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
  79.    mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
  80.    mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
  81.    mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
  82.    mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
  83.    mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
  84.    mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
  85.    mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
  86.    mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
  87.    mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
  88.    mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
  89.    mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
  90.    mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
  91.    mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15);
  92.  
  93.    return mask;
  94. }
  95.  
  96.  
  97. static INLINE void
  98. build_masks(int64_t c,
  99.             int64_t cdiff,
  100.             int64_t dcdx,
  101.             int64_t dcdy,
  102.             unsigned *outmask,
  103.             unsigned *partmask)
  104. {
  105.    *outmask |= build_mask_linear(c, dcdx, dcdy);
  106.    *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
  107. }
  108.  
  109. void
  110. lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
  111.                       const union lp_rast_cmd_arg arg)
  112. {
  113.    union lp_rast_cmd_arg arg2;
  114.    arg2.triangle.tri = arg.triangle.tri;
  115.    arg2.triangle.plane_mask = (1<<3)-1;
  116.    lp_rast_triangle_3(task, arg2);
  117. }
  118.  
/**
 * Rasterize a 4x4 block against a 3-plane triangle.
 * There is no specialized 4x4 path here; reuse the 16x16 3-plane variant.
 */
void
lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
                      const union lp_rast_cmd_arg arg)
{
   lp_rast_triangle_3_16(task, arg);
}
  125.  
  126. void
  127. lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
  128.                       const union lp_rast_cmd_arg arg)
  129. {
  130.    union lp_rast_cmd_arg arg2;
  131.    arg2.triangle.tri = arg.triangle.tri;
  132.    arg2.triangle.plane_mask = (1<<4)-1;
  133.    lp_rast_triangle_4(task, arg2);
  134. }
  135.  
  136. #if !defined(PIPE_ARCH_SSE)
  137.  
  138. void
  139. lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
  140.                          const union lp_rast_cmd_arg arg)
  141. {
  142.    union lp_rast_cmd_arg arg2;
  143.    arg2.triangle.tri = arg.triangle.tri;
  144.    arg2.triangle.plane_mask = (1<<3)-1;
  145.    lp_rast_triangle_32_3(task, arg2);
  146. }
  147.  
  148. void
  149. lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
  150.                          const union lp_rast_cmd_arg arg)
  151. {
  152.    union lp_rast_cmd_arg arg2;
  153.    arg2.triangle.tri = arg.triangle.tri;
  154.    arg2.triangle.plane_mask = (1<<4)-1;
  155.    lp_rast_triangle_32_4(task, arg2);
  156. }
  157.  
/**
 * Non-SSE fallback for the 4x4 case: no specialized path, reuse the
 * 16x16 32-bit 3-plane variant.
 */
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
                      const union lp_rast_cmd_arg arg)
{
   lp_rast_triangle_32_3_16(task, arg);
}
  164.  
  165. #else
  166. #include <emmintrin.h>
  167. #include "util/u_sse.h"
  168.  
  169.  
  170. static INLINE void
  171. build_masks_32(int c,
  172.                int cdiff,
  173.                int dcdx,
  174.                int dcdy,
  175.                unsigned *outmask,
  176.                unsigned *partmask)
  177. {
  178.    __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
  179.    __m128i xdcdy = _mm_set1_epi32(dcdy);
  180.  
  181.    /* Get values across the quad
  182.     */
  183.    __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
  184.    __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
  185.    __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
  186.  
  187.    {
  188.       __m128i cstep01, cstep23, result;
  189.  
  190.       cstep01 = _mm_packs_epi32(cstep0, cstep1);
  191.       cstep23 = _mm_packs_epi32(cstep2, cstep3);
  192.       result = _mm_packs_epi16(cstep01, cstep23);
  193.  
  194.       *outmask |= _mm_movemask_epi8(result);
  195.    }
  196.  
  197.  
  198.    {
  199.       __m128i cio4 = _mm_set1_epi32(cdiff);
  200.       __m128i cstep01, cstep23, result;
  201.  
  202.       cstep0 = _mm_add_epi32(cstep0, cio4);
  203.       cstep1 = _mm_add_epi32(cstep1, cio4);
  204.       cstep2 = _mm_add_epi32(cstep2, cio4);
  205.       cstep3 = _mm_add_epi32(cstep3, cio4);
  206.  
  207.       cstep01 = _mm_packs_epi32(cstep0, cstep1);
  208.       cstep23 = _mm_packs_epi32(cstep2, cstep3);
  209.       result = _mm_packs_epi16(cstep01, cstep23);
  210.  
  211.       *partmask |= _mm_movemask_epi8(result);
  212.    }
  213. }
  214.  
  215.  
  216. static INLINE unsigned
  217. build_mask_linear_32(int c, int dcdx, int dcdy)
  218. {
  219.    __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
  220.    __m128i xdcdy = _mm_set1_epi32(dcdy);
  221.  
  222.    /* Get values across the quad
  223.     */
  224.    __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
  225.    __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
  226.    __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
  227.  
  228.    /* pack pairs of results into epi16
  229.     */
  230.    __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
  231.    __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
  232.  
  233.    /* pack into epi8, preserving sign bits
  234.     */
  235.    __m128i result = _mm_packs_epi16(cstep01, cstep23);
  236.  
  237.    /* extract sign bits to create mask
  238.     */
  239.    return _mm_movemask_epi8(result);
  240. }
  241.  
  242. static INLINE unsigned
  243. sign_bits4(const __m128i *cstep, int cdiff)
  244. {
  245.  
  246.    /* Adjust the step values
  247.     */
  248.    __m128i cio4 = _mm_set1_epi32(cdiff);
  249.    __m128i cstep0 = _mm_add_epi32(cstep[0], cio4);
  250.    __m128i cstep1 = _mm_add_epi32(cstep[1], cio4);
  251.    __m128i cstep2 = _mm_add_epi32(cstep[2], cio4);
  252.    __m128i cstep3 = _mm_add_epi32(cstep[3], cio4);
  253.  
  254.    /* Pack down to epi8
  255.     */
  256.    __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
  257.    __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
  258.    __m128i result = _mm_packs_epi16(cstep01, cstep23);
  259.  
  260.    /* Extract the sign bits
  261.     */
  262.    return _mm_movemask_epi8(result);
  263. }
  264.  
  265.  
  266. #define NR_PLANES 3
  267.  
  268.  
  269.  
  270.  
  271.  
  272.  
  273.  
/**
 * SSE path: rasterize a 16x16 block against the triangle's three edge
 * planes, shading each partially covered 4x4 sub-block with its mask.
 *
 * NOTE(review): for this command the low/high bytes of
 * arg.triangle.plane_mask appear to carry the block's x/y offset inside
 * the tile rather than actual plane bits -- confirm against the setup
 * code that bins this command.
 */
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
                      const union lp_rast_cmd_arg arg)
{
   const struct lp_rast_triangle *tri = arg.triangle.tri;
   const struct lp_rast_plane *plane = GET_PLANES(tri);
   int x = (arg.triangle.plane_mask & 0xff) + task->x;
   int y = (arg.triangle.plane_mask >> 8) + task->y;
   unsigned i, j;

   /* Deferred list of partially covered 4x4 sub-blocks to shade later;
    * at most 16 entries (one per sub-block).
    */
   struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
   unsigned nr = 0;

   __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
   __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
   __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
   __m128i zero = _mm_setzero_si128();

   __m128i c;        /* per-plane edge values, one lane per plane */
   __m128i dcdx;
   __m128i dcdy;
   __m128i rej4;     /* per-plane trivial-reject values, scaled below */

   __m128i dcdx2;
   __m128i dcdx3;
   
   __m128i span_0;                /* 0,dcdx,2dcdx,3dcdx for plane 0 */
   __m128i span_1;                /* 0,dcdx,2dcdx,3dcdx for plane 1 */
   __m128i span_2;                /* 0,dcdx,2dcdx,3dcdx for plane 2 */
   __m128i unused;
   
   /* Transpose the per-plane vectors so each field (c/dcdx/dcdy/eo) ends
    * up in one register with one lane per plane.
    */
   transpose4_epi32(&p0, &p1, &p2, &zero,
                    &c, &dcdx, &dcdy, &rej4);

   /* Adjust dcdx;
    * negated to match the sign convention used by the stepping below.
    */
   dcdx = _mm_sub_epi32(zero, dcdx);

   /* Translate the plane values to this block's origin. */
   c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
   c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
   rej4 = _mm_slli_epi32(rej4, 2);   /* scale reject distance by 4 for 4x4 steps */

   /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
   c = _mm_sub_epi32(c, _mm_set1_epi32(1));
   rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1));

   dcdx2 = _mm_add_epi32(dcdx, dcdx);
   dcdx3 = _mm_add_epi32(dcdx2, dcdx);

   /* Build per-plane x-span vectors (0, dcdx, 2dcdx, 3dcdx). */
   transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
                    &span_0, &span_1, &span_2, &unused);

   /* Visit the 4x4 grid of 4x4-pixel sub-blocks. */
   for (i = 0; i < 4; i++) {
      __m128i cx = c;

      for (j = 0; j < 4; j++) {
         __m128i c4rej = _mm_add_epi32(cx, rej4);
         __m128i rej_masks = _mm_srai_epi32(c4rej, 31);

         /* if (is_zero(rej_masks)) */
         if (_mm_movemask_epi8(rej_masks) == 0) {
            /* Sub-block not trivially rejected: step each plane across
             * its four rows (suffix _0.._3 is the row index) and OR the
             * planes so a set sign bit means "outside at least one edge".
             */
            __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0);
            __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1);
            __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2);

            __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);

            __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
            __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
            __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));

            __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
            __m128i c_01 = _mm_packs_epi32(c_0, c_1);

            __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
            __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
            __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));

            __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);

            __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
            __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
            __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));

            __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
            __m128i c_23 = _mm_packs_epi32(c_2, c_3);
            __m128i c_0123 = _mm_packs_epi16(c_01, c_23);

            /* Sign bits of the packed bytes form the 16-bit "outside" mask. */
            unsigned mask = _mm_movemask_epi8(c_0123);

            /* The entry is written unconditionally but only kept (nr++)
             * when at least one pixel is inside (mask != 0xffff).
             */
            out[nr].i = i;
            out[nr].j = j;
            out[nr].mask = mask;
            if (mask != 0xffff)
               nr++;
         }
         cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2));   /* advance 4 pixels in x */
      }

      c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2));        /* advance 4 pixels in y */
   }

   /* Shade the recorded sub-blocks; the shader takes an "inside" mask,
    * hence the inversion.
    */
   for (i = 0; i < nr; i++)
      lp_rast_shade_quads_mask(task,
                               &tri->inputs,
                               x + 4 * out[i].j,
                               y + 4 * out[i].i,
                               0xffff & ~out[i].mask);
}
  383.  
  384.  
  385.  
  386.  
  387.  
/**
 * SSE path: rasterize a single 4x4 block against the triangle's three
 * edge planes and shade it with the resulting coverage mask.
 *
 * NOTE(review): as in the 16x16 variant, the low/high bytes of
 * arg.triangle.plane_mask appear to carry the block's x/y offset inside
 * the tile rather than plane bits -- confirm against the setup code.
 */
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
                     const union lp_rast_cmd_arg arg)
{
   const struct lp_rast_triangle *tri = arg.triangle.tri;
   const struct lp_rast_plane *plane = GET_PLANES(tri);
   unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
   unsigned y = (arg.triangle.plane_mask >> 8) + task->y;

   __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
   __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
   __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
   __m128i zero = _mm_setzero_si128();

   __m128i c;       /* per-plane edge values, one lane per plane */
   __m128i dcdx;
   __m128i dcdy;

   __m128i dcdx2;
   __m128i dcdx3;
   
   __m128i span_0;                /* 0,dcdx,2dcdx,3dcdx for plane 0 */
   __m128i span_1;                /* 0,dcdx,2dcdx,3dcdx for plane 1 */
   __m128i span_2;                /* 0,dcdx,2dcdx,3dcdx for plane 2 */
   __m128i unused;

   /* Transpose per-plane vectors so each field lives in one register
    * with one lane per plane (eo is not needed here).
    */
   transpose4_epi32(&p0, &p1, &p2, &zero,
                    &c, &dcdx, &dcdy, &unused);

   /* Adjust dcdx;
    * negated to match the sign convention used by the stepping below.
    */
   dcdx = _mm_sub_epi32(zero, dcdx);

   /* Translate the plane values to this block's origin. */
   c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
   c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));

   /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
   c = _mm_sub_epi32(c, _mm_set1_epi32(1));

   dcdx2 = _mm_add_epi32(dcdx, dcdx);
   dcdx3 = _mm_add_epi32(dcdx2, dcdx);

   /* Build per-plane x-span vectors (0, dcdx, 2dcdx, 3dcdx). */
   transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
                    &span_0, &span_1, &span_2, &unused);


   {
      /* Step each plane across the four rows of the block (suffix _0.._3
       * is the row index) and OR the planes, so a set sign bit means
       * "outside at least one edge".
       */
      __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0);
      __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1);
      __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2);
     
      __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);

      __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
      __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
      __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));

      __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
      __m128i c_01 = _mm_packs_epi32(c_0, c_1);

      __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
      __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
      __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));

      __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);

      __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
      __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
      __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));

      __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
      __m128i c_23 = _mm_packs_epi32(c_2, c_3);
      __m128i c_0123 = _mm_packs_epi16(c_01, c_23);

      /* Sign bits of the packed bytes form the 16-bit "outside" mask. */
      unsigned mask = _mm_movemask_epi8(c_0123);

      /* Shade only if at least one pixel is inside; the shader takes an
       * "inside" mask, hence the inversion.
       */
      if (mask != 0xffff)
         lp_rast_shade_quads_mask(task,
                                  &tri->inputs,
                                  x,
                                  y,
                                  0xffff & ~mask);
   }
}
  472.  
  473. #undef NR_PLANES
  474. #endif
  475.  
  476.  
/* ------------------------------------------------------------------
 * Instantiate the generic tile-rasterization template once per plane
 * count (1..8).  lp_rast_tri_tmp.h names the generated functions via
 * TAG() and sizes its loops with NR_PLANES; since both macros are
 * redefined below for every inclusion, the template header is expected
 * to #undef them after each use.
 *
 * First the 64-bit plane variants, using the scalar mask builders above.
 */
#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks(c, cdiff, dcdx, dcdy, omask, pmask)
#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear(c, dcdx, dcdy)

#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_2
#define NR_PLANES 2
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_3
#define NR_PLANES 3
/*#define TRI_4 lp_rast_triangle_3_4*/
/*#define TRI_16 lp_rast_triangle_3_16*/
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_4
#define NR_PLANES 4
/*#define TRI_16 lp_rast_triangle_4_16*/
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_5
#define NR_PLANES 5
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_6
#define NR_PLANES 6
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_7
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_8
#define NR_PLANES 8
#include "lp_rast_tri_tmp.h"

/* 32-bit plane variants: on SSE builds, switch the mask builders to the
 * SIMD implementations (note the narrowing casts of the template's
 * 64-bit c/cdiff arguments down to int).
 */
#ifdef PIPE_ARCH_SSE
#undef BUILD_MASKS
#undef BUILD_MASK_LINEAR
#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask)
#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear_32((int)c, dcdx, dcdy)
#endif

#define TAG(x) x##_32_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_2
#define NR_PLANES 2
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_3
#define NR_PLANES 3
/*#define TRI_4 lp_rast_triangle_3_4*/
/*#define TRI_16 lp_rast_triangle_3_16*/
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_4
#define NR_PLANES 4
#ifdef PIPE_ARCH_SSE
#define TRI_16 lp_rast_triangle_32_4_16
#endif
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_5
#define NR_PLANES 5
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_6
#define NR_PLANES 6
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_7
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"

#define TAG(x) x##_32_8
#define NR_PLANES 8
#include "lp_rast_tri_tmp.h"
  558.  
  559.