Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2009 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. #include "brw_context.h"
  29. #include "brw_state.h"
  30. #include "brw_defines.h"
  31. #include "brw_util.h"
  32. #include "main/macros.h"
  33. #include "main/fbobject.h"
  34. #include "intel_batchbuffer.h"
  35.  
  36. /**
  37.  * Determine the appropriate attribute override value to store into the
  38.  * 3DSTATE_SF structure for a given fragment shader attribute.  The attribute
  39.  * override value contains two pieces of information: the location of the
  40.  * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
  41.  * flag indicating whether to "swizzle" the attribute based on the direction
  42.  * the triangle is facing.
  43.  *
  44.  * If an attribute is "swizzled", then the given VUE location is used for
  45.  * front-facing triangles, and the VUE location that immediately follows is
  46.  * used for back-facing triangles.  We use this to implement the mapping from
  47.  * gl_FrontColor/gl_BackColor to gl_Color.
  48.  *
  49.  * urb_entry_read_offset is the offset into the VUE at which the SF unit is
  50.  * being instructed to begin reading attribute data.  It can be set to a
  51.  * nonzero value to prevent the SF unit from wasting time reading elements of
  52.  * the VUE that are not needed by the fragment shader.  It is measured in
  53.  * 256-bit increments.
  54.  */
  55. static uint32_t
  56. get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
  57.                   int fs_attr, bool two_side_color, uint32_t *max_source_attr)
  58. {
  59.    /* Find the VUE slot for this attribute. */
  60.    int slot = vue_map->varying_to_slot[fs_attr];
  61.  
  62.    /* If there was only a back color written but not front, use back
  63.     * as the color instead of undefined
  64.     */
  65.    if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
  66.       slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
  67.    if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
  68.       slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
  69.  
  70.    if (slot == -1) {
  71.       /* This attribute does not exist in the VUE--that means that the vertex
  72.        * shader did not write to it.  This means that either:
  73.        *
  74.        * (a) This attribute is a texture coordinate, and it is going to be
  75.        * replaced with point coordinates (as a consequence of a call to
  76.        * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
  77.        * hardware will ignore whatever attribute override we supply.
  78.        *
  79.        * (b) This attribute is read by the fragment shader but not written by
  80.        * the vertex shader, so its value is undefined.  Therefore the
  81.        * attribute override we supply doesn't matter.
  82.        *
  83.        * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
  84.        * previous shader stage.
  85.        *
  86.        * Note that we don't have to worry about the cases where the attribute
  87.        * is gl_PointCoord or is undergoing point sprite coordinate
  88.        * replacement, because in those cases, this function isn't called.
  89.        *
  90.        * In case (c), we need to program the attribute overrides so that the
  91.        * primitive ID will be stored in this slot.  In every other case, the
  92.        * attribute override we supply doesn't matter.  So just go ahead and
  93.        * program primitive ID in every case.
  94.        */
  95.       return (ATTRIBUTE_0_OVERRIDE_W |
  96.               ATTRIBUTE_0_OVERRIDE_Z |
  97.               ATTRIBUTE_0_OVERRIDE_Y |
  98.               ATTRIBUTE_0_OVERRIDE_X |
  99.               (ATTRIBUTE_CONST_PRIM_ID << ATTRIBUTE_0_CONST_SOURCE_SHIFT));
  100.    }
  101.  
  102.    /* Compute the location of the attribute relative to urb_entry_read_offset.
  103.     * Each increment of urb_entry_read_offset represents a 256-bit value, so
  104.     * it counts for two 128-bit VUE slots.
  105.     */
  106.    int source_attr = slot - 2 * urb_entry_read_offset;
  107.    assert(source_attr >= 0 && source_attr < 32);
  108.  
  109.    /* If we are doing two-sided color, and the VUE slot following this one
  110.     * represents a back-facing color, then we need to instruct the SF unit to
  111.     * do back-facing swizzling.
  112.     */
  113.    bool swizzling = two_side_color &&
  114.       ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
  115.         vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
  116.        (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
  117.         vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
  118.  
  119.    /* Update max_source_attr.  If swizzling, the SF will read this slot + 1. */
  120.    if (*max_source_attr < source_attr + swizzling)
  121.       *max_source_attr = source_attr + swizzling;
  122.  
  123.    if (swizzling) {
  124.       return source_attr |
  125.          (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
  126.    }
  127.  
  128.    return source_attr;
  129. }
  130.  
  131.  
  132. static bool
  133. is_drawing_points(const struct brw_context *brw)
  134. {
  135.    /* Determine if the primitives *reaching the SF* are points */
  136.    /* _NEW_POLYGON */
  137.    if (brw->ctx.Polygon.FrontMode == GL_POINT ||
  138.        brw->ctx.Polygon.BackMode == GL_POINT) {
  139.       return true;
  140.    }
  141.  
  142.    if (brw->geometry_program) {
  143.       /* BRW_NEW_GEOMETRY_PROGRAM */
  144.       return brw->geometry_program->OutputType == GL_POINTS;
  145.    } else {
  146.       /* BRW_NEW_PRIMITIVE */
  147.       return brw->primitive == _3DPRIM_POINTLIST;
  148.    }
  149. }
  150.  
  151.  
  152. /**
  153.  * Create the mapping from the FS inputs we produce to the previous pipeline
  154.  * stage (GS or VS) outputs they source from.
  155.  */
  156. void
  157. calculate_attr_overrides(const struct brw_context *brw,
  158.                          uint16_t *attr_overrides,
  159.                          uint32_t *point_sprite_enables,
  160.                          uint32_t *flat_enables,
  161.                          uint32_t *urb_entry_read_length)
  162. {
  163.    const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
  164.    uint32_t max_source_attr = 0;
  165.  
  166.    *point_sprite_enables = 0;
  167.    *flat_enables = 0;
  168.  
  169.    /* _NEW_LIGHT */
  170.    bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
  171.  
  172.    /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
  173.     * description of dw10 Point Sprite Texture Coordinate Enable:
  174.     *
  175.     * "This field must be programmed to zero when non-point primitives
  176.     * are rendered."
  177.     *
  178.     * The SandyBridge PRM doesn't explicitly say that point sprite enables
  179.     * must be programmed to zero when rendering non-point primitives, but
  180.     * the IvyBridge PRM does, and if we don't, we get garbage.
  181.     *
  182.     * This is not required on Haswell, as the hardware ignores this state
  183.     * when drawing non-points -- although we do still need to be careful to
  184.     * correctly set the attr overrides.
  185.     */
  186.    /* BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM */
  187.    bool drawing_points = is_drawing_points(brw);
  188.  
  189.    /* Initialize all the attr_overrides to 0.  In the loop below we'll modify
  190.     * just the ones that correspond to inputs used by the fs.
  191.     */
  192.    memset(attr_overrides, 0, 16*sizeof(*attr_overrides));
  193.  
  194.    for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
  195.       /* BRW_NEW_FRAGMENT_PROGRAM */
  196.       enum glsl_interp_qualifier interp_qualifier =
  197.          brw->fragment_program->InterpQualifier[attr];
  198.       bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;
  199.       /* BRW_NEW_FS_PROG_DATA */
  200.       int input_index = brw->wm.prog_data->urb_setup[attr];
  201.  
  202.       if (input_index < 0)
  203.          continue;
  204.  
  205.       /* _NEW_POINT */
  206.       bool point_sprite = false;
  207.       if (drawing_points) {
  208.          if (brw->ctx.Point.PointSprite &&
  209.              (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
  210.              brw->ctx.Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
  211.             point_sprite = true;
  212.          }
  213.  
  214.          if (attr == VARYING_SLOT_PNTC)
  215.             point_sprite = true;
  216.  
  217.          if (point_sprite)
  218.             *point_sprite_enables |= (1 << input_index);
  219.       }
  220.  
  221.       /* flat shading */
  222.       if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
  223.           (shade_model_flat && is_gl_Color &&
  224.            interp_qualifier == INTERP_QUALIFIER_NONE))
  225.          *flat_enables |= (1 << input_index);
  226.  
  227.       /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
  228.       uint16_t attr_override = point_sprite ? 0 :
  229.          get_attr_override(&brw->vue_map_geom_out,
  230.                            urb_entry_read_offset, attr,
  231.                            brw->ctx.VertexProgram._TwoSideEnabled,
  232.                            &max_source_attr);
  233.  
  234.       /* The hardware can only do the overrides on 16 overrides at a
  235.        * time, and the other up to 16 have to be lined up so that the
  236.        * input index = the output index.  We'll need to do some
  237.        * tweaking to make sure that's the case.
  238.        */
  239.       if (input_index < 16)
  240.          attr_overrides[input_index] = attr_override;
  241.       else
  242.          assert(attr_override == input_index);
  243.    }
  244.  
  245.    /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
  246.     * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
  247.     *
  248.     * "This field should be set to the minimum length required to read the
  249.     *  maximum source attribute.  The maximum source attribute is indicated
  250.     *  by the maximum value of the enabled Attribute # Source Attribute if
  251.     *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
  252.     *  enable is not set.
  253.     *  read_length = ceiling((max_source_attr + 1) / 2)
  254.     *
  255.     *  [errata] Corruption/Hang possible if length programmed larger than
  256.     *  recommended"
  257.     *
  258.     * Similar text exists for Ivy Bridge.
  259.     */
  260.    *urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
  261. }
  262.  
  263.  
  264. static void
  265. upload_sf_state(struct brw_context *brw)
  266. {
  267.    struct gl_context *ctx = &brw->ctx;
  268.    /* BRW_NEW_FS_PROG_DATA */
  269.    uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs;
  270.    uint32_t dw1, dw2, dw3, dw4;
  271.    uint32_t point_sprite_enables;
  272.    uint32_t flat_enables;
  273.    int i;
  274.    /* _NEW_BUFFER */
  275.    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
  276.    bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
  277.  
  278.    const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
  279.    float point_size;
  280.    uint16_t attr_overrides[16];
  281.    uint32_t point_sprite_origin;
  282.  
  283.    dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
  284.    dw2 = GEN6_SF_STATISTICS_ENABLE;
  285.  
  286.    if (brw->sf.viewport_transform_enable)
  287.        dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
  288.  
  289.    dw3 = 0;
  290.    dw4 = 0;
  291.  
  292.    /* _NEW_POLYGON */
  293.    if (ctx->Polygon._FrontBit == render_to_fbo)
  294.       dw2 |= GEN6_SF_WINDING_CCW;
  295.  
  296.    if (ctx->Polygon.OffsetFill)
  297.        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
  298.  
  299.    if (ctx->Polygon.OffsetLine)
  300.        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
  301.  
  302.    if (ctx->Polygon.OffsetPoint)
  303.        dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
  304.  
  305.    switch (ctx->Polygon.FrontMode) {
  306.    case GL_FILL:
  307.        dw2 |= GEN6_SF_FRONT_SOLID;
  308.        break;
  309.  
  310.    case GL_LINE:
  311.        dw2 |= GEN6_SF_FRONT_WIREFRAME;
  312.        break;
  313.  
  314.    case GL_POINT:
  315.        dw2 |= GEN6_SF_FRONT_POINT;
  316.        break;
  317.  
  318.    default:
  319.        unreachable("not reached");
  320.    }
  321.  
  322.    switch (ctx->Polygon.BackMode) {
  323.    case GL_FILL:
  324.        dw2 |= GEN6_SF_BACK_SOLID;
  325.        break;
  326.  
  327.    case GL_LINE:
  328.        dw2 |= GEN6_SF_BACK_WIREFRAME;
  329.        break;
  330.  
  331.    case GL_POINT:
  332.        dw2 |= GEN6_SF_BACK_POINT;
  333.        break;
  334.  
  335.    default:
  336.        unreachable("not reached");
  337.    }
  338.  
  339.    /* _NEW_SCISSOR */
  340.    if (ctx->Scissor.EnableFlags)
  341.       dw3 |= GEN6_SF_SCISSOR_ENABLE;
  342.  
  343.    /* _NEW_POLYGON */
  344.    if (ctx->Polygon.CullFlag) {
  345.       switch (ctx->Polygon.CullFaceMode) {
  346.       case GL_FRONT:
  347.          dw3 |= GEN6_SF_CULL_FRONT;
  348.          break;
  349.       case GL_BACK:
  350.          dw3 |= GEN6_SF_CULL_BACK;
  351.          break;
  352.       case GL_FRONT_AND_BACK:
  353.          dw3 |= GEN6_SF_CULL_BOTH;
  354.          break;
  355.       default:
  356.          unreachable("not reached");
  357.       }
  358.    } else {
  359.       dw3 |= GEN6_SF_CULL_NONE;
  360.    }
  361.  
  362.    /* _NEW_LINE */
  363.    {
  364.       /* OpenGL dictates that line width should be rounded to the nearest
  365.        * integer
  366.        */
  367.       float line_width =
  368.          roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth));
  369.       uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
  370.  
  371.       /* Line width of 0 is not allowed when MSAA enabled */
  372.       if (ctx->Multisample._Enabled) {
  373.          if (line_width_u3_7 == 0)
  374.              line_width_u3_7 = 1;
  375.       } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) {
  376.          /* For 1 pixel line thickness or less, the general
  377.           * anti-aliasing algorithm gives up, and a garbage line is
  378.           * generated.  Setting a Line Width of 0.0 specifies the
  379.           * rasterization of the "thinnest" (one-pixel-wide),
  380.           * non-antialiased lines.
  381.           *
  382.           * Lines rendered with zero Line Width are rasterized using
  383.           * Grid Intersection Quantization rules as specified by
  384.           * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
  385.           * Rasterization.
  386.           */
  387.          line_width_u3_7 = 0;
  388.       }
  389.       dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
  390.    }
  391.    if (ctx->Line.SmoothFlag) {
  392.       dw3 |= GEN6_SF_LINE_AA_ENABLE;
  393.       dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
  394.       dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
  395.    }
  396.    /* _NEW_MULTISAMPLE */
  397.    if (multisampled_fbo && ctx->Multisample.Enabled)
  398.       dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
  399.  
  400.    /* _NEW_PROGRAM | _NEW_POINT */
  401.    if (!(ctx->VertexProgram.PointSizeEnabled ||
  402.          ctx->Point._Attenuated))
  403.       dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
  404.  
  405.    /* Clamp to ARB_point_parameters user limits */
  406.    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
  407.  
  408.    /* Clamp to the hardware limits and convert to fixed point */
  409.    dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
  410.  
  411.    /*
  412.     * Window coordinates in an FBO are inverted, which means point
  413.     * sprite origin must be inverted, too.
  414.     */
  415.    if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
  416.       point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
  417.    } else {
  418.       point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
  419.    }
  420.    dw1 |= point_sprite_origin;
  421.  
  422.    /* _NEW_LIGHT */
  423.    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
  424.       dw4 |=
  425.          (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
  426.          (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
  427.          (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
  428.    } else {
  429.       dw4 |=
  430.          (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
  431.    }
  432.  
  433.    /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
  434.     * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
  435.     */
  436.    uint32_t urb_entry_read_length;
  437.    calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
  438.                             &flat_enables, &urb_entry_read_length);
  439.    dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
  440.            urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
  441.  
  442.    BEGIN_BATCH(20);
  443.    OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
  444.    OUT_BATCH(dw1);
  445.    OUT_BATCH(dw2);
  446.    OUT_BATCH(dw3);
  447.    OUT_BATCH(dw4);
  448.    OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
  449.    OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
  450.    OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
  451.    for (i = 0; i < 8; i++) {
  452.       OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
  453.    }
  454.    OUT_BATCH(point_sprite_enables); /* dw16 */
  455.    OUT_BATCH(flat_enables);
  456.    OUT_BATCH(0); /* wrapshortest enables 0-7 */
  457.    OUT_BATCH(0); /* wrapshortest enables 8-15 */
  458.    ADVANCE_BATCH();
  459. }
  460.  
  461. const struct brw_tracked_state gen6_sf_state = {
  462.    .dirty = {
  463.       .mesa  = _NEW_BUFFERS |
  464.                _NEW_LIGHT |
  465.                _NEW_LINE |
  466.                _NEW_MULTISAMPLE |
  467.                _NEW_POINT |
  468.                _NEW_POLYGON |
  469.                _NEW_PROGRAM |
  470.                _NEW_SCISSOR,
  471.       .brw   = BRW_NEW_CONTEXT |
  472.                BRW_NEW_FRAGMENT_PROGRAM |
  473.                BRW_NEW_FS_PROG_DATA |
  474.                BRW_NEW_GEOMETRY_PROGRAM |
  475.                BRW_NEW_PRIMITIVE |
  476.                BRW_NEW_VUE_MAP_GEOM_OUT,
  477.    },
  478.    .emit = upload_sf_state,
  479. };
  480.