/**************************************************************************
 *
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "main/glheader.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"

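/* Each table below maps a GL component count (1..4; index 0 is unused) to
 * the corresponding hardware surface format.  Several of the narrower
 * tables (e.g. half_float_types and the 8/16-bit *_direct tables)
 * deliberately repeat the four-component format in the three-component
 * slot, apparently because the vertex fetcher has no native
 * three-component variant at those widths.
 */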
static GLuint double_types[5] = {
   0,
   BRW_SURFACEFORMAT_R64_FLOAT,
   BRW_SURFACEFORMAT_R64G64_FLOAT,
   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
};

static GLuint float_types[5] = {
   0,
   BRW_SURFACEFORMAT_R32_FLOAT,
   BRW_SURFACEFORMAT_R32G32_FLOAT,
   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};

static GLuint half_float_types[5] = {
   0,
   BRW_SURFACEFORMAT_R16_FLOAT,
   BRW_SURFACEFORMAT_R16G16_FLOAT,
   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};

static GLuint fixed_point_types[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SFIXED,
   BRW_SURFACEFORMAT_R32G32_SFIXED,
   BRW_SURFACEFORMAT_R32G32B32_SFIXED,
   BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
};

static GLuint uint_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R32_UINT,
   BRW_SURFACEFORMAT_R32G32_UINT,
   BRW_SURFACEFORMAT_R32G32B32_UINT,
   BRW_SURFACEFORMAT_R32G32B32A32_UINT
};

static GLuint uint_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R32_UNORM,
   BRW_SURFACEFORMAT_R32G32_UNORM,
   BRW_SURFACEFORMAT_R32G32B32_UNORM,
   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
};

static GLuint uint_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R32_USCALED,
   BRW_SURFACEFORMAT_R32G32_USCALED,
   BRW_SURFACEFORMAT_R32G32B32_USCALED,
   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
};

static GLuint int_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SINT,
   BRW_SURFACEFORMAT_R32G32_SINT,
   BRW_SURFACEFORMAT_R32G32B32_SINT,
   BRW_SURFACEFORMAT_R32G32B32A32_SINT
};

static GLuint int_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SNORM,
   BRW_SURFACEFORMAT_R32G32_SNORM,
   BRW_SURFACEFORMAT_R32G32B32_SNORM,
   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
};

static GLuint int_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SSCALED,
   BRW_SURFACEFORMAT_R32G32_SSCALED,
   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
};

static GLuint ushort_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R16_UINT,
   BRW_SURFACEFORMAT_R16G16_UINT,
   BRW_SURFACEFORMAT_R16G16B16A16_UINT,
   BRW_SURFACEFORMAT_R16G16B16A16_UINT
};

static GLuint ushort_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R16_UNORM,
   BRW_SURFACEFORMAT_R16G16_UNORM,
   BRW_SURFACEFORMAT_R16G16B16_UNORM,
   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
};

static GLuint ushort_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R16_USCALED,
   BRW_SURFACEFORMAT_R16G16_USCALED,
   BRW_SURFACEFORMAT_R16G16B16_USCALED,
   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
};

static GLuint short_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SINT,
   BRW_SURFACEFORMAT_R16G16_SINT,
   BRW_SURFACEFORMAT_R16G16B16A16_SINT,
   BRW_SURFACEFORMAT_R16G16B16A16_SINT
};

static GLuint short_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SNORM,
   BRW_SURFACEFORMAT_R16G16_SNORM,
   BRW_SURFACEFORMAT_R16G16B16_SNORM,
   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
};

static GLuint short_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SSCALED,
   BRW_SURFACEFORMAT_R16G16_SSCALED,
   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
};

static GLuint ubyte_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R8_UINT,
   BRW_SURFACEFORMAT_R8G8_UINT,
   BRW_SURFACEFORMAT_R8G8B8A8_UINT,
   BRW_SURFACEFORMAT_R8G8B8A8_UINT
};

static GLuint ubyte_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R8_UNORM,
   BRW_SURFACEFORMAT_R8G8_UNORM,
   BRW_SURFACEFORMAT_R8G8B8_UNORM,
   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
};

static GLuint ubyte_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R8_USCALED,
   BRW_SURFACEFORMAT_R8G8_USCALED,
   BRW_SURFACEFORMAT_R8G8B8_USCALED,
   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
};

static GLuint byte_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SINT,
   BRW_SURFACEFORMAT_R8G8_SINT,
   BRW_SURFACEFORMAT_R8G8B8A8_SINT,
   BRW_SURFACEFORMAT_R8G8B8A8_SINT
};

static GLuint byte_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SNORM,
   BRW_SURFACEFORMAT_R8G8_SNORM,
   BRW_SURFACEFORMAT_R8G8B8_SNORM,
   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
};

static GLuint byte_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SSCALED,
   BRW_SURFACEFORMAT_R8G8_SSCALED,
   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
};


/**
 * Given vertex array type/size/format/normalized info, return the
 * appropriate hardware surface type.  For example, a three-component
 * GL_FLOAT array maps to BRW_SURFACEFORMAT_R32G32B32_FLOAT.
 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
 */
unsigned
brw_get_vertex_surface_type(struct brw_context *brw,
                            const struct gl_client_array *glarray)
{
   int size = glarray->Size;

   if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
      fprintf(stderr, "type %s size %d normalized %d\n",
              _mesa_lookup_enum_by_nr(glarray->Type),
              glarray->Size, glarray->Normalized);

   if (glarray->Integer) {
      assert(glarray->Format == GL_RGBA); /* sanity check */
      switch (glarray->Type) {
      case GL_INT: return int_types_direct[size];
      case GL_SHORT: return short_types_direct[size];
      case GL_BYTE: return byte_types_direct[size];
      case GL_UNSIGNED_INT: return uint_types_direct[size];
      case GL_UNSIGNED_SHORT: return ushort_types_direct[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_direct[size];
      default: unreachable("not reached");
      }
   } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
      return BRW_SURFACEFORMAT_R11G11B10_FLOAT;
   } else if (glarray->Normalized) {
      switch (glarray->Type) {
      case GL_DOUBLE: return double_types[size];
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT: return half_float_types[size];
      case GL_INT: return int_types_norm[size];
      case GL_SHORT: return short_types_norm[size];
      case GL_BYTE: return byte_types_norm[size];
      case GL_UNSIGNED_INT: return uint_types_norm[size];
      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
      case GL_UNSIGNED_BYTE:
         if (glarray->Format == GL_BGRA) {
            /* See GL_EXT_vertex_array_bgra */
            assert(size == 4);
            return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
         }
         else {
            return ubyte_types_norm[size];
         }
      case GL_FIXED:
         if (brw->gen >= 8 || brw->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats
       * we'd like to use here, so we upload everything as UINT and fix it
       * up in the shader.
       */
      case GL_INT_2_10_10_10_REV:
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM
               : BRW_SURFACEFORMAT_R10G10B10A2_SNORM;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      case GL_UNSIGNED_INT_2_10_10_10_REV:
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM
               : BRW_SURFACEFORMAT_R10G10B10A2_UNORM;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      default: unreachable("not reached");
      }
   }
   else {
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats
       * we'd like to use here, so we upload everything as UINT and fix it
       * up in the shader.
       */
      if (glarray->Type == GL_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED
               : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_USCALED
               : BRW_SURFACEFORMAT_R10G10B10A2_USCALED;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      }
      assert(glarray->Format == GL_RGBA); /* sanity check */
      switch (glarray->Type) {
      case GL_DOUBLE: return double_types[size];
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT: return half_float_types[size];
      case GL_INT: return int_types_scale[size];
      case GL_SHORT: return short_types_scale[size];
      case GL_BYTE: return byte_types_scale[size];
      case GL_UNSIGNED_INT: return uint_types_scale[size];
      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
      case GL_FIXED:
         if (brw->gen >= 8 || brw->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      default: unreachable("not reached");
      }
   }
}

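/**
 * Copy the [min, max] range of the given client array into freshly
 * allocated upload space, repacking from the source stride to dst_stride
 * along the way.
 */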
static void
copy_array_to_vbo_array(struct brw_context *brw,
                        struct brw_vertex_element *element,
                        int min, int max,
                        struct brw_vertex_buffer *buffer,
                        GLuint dst_stride)
{
   const int src_stride = element->glarray->StrideB;

   /* If the source stride is zero, we just want to upload the current
    * attribute once and set the buffer's stride to 0.  There's no need
    * to replicate it out.
    */
   if (src_stride == 0) {
      intel_upload_data(brw, element->glarray->Ptr,
                        element->glarray->_ElementSize,
                        element->glarray->_ElementSize,
                        &buffer->bo, &buffer->offset);

      buffer->stride = 0;
      return;
   }

   const unsigned char *src = element->glarray->Ptr + min * src_stride;
   int count = max - min + 1;
   GLuint size = count * dst_stride;
   uint8_t *dst = intel_upload_space(brw, size, dst_stride,
                                     &buffer->bo, &buffer->offset);

   if (dst_stride == src_stride) {
      memcpy(dst, src, size);
   } else {
      while (count--) {
         memcpy(dst, src, dst_stride);
         src += src_stride;
         dst += dst_stride;
      }
   }
   buffer->stride = dst_stride;
}

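/**
 * Set up brw->vb for the current draw: buffer-object arrays are referenced
 * in place (reusing VB state between arrays that share a buffer), while
 * user-space arrays are copied into upload buffers, as a single
 * interleaved upload when their layout allows it.
 */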
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride * ((brw->num_instances /
                                          glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) {
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.
          *
          * The VBO spec allows application termination in this case, and it's
          * probably a service to the poor programmer to do so rather than
          * trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}

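/**
 * Upload gl_BaseVertex into a buffer for the VS to read when computing
 * gl_VertexID, unless an indirect draw has already provided one.
 */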
void
brw_prepare_shader_draw_parameters(struct brw_context *brw)
{
   /* For non-indirect draws, upload gl_BaseVertex. */
   if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) {
      intel_upload_data(brw, &brw->draw.gl_basevertex, 4, 4,
                        &brw->draw.draw_params_bo,
                        &brw->draw.draw_params_offset);
   }
}

/**
 * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
 */
static void
emit_vertex_buffer_state(struct brw_context *brw,
                         unsigned buffer_nr,
                         drm_intel_bo *bo,
                         unsigned bo_ending_address,
                         unsigned bo_offset,
                         unsigned stride,
                         unsigned step_rate)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t dw0;

   if (brw->gen >= 6) {
      dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
            (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
                       : GEN6_VB0_ACCESS_VERTEXDATA);
   } else {
      dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
            (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
                       : BRW_VB0_ACCESS_VERTEXDATA);
   }

   if (brw->gen >= 7)
      dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

   if (brw->gen == 7)
      dw0 |= GEN7_MOCS_L3 << 16;

   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
             "VBO stride %d too large, bad rendering may occur\n",
             stride);
   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);
   if (brw->gen >= 5) {
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address);
   } else {
      OUT_BATCH(0);
   }
   OUT_BATCH(step_rate);
}

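/**
 * Emit the 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets for
 * the current draw, including the extra element that sources
 * gl_VertexID/gl_InstanceID when the VS needs them.
 */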
static void brw_emit_vertices(struct brw_context *brw)
{
   GLuint i;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

   brw_emit_query_begin(brw);

   unsigned nr_elements = brw->vb.nr_enabled;
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
      ++nr_elements;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but it does nothing unless a VE
    * loads from it.
    */
   if (nr_elements == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (brw->gen >= 6) {
         OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
                   GEN6_VE0_VALID |
                   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
         OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
                   BRW_VE0_VALID |
                   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      ADVANCE_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   unsigned nr_buffers =
      brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;

   if (nr_buffers) {
      if (brw->gen >= 6) {
         assert(nr_buffers <= 33);
      } else {
         assert(nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4 * nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1,
                                  buffer->offset, buffer->stride,
                                  buffer->step_rate);
      }

      if (brw->vs.prog_data->uses_vertexid) {
         emit_vertex_buffer_state(brw, brw->vb.nr_buffers,
                                  brw->draw.draw_params_bo,
                                  brw->draw.draw_params_bo->size - 1,
                                  brw->draw.draw_params_offset,
                                  0,  /* stride */
                                  0); /* step rate */
      }
      ADVANCE_BATCH();
   }

   /* The hardware allows one more VERTEX_ELEMENT entry than VERTEX_BUFFER
    * entries, presumably for VertexID/InstanceID.
    */
   if (brw->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         if (brw->gen >= 6) {
            gen6_edgeflag_input = input;
            continue;
         }
      }

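      /* Components beyond the array's size get constants: 0 for the middle
       * components and 1 for the last, as in a homogeneous position.  The
       * missing breaks are deliberate fall-through, so each case also
       * applies the settings of every later case.
       */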
      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
         break;
      }

      if (brw->gen >= 6) {
         OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
                   GEN6_VE0_VALID |
                   (format << BRW_VE0_FORMAT_SHIFT) |
                   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
         OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
                   BRW_VE0_VALID |
                   (format << BRW_VE0_FORMAT_SHIFT) |
                   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (brw->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->gen >= 6 && gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;

      if (brw->vs.prog_data->uses_vertexid) {
         comp0 = BRW_VE1_COMPONENT_STORE_SRC;
         comp2 = BRW_VE1_COMPONENT_STORE_VID;
      }

      if (brw->vs.prog_data->uses_instanceid) {
         comp3 = BRW_VE1_COMPONENT_STORE_IID;
      }

      dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
            (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
            (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
            (comp3 << BRW_VE1_COMPONENT_3_SHIFT);

      if (brw->gen >= 6) {
         dw0 |= GEN6_VE0_VALID |
                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
      } else {
         dw0 |= BRW_VE0_VALID |
                brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
         dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_vertices = {
   .dirty = {
      .mesa = _NEW_POLYGON,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTICES |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_emit_vertices,
};

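/**
 * Ensure the index data for the current draw lives in a buffer object:
 * user-space index arrays are uploaded, and buffer-object indices with a
 * misaligned offset are rebased into a temporary.
 */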
static void brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = _mesa_sizeof_type(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset:
       */
      intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
                        &brw->ib.bo, &offset);
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((ib_type_size - 1) & offset) {
         perf_debug("copying index buffer to a temporary to work around "
                    "misaligned offset %d\n", offset);

         GLubyte *map = ctx->Driver.MapBufferRange(ctx,
                                                   offset,
                                                   ib_size,
                                                   GL_MAP_READ_BIT,
                                                   bufferobj,
                                                   MAP_INTERNAL);

         intel_upload_data(brw, map, ib_size, ib_type_size,
                           &brw->ib.bo, &offset);

         ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
      } else {
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
                                   offset, ib_size);
         if (bo != brw->ib.bo) {
            drm_intel_bo_unreference(brw->ib.bo);
            brw->ib.bo = bo;
            drm_intel_bo_reference(bo);
         }
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;

   if (brw->ib.bo != old_bo)
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   }
}

const struct brw_tracked_state brw_indices = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_INDICES,
   },
   .emit = brw_upload_indices,
};

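/**
 * Emit the 3DSTATE_INDEX_BUFFER packet, enabling the primitive-restart cut
 * index here on everything but Haswell (which programs it via 3DSTATE_VF
 * instead).
 */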
static void brw_emit_index_buffer(struct brw_context *brw)
{
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint cut_index_setting;

   if (index_buffer == NULL)
      return;

   if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
      cut_index_setting = BRW_CUT_INDEX_ENABLE;
   } else {
      cut_index_setting = 0;
   }

   BEGIN_BATCH(3);
   OUT_BATCH(CMD_INDEX_BUFFER << 16 |
             cut_index_setting |
             brw_get_index_type(index_buffer->type) |
             1);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             0);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             brw->ib.bo->size - 1);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_index_buffer = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_INDEX_BUFFER,
   },
   .emit = brw_emit_index_buffer,
};