/**************************************************************************
 *
 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "main/glheader.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"

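/* The tables below map a vertex array's component count (1..4) to a
 * hardware surface format; entry 0 is an unused placeholder.  Where a
 * size-3 entry repeats the 4-component format (e.g. half floats and the
 * 8/16-bit integer "direct" types), presumably no matching 3-component
 * vertex format exists, so the wider format is used instead.
 */
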
static GLuint double_types[5] = {
   0,
   BRW_SURFACEFORMAT_R64_FLOAT,
   BRW_SURFACEFORMAT_R64G64_FLOAT,
   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
};

static GLuint float_types[5] = {
   0,
   BRW_SURFACEFORMAT_R32_FLOAT,
   BRW_SURFACEFORMAT_R32G32_FLOAT,
   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};

static GLuint half_float_types[5] = {
   0,
   BRW_SURFACEFORMAT_R16_FLOAT,
   BRW_SURFACEFORMAT_R16G16_FLOAT,
   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};

static GLuint fixed_point_types[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SFIXED,
   BRW_SURFACEFORMAT_R32G32_SFIXED,
   BRW_SURFACEFORMAT_R32G32B32_SFIXED,
   BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
};

static GLuint uint_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R32_UINT,
   BRW_SURFACEFORMAT_R32G32_UINT,
   BRW_SURFACEFORMAT_R32G32B32_UINT,
   BRW_SURFACEFORMAT_R32G32B32A32_UINT
};

static GLuint uint_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R32_UNORM,
   BRW_SURFACEFORMAT_R32G32_UNORM,
   BRW_SURFACEFORMAT_R32G32B32_UNORM,
   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
};

static GLuint uint_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R32_USCALED,
   BRW_SURFACEFORMAT_R32G32_USCALED,
   BRW_SURFACEFORMAT_R32G32B32_USCALED,
   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
};

static GLuint int_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SINT,
   BRW_SURFACEFORMAT_R32G32_SINT,
   BRW_SURFACEFORMAT_R32G32B32_SINT,
   BRW_SURFACEFORMAT_R32G32B32A32_SINT
};

static GLuint int_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SNORM,
   BRW_SURFACEFORMAT_R32G32_SNORM,
   BRW_SURFACEFORMAT_R32G32B32_SNORM,
   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
};

static GLuint int_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R32_SSCALED,
   BRW_SURFACEFORMAT_R32G32_SSCALED,
   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
};

static GLuint ushort_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R16_UINT,
   BRW_SURFACEFORMAT_R16G16_UINT,
   BRW_SURFACEFORMAT_R16G16B16A16_UINT,
   BRW_SURFACEFORMAT_R16G16B16A16_UINT
};

static GLuint ushort_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R16_UNORM,
   BRW_SURFACEFORMAT_R16G16_UNORM,
   BRW_SURFACEFORMAT_R16G16B16_UNORM,
   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
};

static GLuint ushort_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R16_USCALED,
   BRW_SURFACEFORMAT_R16G16_USCALED,
   BRW_SURFACEFORMAT_R16G16B16_USCALED,
   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
};

static GLuint short_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SINT,
   BRW_SURFACEFORMAT_R16G16_SINT,
   BRW_SURFACEFORMAT_R16G16B16A16_SINT,
   BRW_SURFACEFORMAT_R16G16B16A16_SINT
};

static GLuint short_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SNORM,
   BRW_SURFACEFORMAT_R16G16_SNORM,
   BRW_SURFACEFORMAT_R16G16B16_SNORM,
   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
};

static GLuint short_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R16_SSCALED,
   BRW_SURFACEFORMAT_R16G16_SSCALED,
   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
};

static GLuint ubyte_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R8_UINT,
   BRW_SURFACEFORMAT_R8G8_UINT,
   BRW_SURFACEFORMAT_R8G8B8A8_UINT,
   BRW_SURFACEFORMAT_R8G8B8A8_UINT
};

static GLuint ubyte_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R8_UNORM,
   BRW_SURFACEFORMAT_R8G8_UNORM,
   BRW_SURFACEFORMAT_R8G8B8_UNORM,
   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
};

static GLuint ubyte_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R8_USCALED,
   BRW_SURFACEFORMAT_R8G8_USCALED,
   BRW_SURFACEFORMAT_R8G8B8_USCALED,
   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
};

static GLuint byte_types_direct[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SINT,
   BRW_SURFACEFORMAT_R8G8_SINT,
   BRW_SURFACEFORMAT_R8G8B8A8_SINT,
   BRW_SURFACEFORMAT_R8G8B8A8_SINT
};

static GLuint byte_types_norm[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SNORM,
   BRW_SURFACEFORMAT_R8G8_SNORM,
   BRW_SURFACEFORMAT_R8G8B8_SNORM,
   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
};

static GLuint byte_types_scale[5] = {
   0,
   BRW_SURFACEFORMAT_R8_SSCALED,
   BRW_SURFACEFORMAT_R8G8_SSCALED,
   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
};


/**
 * Given vertex array type/size/format/normalized info, return
 * the appropriate hardware surface type.
 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
 */
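/* e.g. a normalized, size-4 GL_UNSIGNED_BYTE array with Format == GL_BGRA
 * maps to BRW_SURFACEFORMAT_B8G8R8A8_UNORM (see GL_EXT_vertex_array_bgra).
 */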
static unsigned
get_surface_type(struct brw_context *brw,
                 const struct gl_client_array *glarray)
{
   int size = glarray->Size;

   if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
      printf("type %s size %d normalized %d\n",
             _mesa_lookup_enum_by_nr(glarray->Type),
             glarray->Size, glarray->Normalized);

   if (glarray->Integer) {
      assert(glarray->Format == GL_RGBA); /* sanity check */
      switch (glarray->Type) {
      case GL_INT: return int_types_direct[size];
      case GL_SHORT: return short_types_direct[size];
      case GL_BYTE: return byte_types_direct[size];
      case GL_UNSIGNED_INT: return uint_types_direct[size];
      case GL_UNSIGNED_SHORT: return ushort_types_direct[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_direct[size];
      default: assert(0); return 0;
      }
   } else if (glarray->Normalized) {
      switch (glarray->Type) {
      case GL_DOUBLE: return double_types[size];
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT: return half_float_types[size];
      case GL_INT: return int_types_norm[size];
      case GL_SHORT: return short_types_norm[size];
      case GL_BYTE: return byte_types_norm[size];
      case GL_UNSIGNED_INT: return uint_types_norm[size];
      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
      case GL_UNSIGNED_BYTE:
         if (glarray->Format == GL_BGRA) {
            /* See GL_EXT_vertex_array_bgra */
            assert(size == 4);
            return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
         }
         else {
            return ubyte_types_norm[size];
         }
      case GL_FIXED:
         if (brw->gen >= 8 || brw->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
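         /* e.g. the fixed-point value 1.0 is stored as 0x00010000 (65536);
          * multiplying by 1.0f / 65536 in the VS recovers 1.0f.
          */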
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
       * it in the shader
       */
      case GL_INT_2_10_10_10_REV:
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM
               : BRW_SURFACEFORMAT_R10G10B10A2_SNORM;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      case GL_UNSIGNED_INT_2_10_10_10_REV:
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM
               : BRW_SURFACEFORMAT_R10G10B10A2_UNORM;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      default: assert(0); return 0;
      }
   }
   else {
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
       * it in the shader
       */
      if (glarray->Type == GL_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED
               : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (brw->gen >= 8 || brw->is_haswell) {
            return glarray->Format == GL_BGRA
               ? BRW_SURFACEFORMAT_B10G10R10A2_USCALED
               : BRW_SURFACEFORMAT_R10G10B10A2_USCALED;
         }
         return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
      }
      assert(glarray->Format == GL_RGBA); /* sanity check */
      switch (glarray->Type) {
      case GL_DOUBLE: return double_types[size];
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT: return half_float_types[size];
      case GL_INT: return int_types_scale[size];
      case GL_SHORT: return short_types_scale[size];
      case GL_BYTE: return byte_types_scale[size];
      case GL_UNSIGNED_INT: return uint_types_scale[size];
      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
      case GL_FIXED:
         if (brw->gen >= 8 || brw->is_haswell)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      default: assert(0); return 0;
      }
   }
}

static GLuint get_index_type(GLenum type)
{
   switch (type) {
   case GL_UNSIGNED_BYTE:  return BRW_INDEX_BYTE;
   case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
   case GL_UNSIGNED_INT:   return BRW_INDEX_DWORD;
   default: assert(0); return 0;
   }
}

static void
copy_array_to_vbo_array(struct brw_context *brw,
                        struct brw_vertex_element *element,
                        int min, int max,
                        struct brw_vertex_buffer *buffer,
                        GLuint dst_stride)
{
   const int src_stride = element->glarray->StrideB;

   /* If the source stride is zero, we just want to upload the current
    * attribute once and set the buffer's stride to 0.  There's no need
    * to replicate it out.
    */
   if (src_stride == 0) {
      intel_upload_data(brw, element->glarray->Ptr,
                        element->glarray->_ElementSize,
                        element->glarray->_ElementSize,
                        &buffer->bo, &buffer->offset);

      buffer->stride = 0;
      return;
   }

   const unsigned char *src = element->glarray->Ptr + min * src_stride;
   int count = max - min + 1;
   GLuint size = count * dst_stride;
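   /* e.g. min = 10, max = 19 gives count = 10 elements; for a tightly
    * packed vec3 float attribute (dst_stride = 12) that is 120 bytes.
    */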

   if (dst_stride == src_stride) {
      intel_upload_data(brw, src, size, dst_stride,
                        &buffer->bo, &buffer->offset);
   } else {
      char * const map = intel_upload_map(brw, size, dst_stride);
      char *dst = map;

      while (count--) {
         memcpy(dst, src, dst_stride);
         src += src_stride;
         dst += dst_stride;
      }
      intel_upload_unmap(brw, map, size, dst_stride,
                         &buffer->bo, &buffer->offset);
   }
   buffer->stride = dst_stride;
}

static void brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(brw,
                                                intel_buffer, 1,
                                                &buffer->offset);
            drm_intel_bo_reference(buffer->bo);
            buffer->offset += (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and it's
          * probably a service to the poor programmer to do so rather than
          * trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
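   /* e.g. with min_index = 100 and every enabled array being uploaded,
    * delta becomes 0 and start_vertex_bias becomes -100, so only elements
    * [100, max_index] are copied and the 3DPRIMITIVE is rebased to match.
    */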

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
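         /* e.g. 8 instances with an InstanceDivisor of 4 gives
          * (8 - 1) / 4 = 1, so elements 0..1 are uploaded.
          */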
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}

static void brw_emit_vertices(struct brw_context *brw)
{
   GLuint i, nr_elements;

   brw_prepare_vertices(brw);

   brw_emit_query_begin(brw);

   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale vertex buffers stay in place, but they don't do anything
    * unless a VE loads from them.
    */
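   /* The pad element below stores (0, 0, 0, 1.0f): three STORE_0
    * components and a final STORE_1_FLT.
    */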
   if (nr_elements == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (brw->gen >= 6) {
         OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
                   GEN6_VE0_VALID |
                   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
         OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
                   BRW_VE0_VALID |
                   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      CACHED_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   if (brw->vb.nr_buffers) {
      if (brw->gen >= 6) {
         assert(brw->vb.nr_buffers <= 33);
      } else {
         assert(brw->vb.nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         uint32_t dw0;

         if (brw->gen >= 6) {
            dw0 = buffer->step_rate
                     ? GEN6_VB0_ACCESS_INSTANCEDATA
                     : GEN6_VB0_ACCESS_VERTEXDATA;
            dw0 |= i << GEN6_VB0_INDEX_SHIFT;
         } else {
            dw0 = buffer->step_rate
                     ? BRW_VB0_ACCESS_INSTANCEDATA
                     : BRW_VB0_ACCESS_VERTEXDATA;
            dw0 |= i << BRW_VB0_INDEX_SHIFT;
         }

         if (brw->gen >= 7)
            dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

         if (brw->is_haswell)
            dw0 |= GEN7_MOCS_L3 << 16;

         OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
         OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
         if (brw->gen >= 5) {
            OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
         } else
            OUT_BATCH(0);
         OUT_BATCH(buffer->step_rate);
      }
      ADVANCE_BATCH();
   }

   /* The hardware allows one more VERTEX_ELEMENT than VERTEX_BUFFER,
    * presumably for VertexID/InstanceID.
    */
   if (brw->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = get_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      /* The gen4 driver expects edgeflag to come in as a float, and passes
       * that float on to the tests in the clipper.  Mesa's current vertex
       * attribute value for EdgeFlag is stored as a float, which works out.
       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
       * integer ubyte.  Just rewrite that to convert to a float.
       */
      if (input->attrib == VERT_ATTRIB_EDGEFLAG) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         if (brw->gen >= 6) {
            gen6_edgeflag_input = input;
            continue;
         }

         if (format == BRW_SURFACEFORMAT_R8_UINT)
            format = BRW_SURFACEFORMAT_R8_SSCALED;
      }

      /* Components beyond the array's size are padded out to (0, 0, 0, 1);
       * note the deliberate case fall-through below.
       */
      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
         break;
      }

      if (brw->gen >= 6) {
         OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
                   GEN6_VE0_VALID |
                   (format << BRW_VE0_FORMAT_SHIFT) |
                   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
         OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
                   BRW_VE0_VALID |
                   (format << BRW_VE0_FORMAT_SHIFT) |
                   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (brw->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->gen >= 6 && gen6_edgeflag_input) {
      uint32_t format = get_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid) {
      uint32_t dw0 = 0, dw1 = 0;

      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
             (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
             (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
             (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));

      if (brw->gen >= 6) {
         dw0 |= GEN6_VE0_VALID;
      } else {
         dw0 |= BRW_VE0_VALID;
         dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   CACHED_BATCH();
}

const struct brw_tracked_state brw_vertices = {
   .dirty = {
      .mesa = _NEW_POLYGON,
      .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
      .cache = CACHE_NEW_VS_PROG,
   },
   .emit = brw_emit_vertices,
};

static void brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *bo = NULL;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = _mesa_sizeof_type(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {

      /* Get new bufferobj, offset:
       */
      intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
                        &bo, &offset);
      brw->ib.start_vertex_offset = offset / ib_type_size;
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
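      /* e.g. a GL_UNSIGNED_SHORT index buffer bound at byte offset 3 fails
       * the (ib_type_size - 1) & offset test and is copied to a temporary.
       */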
      if ((ib_type_size - 1) & offset) {
         perf_debug("copying index buffer to a temporary to work around "
                    "misaligned offset %d\n", offset);

         GLubyte *map = ctx->Driver.MapBufferRange(ctx,
                                                   offset,
                                                   ib_size,
                                                   GL_MAP_READ_BIT,
                                                   bufferobj);

         intel_upload_data(brw, map, ib_size, ib_type_size, &bo, &offset);
         brw->ib.start_vertex_offset = offset / ib_type_size;

         ctx->Driver.UnmapBuffer(ctx, bufferobj);
      } else {
         /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
          * the index buffer state when we're just moving the start index
          * of our drawing.
          */
         brw->ib.start_vertex_offset = offset / ib_type_size;
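         /* e.g. GL_UNSIGNED_INT indices at byte offset 64 give a start
          * offset of 16 indices; draws that only change their start index
          * can then reuse this index buffer state.
          */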

         bo = intel_bufferobj_source(brw,
                                     intel_buffer_object(bufferobj),
                                     ib_type_size,
                                     &offset);
         drm_intel_bo_reference(bo);

         brw->ib.start_vertex_offset += offset / ib_type_size;
      }
   }

   if (brw->ib.bo != bo) {
      drm_intel_bo_unreference(brw->ib.bo);
      brw->ib.bo = bo;

      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   } else {
      drm_intel_bo_unreference(bo);
   }

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   }
}

const struct brw_tracked_state brw_indices = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_INDICES,
      .cache = 0,
   },
   .emit = brw_upload_indices,
};

static void brw_emit_index_buffer(struct brw_context *brw)
{
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint cut_index_setting;

   if (index_buffer == NULL)
      return;

   if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
      cut_index_setting = BRW_CUT_INDEX_ENABLE;
   } else {
      cut_index_setting = 0;
   }

   BEGIN_BATCH(3);
   OUT_BATCH(CMD_INDEX_BUFFER << 16 |
             cut_index_setting |
             get_index_type(index_buffer->type) << 8 |
             1);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             0);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             brw->ib.bo->size - 1);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_index_buffer = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
      .cache = 0,
   },
   .emit = brw_emit_index_buffer,
};