  1. /**************************************************************************
  2.  *
  3.  * Copyright 2011 Marek Olšák <maraeo@gmail.com>
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * This module uploads user buffers and translates the vertex buffers which
  30.  * contain incompatible vertices (i.e. not supported by the driver/hardware)
  31.  * into compatible ones, based on the Gallium CAPs.
  32.  *
  33.  * It does not upload index buffers.
  34.  *
  35.  * The module heavily uses bitmasks to represent per-buffer and
  36.  * per-vertex-element flags to avoid looping over the list of buffers just
  37.  * to see if there's a non-zero stride, or user buffer, or unsupported format,
  38.  * etc.
  39.  *
  40.  * There are 3 categories of vertex elements, which are processed separately:
  41.  * - per-vertex attribs (stride != 0, instance_divisor == 0)
  42.  * - instanced attribs (stride != 0, instance_divisor > 0)
  43.  * - constant attribs (stride == 0)
  44.  *
  45.  * All needed uploads and translations are performed for every draw command,
  46.  * but only the subset of vertices needed for that draw command is uploaded
  47.  * or translated. (The module never translates whole buffers.)
  48.  *
  49.  *
  50.  * The module consists of two main parts:
  51.  *
  52.  *
  53.  * 1) Translate (u_vbuf_translate_begin/end)
  54.  *
  55.  * This is pretty much a vertex fetch fallback. It translates vertices from
  56.  * one vertex buffer to another in an unused vertex buffer slot. It does
  57.  * whatever is needed to make the vertices readable by the hardware (changes
  58.  * vertex formats and aligns offsets and strides). The translate module is
  59.  * used here.
  60.  *
  61.  * Each of the 3 categories is translated to a separate buffer.
  62.  * Only the [min_index, max_index] range is translated. For instanced attribs,
  63.  * the range is [start_instance, start_instance+instance_count]. For constant
  64.  * attribs, the range is [0, 1].
  65.  *
  66.  *
  67.  * 2) User buffer uploading (u_vbuf_upload_buffers)
  68.  *
  69.  * Only the [min_index, max_index] range is uploaded (just like Translate)
  70.  * with a single memcpy.
  71.  *
  72.  * This method works best for non-indexed draw operations, or for indexed
  73.  * draw operations where the [min_index, max_index] range is not much larger
  74.  * than the vertex count.
  75.  *
  76.  * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
  77.  * the per-vertex attribs are uploaded via the translate module, all packed
  78.  * into one vertex buffer, and the indexed draw call is turned into
  79.  * a non-indexed one in the process. This adds complexity to the translate
  80.  * part, but it prevents badly-behaved apps from dragging the frame rate
  81.  * down.
  82.  *
  83.  *
  84.  * If there is nothing to do, it forwards every command to the driver.
  85.  * The module also has its own CSO cache of vertex element states.
  86.  */
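
/*
 * Integration sketch (an assumption, not code from this module): u_vbuf sits
 * between the state tracker and the driver.  A wrapper layer that owns a
 * u_vbuf instance routes vertex state and draws through it, and u_vbuf in
 * turn calls the driver's own set_vertex_buffers/draw_vbo with fixed-up
 * buffers.  The "wrapper" struct and function below are hypothetical; the
 * u_vbuf_* calls are the real entry points declared in util/u_vbuf.h.
 */
#if 0
struct wrapper {
   struct pipe_context *pipe;   /* the driver context */
   struct u_vbuf *vbuf;         /* NULL if the driver needs no fallbacks */
};

static void wrapper_draw_vbo(struct wrapper *w,
                             const struct pipe_draw_info *info)
{
   if (w->vbuf)
      u_vbuf_draw_vbo(w->vbuf, info);    /* upload/translate, then draw */
   else
      w->pipe->draw_vbo(w->pipe, info);  /* nothing to fix up */
}
#endif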
  87.  
  88. #include "util/u_vbuf.h"
  89.  
  90. #include "util/u_dump.h"
  91. #include "util/u_format.h"
  92. #include "util/u_inlines.h"
  93. #include "util/u_memory.h"
  94. #include "util/u_upload_mgr.h"
  95. #include "translate/translate.h"
  96. #include "translate/translate_cache.h"
  97. #include "cso_cache/cso_cache.h"
  98. #include "cso_cache/cso_hash.h"
  99.  
  100. struct u_vbuf_elements {
  101.    unsigned count;
  102.    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
  103.  
  104.    unsigned src_format_size[PIPE_MAX_ATTRIBS];
  105.  
  106.    /* If (velem[i].src_format != native_format[i]), the vertex buffer
  107.     * referenced by the vertex element cannot be used for rendering and
  108.     * its vertex data must be translated to native_format[i]. */
  109.    enum pipe_format native_format[PIPE_MAX_ATTRIBS];
  110.    unsigned native_format_size[PIPE_MAX_ATTRIBS];
  111.  
  112.    /* Which buffers are used by the vertex element state. */
  113.    uint32_t used_vb_mask;
  114.    /* Each set bit marks an incompatible element, which can mean either:
  115.     * - src_format != native_format, as discussed above, or
  116.     * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
  117.    uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib  */
  118.    /* Which buffers have at least one incompatible vertex element
  119.     * referencing them. */
  120.    uint32_t incompatible_vb_mask_any;
  121.    /* Which buffers have only incompatible vertex elements referencing them. */
  122.    uint32_t incompatible_vb_mask_all;
  123.    /* Which buffers have at least one compatible vertex element
  124.     * referencing them. */
  125.    uint32_t compatible_vb_mask_any;
  126.    /* Which buffers have only compatible vertex elements referencing them. */
  127.    uint32_t compatible_vb_mask_all;
  128.  
  129.    /* Which buffers have at least one non-instanced vertex element
  130.     * referencing them. */
  131.    uint32_t noninstance_vb_mask_any;
  132.  
  133.    void *driver_cso;
  134. };
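
/*
 * Worked example for the _any/_all masks above (illustrative only): suppose
 * two vertex elements both read buffer 0, with element 0 compatible and
 * element 1 incompatible.  Then incompatible_elem_mask = 0x2, and since
 * buffer 0 is referenced both ways, incompatible_vb_mask_any =
 * compatible_vb_mask_any = 0x1 while incompatible_vb_mask_all =
 * compatible_vb_mask_all = 0.
 */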
  135.  
  136. enum {
  137.    VB_VERTEX = 0,
  138.    VB_INSTANCE = 1,
  139.    VB_CONST = 2,
  140.    VB_NUM = 3
  141. };
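
/*
 * These categories mirror the classification rules in the file header.  The
 * sketch below shows the rule as applied by u_vbuf_translate_begin(); the
 * helper itself is illustrative only and is not used by this module.
 */
#if 0
static unsigned u_vbuf_classify(unsigned stride, unsigned instance_divisor)
{
   if (!stride)
      return VB_CONST;      /* constant attrib: same value for every vertex */
   if (instance_divisor)
      return VB_INSTANCE;   /* advances once per instance_divisor instances */
   return VB_VERTEX;        /* advances once per vertex */
}
#endif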
  142.  
  143. struct u_vbuf {
  144.    struct u_vbuf_caps caps;
  145.  
  146.    struct pipe_context *pipe;
  147.    struct translate_cache *translate_cache;
  148.    struct cso_cache *cso_cache;
  149.    struct u_upload_mgr *uploader;
  150.  
  151.    /* This is what was set in set_vertex_buffers.
  152.     * May contain user buffers. */
  153.    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
  154.    uint32_t enabled_vb_mask;
  155.  
  156.    /* Saved vertex buffer. */
  157.    unsigned aux_vertex_buffer_slot;
  158.    struct pipe_vertex_buffer aux_vertex_buffer_saved;
  159.  
  160.    /* Vertex buffers for the driver.
  161.     * There are usually no user buffers. */
  162.    struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
  163.    uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
  164.                                    call of set_vertex_buffers */
  165.  
  166.    /* The index buffer. */
  167.    struct pipe_index_buffer index_buffer;
  168.  
  169.    /* Vertex elements. */
  170.    struct u_vbuf_elements *ve, *ve_saved;
  171.  
  172.    /* Vertex elements used for the translate fallback. */
  173.    struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
  174.    /* TRUE if the fallback vertex element state above is currently bound
  175.     * and therefore used for rendering. */
  176.    boolean using_translate;
  177.    /* The vertex buffer slot indices where translated vertices are stored,
  178.     * one per category (VB_VERTEX, VB_INSTANCE, VB_CONST). */
  179.    unsigned fallback_vbs[VB_NUM];
  180.  
  181.    /* Which buffer is a user buffer. */
  182.    uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
  183.    /* Which buffer is incompatible (unaligned). */
  184.    uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
  185.    /* Which buffer has a non-zero stride. */
  186.    uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
  187. };
  188.  
  189. static void *
  190. u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
  191.                               const struct pipe_vertex_element *attribs);
  192. static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);
  193.  
  194. static const struct {
  195.    enum pipe_format from, to;
  196. } vbuf_format_fallbacks[] = {
  197.    { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
  198.    { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
  199.    { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
  200.    { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  201.    { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
  202.    { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
  203.    { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
  204.    { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  205.    { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
  206.    { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
  207.    { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
  208.    { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  209.    { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
  210.    { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
  211.    { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
  212.    { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  213.    { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
  214.    { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
  215.    { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
  216.    { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  217.    { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
  218.    { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
  219.    { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
  220.    { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
  221.    { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
  222.    { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
  223.    { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
  224.    { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
  225.    { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
  226.    { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
  227.    { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
  228.    { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  229.    { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
  230.    { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
  231.    { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
  232.    { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
  233.    { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
  234.    { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
  235.    { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
  236.    { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
  237.    { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
  238.    { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
  239.    { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
  240.    { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
  241.    { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
  242.    { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
  243.    { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
  244.    { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
  245.    { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
  246.    { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
  247.    { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
  248.    { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
  249.    { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
  250.    { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
  251.    { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
  252.    { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
  253.    { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
  254.    { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
  255.    { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
  256.    { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
  257. };
  258.  
  259. boolean u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps)
  260. {
  261.    unsigned i;
  262.    boolean fallback = FALSE;
  263.  
  264.    /* I'd rather have a bitfield of which formats are supported and a static
  265.     * table of the translations indexed by format, but since we don't have C99
  266.     * we can't easily make a sparsely-populated table indexed by format.  So,
  267.     * we construct the sparse table here.
  268.     */
  269.    for (i = 0; i < PIPE_FORMAT_COUNT; i++)
  270.       caps->format_translation[i] = i;
  271.  
  272.    for (i = 0; i < Elements(vbuf_format_fallbacks); i++) {
  273.       enum pipe_format format = vbuf_format_fallbacks[i].from;
  274.  
  275.       if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0,
  276.                                        PIPE_BIND_VERTEX_BUFFER)) {
  277.          caps->format_translation[format] = vbuf_format_fallbacks[i].to;
  278.          fallback = TRUE;
  279.       }
  280.    }
  281.  
  282.    caps->buffer_offset_unaligned =
  283.       !screen->get_param(screen,
  284.                          PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
  285.    caps->buffer_stride_unaligned =
  286.      !screen->get_param(screen,
  287.                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
  288.    caps->velem_src_offset_unaligned =
  289.       !screen->get_param(screen,
  290.                          PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
  291.    caps->user_vertex_buffers =
  292.       screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
  293.  
  294.    if (!caps->buffer_offset_unaligned ||
  295.        !caps->buffer_stride_unaligned ||
  296.        !caps->velem_src_offset_unaligned ||
  297.        !caps->user_vertex_buffers) {
  298.       fallback = TRUE;
  299.    }
  300.  
  301.    return fallback;
  302. }
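
/*
 * Usage sketch (an assumption about the caller, not code from this module):
 * the caps are queried once at context creation time.  A TRUE return means
 * at least one format or alignment fallback is needed, so the caller should
 * create a u_vbuf instance and route its vertex state through it.  "ctx" and
 * its "vbuf" member are hypothetical.
 */
#if 0
   struct u_vbuf_caps caps;

   if (u_vbuf_get_caps(pipe->screen, &caps)) {
      /* The aux vertex buffer slot index (0 here) is the caller's choice. */
      ctx->vbuf = u_vbuf_create(pipe, &caps, 0);
   }
#endif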
  303.  
  304. struct u_vbuf *
  305. u_vbuf_create(struct pipe_context *pipe,
  306.               struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index)
  307. {
  308.    struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);
  309.  
  310.    mgr->caps = *caps;
  311.    mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index;
  312.    mgr->pipe = pipe;
  313.    mgr->cso_cache = cso_cache_create();
  314.    mgr->translate_cache = translate_cache_create();
  315.    memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
  316.  
  317.    mgr->uploader = u_upload_create(pipe, 1024 * 1024, 4,
  318.                                    PIPE_BIND_VERTEX_BUFFER);
  319.  
  320.    return mgr;
  321. }
  322.  
  323. /* u_vbuf uses its own caching for vertex elements, because it needs to keep
  324.  * its own preprocessed state per vertex element CSO. */
  325. static struct u_vbuf_elements *
  326. u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
  327.                                     const struct pipe_vertex_element *states)
  328. {
  329.    struct pipe_context *pipe = mgr->pipe;
  330.    unsigned key_size, hash_key;
  331.    struct cso_hash_iter iter;
  332.    struct u_vbuf_elements *ve;
  333.    struct cso_velems_state velems_state;
  334.  
  335.    /* need to include the count into the stored state data too. */
  336.    key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
  337.    velems_state.count = count;
  338.    memcpy(velems_state.velems, states,
  339.           sizeof(struct pipe_vertex_element) * count);
  340.    hash_key = cso_construct_key((void*)&velems_state, key_size);
  341.    iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
  342.                                   (void*)&velems_state, key_size);
  343.  
  344.    if (cso_hash_iter_is_null(iter)) {
  345.       struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
  346.       memcpy(&cso->state, &velems_state, key_size);
  347.       cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
  348.       cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
  349.       cso->context = (void*)mgr;
  350.  
  351.       iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
  352.       ve = cso->data;
  353.    } else {
  354.       ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
  355.    }
  356.  
  357.    assert(ve);
  358.  
  359.    if (ve != mgr->ve)
  360.       pipe->bind_vertex_elements_state(pipe, ve->driver_cso);
  361.  
  362.    return ve;
  363. }
  364.  
  365. void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
  366.                                const struct pipe_vertex_element *states)
  367. {
  368.    mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
  369. }
  370.  
  371. void u_vbuf_destroy(struct u_vbuf *mgr)
  372. {
  373.    struct pipe_screen *screen = mgr->pipe->screen;
  374.    unsigned i;
  375.    unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
  376.                                               PIPE_SHADER_CAP_MAX_INPUTS);
  377.  
  378.    mgr->pipe->set_index_buffer(mgr->pipe, NULL);
  379.    pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
  380.  
  381.    mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);
  382.  
  383.    for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
  384.       pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL);
  385.    }
  386.    for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
  387.       pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL);
  388.    }
  389.    pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);
  390.  
  391.    translate_cache_destroy(mgr->translate_cache);
  392.    u_upload_destroy(mgr->uploader);
  393.    cso_cache_delete(mgr->cso_cache);
  394.    FREE(mgr);
  395. }
  396.  
  397. static enum pipe_error
  398. u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
  399.                          unsigned vb_mask, unsigned out_vb,
  400.                          int start_vertex, unsigned num_vertices,
  401.                          int start_index, unsigned num_indices, int min_index,
  402.                          boolean unroll_indices)
  403. {
  404.    struct translate *tr;
  405.    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
  406.    struct pipe_resource *out_buffer = NULL;
  407.    uint8_t *out_map;
  408.    unsigned out_offset, mask;
  409.    enum pipe_error err;
  410.  
  411.    /* Get a translate object. */
  412.    tr = translate_cache_find(mgr->translate_cache, key);
  413.  
  414.    /* Map buffers we want to translate. */
  415.    mask = vb_mask;
  416.    while (mask) {
  417.       struct pipe_vertex_buffer *vb;
  418.       unsigned offset;
  419.       uint8_t *map;
  420.       unsigned i = u_bit_scan(&mask);
  421.  
  422.       vb = &mgr->vertex_buffer[i];
  423.       offset = vb->buffer_offset + vb->stride * start_vertex;
  424.  
  425.       if (vb->user_buffer) {
  426.          map = (uint8_t*)vb->user_buffer + offset;
  427.       } else {
  428.          unsigned size = vb->stride ? num_vertices * vb->stride
  429.                                     : sizeof(double)*4;
  430.  
  431.          if (offset+size > vb->buffer->width0) {
  432.             size = vb->buffer->width0 - offset;
  433.          }
  434.  
  435.          map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
  436.                                      PIPE_TRANSFER_READ, &vb_transfer[i]);
  437.       }
  438.  
  439.       /* Subtract min_index so that indexing with the index buffer works. */
  440.       if (unroll_indices) {
  441.          map -= (ptrdiff_t)vb->stride * min_index;
  442.       }
  443.  
  444.       tr->set_buffer(tr, i, map, vb->stride, ~0);
  445.    }
  446.  
  447.    /* Translate. */
  448.    if (unroll_indices) {
  449.       struct pipe_index_buffer *ib = &mgr->index_buffer;
  450.       struct pipe_transfer *transfer = NULL;
  451.       unsigned offset = ib->offset + start_index * ib->index_size;
  452.       uint8_t *map;
  453.  
  454.       assert((ib->buffer || ib->user_buffer) && ib->index_size);
  455.  
  456.       /* Create and map the output buffer. */
  457.       err = u_upload_alloc(mgr->uploader, 0,
  458.                            key->output_stride * num_indices,
  459.                            &out_offset, &out_buffer,
  460.                            (void**)&out_map);
  461.       if (err != PIPE_OK)
  462.          return err;
  463.  
  464.       if (ib->user_buffer) {
  465.          map = (uint8_t*)ib->user_buffer + offset;
  466.       } else {
  467.          map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
  468.                                      num_indices * ib->index_size,
  469.                                      PIPE_TRANSFER_READ, &transfer);
  470.       }
  471.  
  472.       switch (ib->index_size) {
  473.       case 4:
  474.          tr->run_elts(tr, (unsigned*)map, num_indices, 0, 0, out_map);
  475.          break;
  476.       case 2:
  477.          tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, 0, out_map);
  478.          break;
  479.       case 1:
  480.          tr->run_elts8(tr, map, num_indices, 0, 0, out_map);
  481.          break;
  482.       }
  483.  
  484.       if (transfer) {
  485.          pipe_buffer_unmap(mgr->pipe, transfer);
  486.       }
  487.    } else {
  488.       /* Create and map the output buffer. */
  489.       err = u_upload_alloc(mgr->uploader,
  490.                            key->output_stride * start_vertex,
  491.                            key->output_stride * num_vertices,
  492.                            &out_offset, &out_buffer,
  493.                            (void**)&out_map);
  494.       if (err != PIPE_OK)
  495.          return err;
  496.  
  497.       out_offset -= key->output_stride * start_vertex;
  498.  
  499.       tr->run(tr, 0, num_vertices, 0, 0, out_map);
  500.    }
  501.  
  502.    /* Unmap all buffers. */
  503.    mask = vb_mask;
  504.    while (mask) {
  505.       unsigned i = u_bit_scan(&mask);
  506.  
  507.       if (vb_transfer[i]) {
  508.          pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
  509.       }
  510.    }
  511.  
  512.    /* Setup the new vertex buffer. */
  513.    mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
  514.    mgr->real_vertex_buffer[out_vb].stride = key->output_stride;
  515.  
  516.    /* Move the buffer reference. */
  517.    pipe_resource_reference(
  518.       &mgr->real_vertex_buffer[out_vb].buffer, NULL);
  519.    mgr->real_vertex_buffer[out_vb].buffer = out_buffer;
  520.  
  521.    return PIPE_OK;
  522. }
  523.  
  524. static boolean
  525. u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
  526.                                     unsigned mask[VB_NUM])
  527. {
  528.    unsigned type;
  529.    unsigned fallback_vbs[VB_NUM];
  530.    /* Set the bit for each buffer which is incompatible or isn't enabled. */
  531.    uint32_t unused_vb_mask =
  532.       mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
  533.       ~mgr->enabled_vb_mask;
  534.  
  535.    memset(fallback_vbs, ~0, sizeof(fallback_vbs));
  536.  
  537.    /* Find free slots for each type if needed. */
  538.    for (type = 0; type < VB_NUM; type++) {
  539.       if (mask[type]) {
  540.          uint32_t index;
  541.  
  542.          if (!unused_vb_mask) {
  543.             return FALSE;
  544.          }
  545.  
  546.          index = ffs(unused_vb_mask) - 1;
  547.          fallback_vbs[type] = index;
               /* Claim the slot so the next type can't pick the same one. */
               unused_vb_mask &= ~(1 << index);
  548.          /*printf("found slot=%i for type=%i\n", index, type);*/
  549.       }
  550.    }
  551.  
  552.    for (type = 0; type < VB_NUM; type++) {
  553.       if (mask[type]) {
  554.          mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
  555.       }
  556.    }
  557.  
  558.    memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
  559.    return TRUE;
  560. }
  561.  
  562. static boolean
  563. u_vbuf_translate_begin(struct u_vbuf *mgr,
  564.                        int start_vertex, unsigned num_vertices,
  565.                        int start_instance, unsigned num_instances,
  566.                        int start_index, unsigned num_indices, int min_index,
  567.                        boolean unroll_indices)
  568. {
  569.    unsigned mask[VB_NUM] = {0};
  570.    struct translate_key key[VB_NUM];
  571.    unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
  572.    unsigned i, type;
  573.    unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
  574.                                    mgr->ve->used_vb_mask;
  575.  
  576.    int start[VB_NUM] = {
  577.       start_vertex,     /* VERTEX */
  578.       start_instance,   /* INSTANCE */
  579.       0                 /* CONST */
  580.    };
  581.  
  582.    unsigned num[VB_NUM] = {
  583.       num_vertices,     /* VERTEX */
  584.       num_instances,    /* INSTANCE */
  585.       1                 /* CONST */
  586.    };
  587.  
  588.    memset(key, 0, sizeof(key));
  589.    memset(elem_index, ~0, sizeof(elem_index));
  590.  
  591.    /* See if there are vertex attribs of each type to translate and
  592.     * which ones. */
  593.    for (i = 0; i < mgr->ve->count; i++) {
  594.       unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;
  595.  
  596.       if (!mgr->vertex_buffer[vb_index].stride) {
  597.          if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  598.              !(incompatible_vb_mask & (1 << vb_index))) {
  599.             continue;
  600.          }
  601.          mask[VB_CONST] |= 1 << vb_index;
  602.       } else if (mgr->ve->ve[i].instance_divisor) {
  603.          if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  604.              !(incompatible_vb_mask & (1 << vb_index))) {
  605.             continue;
  606.          }
  607.          mask[VB_INSTANCE] |= 1 << vb_index;
  608.       } else {
  609.          if (!unroll_indices &&
  610.              !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  611.              !(incompatible_vb_mask & (1 << vb_index))) {
  612.             continue;
  613.          }
  614.          mask[VB_VERTEX] |= 1 << vb_index;
  615.       }
  616.    }
  617.  
  618.    assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
  619.  
  620.    /* Find free vertex buffer slots. */
  621.    if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
  622.       return FALSE;
  623.    }
  624.  
  625.    /* Initialize the translate keys. */
  626.    for (i = 0; i < mgr->ve->count; i++) {
  627.       struct translate_key *k;
  628.       struct translate_element *te;
  629.       unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
  630.       bit = 1 << vb_index;
  631.  
  632.       if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  633.           !(incompatible_vb_mask & (1 << vb_index)) &&
  634.           (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
  635.          continue;
  636.       }
  637.  
  638.       /* Set type to the category we will translate:
  639.        * vertex, instance, or constant attribs. */
  640.       for (type = 0; type < VB_NUM; type++) {
  641.          if (mask[type] & bit) {
  642.             break;
  643.          }
  644.       }
  645.       assert(type < VB_NUM);
  646.       assert(translate_is_output_format_supported(mgr->ve->native_format[i]));
  647.       /*printf("velem=%i type=%i\n", i, type);*/
  648.  
  649.       /* Add the vertex element. */
  650.       k = &key[type];
  651.       elem_index[type][i] = k->nr_elements;
  652.  
  653.       te = &k->element[k->nr_elements];
  654.       te->type = TRANSLATE_ELEMENT_NORMAL;
  655.       te->instance_divisor = 0;
  656.       te->input_buffer = vb_index;
  657.       te->input_format = mgr->ve->ve[i].src_format;
  658.       te->input_offset = mgr->ve->ve[i].src_offset;
  659.       te->output_format = mgr->ve->native_format[i];
  660.       te->output_offset = k->output_stride;
  661.  
  662.       k->output_stride += mgr->ve->native_format_size[i];
  663.       k->nr_elements++;
  664.    }
  665.  
  666.    /* Translate buffers. */
  667.    for (type = 0; type < VB_NUM; type++) {
  668.       if (key[type].nr_elements) {
  669.          enum pipe_error err;
  670.          err = u_vbuf_translate_buffers(mgr, &key[type], mask[type],
  671.                                         mgr->fallback_vbs[type],
  672.                                         start[type], num[type],
  673.                                         start_index, num_indices, min_index,
  674.                                         unroll_indices && type == VB_VERTEX);
  675.          if (err != PIPE_OK)
  676.             return FALSE;
  677.  
  678.          /* Fixup the stride for constant attribs. */
  679.          if (type == VB_CONST) {
  680.             mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
  681.          }
  682.       }
  683.    }
  684.  
  685.    /* Setup new vertex elements. */
  686.    for (i = 0; i < mgr->ve->count; i++) {
  687.       for (type = 0; type < VB_NUM; type++) {
  688.          if (elem_index[type][i] < key[type].nr_elements) {
  689.             struct translate_element *te = &key[type].element[elem_index[type][i]];
  690.             mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
  691.             mgr->fallback_velems[i].src_format = te->output_format;
  692.             mgr->fallback_velems[i].src_offset = te->output_offset;
  693.             mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];
  694.  
  695.             /* elem_index[type][i] can only be set for one type. */
  696.             assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0);
  697.             assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0);
  698.             break;
  699.          }
  700.       }
  701.       /* No translating, just copy the original vertex element over. */
  702.       if (type == VB_NUM) {
  703.          memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
  704.                 sizeof(struct pipe_vertex_element));
  705.       }
  706.    }
  707.  
  708.    u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
  709.                                        mgr->fallback_velems);
  710.    mgr->using_translate = TRUE;
  711.    return TRUE;
  712. }
  713.  
  714. static void u_vbuf_translate_end(struct u_vbuf *mgr)
  715. {
  716.    unsigned i;
  717.  
  718.    /* Restore vertex elements. */
  719.    mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
  720.    mgr->using_translate = FALSE;
  721.  
  722.    /* Unreference the now-unused VBOs. */
  723.    for (i = 0; i < VB_NUM; i++) {
  724.       unsigned vb = mgr->fallback_vbs[i];
  725.       if (vb != ~0) {
  726.          pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer, NULL);
  727.          mgr->fallback_vbs[i] = ~0;
  728.  
  729.          /* This will cause the buffer to be unbound in the driver later. */
  730.          mgr->dirty_real_vb_mask |= 1 << vb;
  731.       }
  732.    }
  733. }
  734.  
  735. static void *
  736. u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
  737.                               const struct pipe_vertex_element *attribs)
  738. {
  739.    struct pipe_context *pipe = mgr->pipe;
  740.    unsigned i;
  741.    struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
  742.    struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
  743.    uint32_t used_buffers = 0;
  744.  
  745.    ve->count = count;
  746.  
  747.    memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
  748.    memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
  749.  
  750.    /* Set the best native format in case the original format is not
  751.     * supported. */
  752.    for (i = 0; i < count; i++) {
  753.       enum pipe_format format = ve->ve[i].src_format;
  754.  
  755.       ve->src_format_size[i] = util_format_get_blocksize(format);
  756.  
  757.       used_buffers |= 1 << ve->ve[i].vertex_buffer_index;
  758.  
  759.       if (!ve->ve[i].instance_divisor) {
  760.          ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
  761.       }
  762.  
  763.       format = mgr->caps.format_translation[format];
  764.  
  765.       driver_attribs[i].src_format = format;
  766.       ve->native_format[i] = format;
  767.       ve->native_format_size[i] =
  768.             util_format_get_blocksize(ve->native_format[i]);
  769.  
  770.       if (ve->ve[i].src_format != format ||
  771.           (!mgr->caps.velem_src_offset_unaligned &&
  772.            ve->ve[i].src_offset % 4 != 0)) {
  773.          ve->incompatible_elem_mask |= 1 << i;
  774.          ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
  775.       } else {
  776.          ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
  777.       }
  778.    }
  779.  
  780.    ve->used_vb_mask = used_buffers;
  781.    ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
  782.    ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
  783.  
  784.    /* Align the formats to the size of DWORD if needed. */
  785.    if (!mgr->caps.velem_src_offset_unaligned) {
  786.       for (i = 0; i < count; i++) {
  787.          ve->native_format_size[i] = align(ve->native_format_size[i], 4);
  788.       }
  789.    }
  790.  
  791.    ve->driver_cso =
  792.       pipe->create_vertex_elements_state(pipe, count, driver_attribs);
  793.    return ve;
  794. }
  795.  
  796. static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
  797. {
  798.    struct pipe_context *pipe = mgr->pipe;
  799.    struct u_vbuf_elements *ve = cso;
  800.  
  801.    pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
  802.    FREE(ve);
  803. }
  804.  
  805. void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
  806.                                unsigned start_slot, unsigned count,
  807.                                const struct pipe_vertex_buffer *bufs)
  808. {
  809.    unsigned i;
  810.    /* which buffers are enabled */
  811.    uint32_t enabled_vb_mask = 0;
  812.    /* which buffers are in user memory */
  813.    uint32_t user_vb_mask = 0;
  814.    /* which buffers are incompatible with the driver */
  815.    uint32_t incompatible_vb_mask = 0;
  816.    /* which buffers have a non-zero stride */
  817.    uint32_t nonzero_stride_vb_mask = 0;
  818.    uint32_t mask = ~(((1ull << count) - 1) << start_slot);
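   /* "mask" keeps every slot outside [start_slot, start_slot + count).
    * E.g. start_slot = 2, count = 3: (1 << 3) - 1 = 0b111, shifted left by 2
    * gives 0b11100, and its complement clears exactly slots 2..4 below. */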
  819.  
  820.    /* Zero out the bits we are going to rewrite completely. */
  821.    mgr->user_vb_mask &= mask;
  822.    mgr->incompatible_vb_mask &= mask;
  823.    mgr->nonzero_stride_vb_mask &= mask;
  824.    mgr->enabled_vb_mask &= mask;
  825.  
  826.    if (!bufs) {
  827.       struct pipe_context *pipe = mgr->pipe;
  828.       /* Unbind. */
  829.       mgr->dirty_real_vb_mask &= mask;
  830.  
  831.       for (i = 0; i < count; i++) {
  832.          unsigned dst_index = start_slot + i;
  833.  
  834.          pipe_resource_reference(&mgr->vertex_buffer[dst_index].buffer, NULL);
  835.          pipe_resource_reference(&mgr->real_vertex_buffer[dst_index].buffer,
  836.                                  NULL);
  837.       }
  838.  
  839.       pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
  840.       return;
  841.    }
  842.  
  843.    for (i = 0; i < count; i++) {
  844.       unsigned dst_index = start_slot + i;
  845.       const struct pipe_vertex_buffer *vb = &bufs[i];
  846.       struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
  847.       struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];
  848.  
  849.       if (!vb->buffer && !vb->user_buffer) {
  850.          pipe_resource_reference(&orig_vb->buffer, NULL);
  851.          pipe_resource_reference(&real_vb->buffer, NULL);
  852.          real_vb->user_buffer = NULL;
  853.          continue;
  854.       }
  855.  
  856.       pipe_resource_reference(&orig_vb->buffer, vb->buffer);
  857.       orig_vb->user_buffer = vb->user_buffer;
  858.  
  859.       real_vb->buffer_offset = orig_vb->buffer_offset = vb->buffer_offset;
  860.       real_vb->stride = orig_vb->stride = vb->stride;
  861.  
  862.       if (vb->stride) {
  863.          nonzero_stride_vb_mask |= 1 << dst_index;
  864.       }
  865.       enabled_vb_mask |= 1 << dst_index;
  866.  
  867.       if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
  868.           (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
  869.          incompatible_vb_mask |= 1 << dst_index;
  870.          pipe_resource_reference(&real_vb->buffer, NULL);
  871.          continue;
  872.       }
  873.  
  874.       if (!mgr->caps.user_vertex_buffers && vb->user_buffer) {
  875.          user_vb_mask |= 1 << dst_index;
  876.          pipe_resource_reference(&real_vb->buffer, NULL);
  877.          continue;
  878.       }
  879.  
  880.       pipe_resource_reference(&real_vb->buffer, vb->buffer);
  881.       real_vb->user_buffer = vb->user_buffer;
  882.    }
  883.  
  884.    mgr->user_vb_mask |= user_vb_mask;
  885.    mgr->incompatible_vb_mask |= incompatible_vb_mask;
  886.    mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
  887.    mgr->enabled_vb_mask |= enabled_vb_mask;
  888.  
  889.    /* All changed buffers are marked as dirty, even the NULL ones,
  890.     * which will cause the NULL buffers to be unbound in the driver later. */
  891.    mgr->dirty_real_vb_mask |= ~mask;
  892. }
  893.  
  894. void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
  895.                              const struct pipe_index_buffer *ib)
  896. {
  897.    struct pipe_context *pipe = mgr->pipe;
  898.  
  899.    if (ib) {
  900.       assert(ib->offset % ib->index_size == 0);
  901.       pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer);
  902.       memcpy(&mgr->index_buffer, ib, sizeof(*ib));
  903.    } else {
  904.       pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
  905.    }
  906.  
  907.    pipe->set_index_buffer(pipe, ib);
  908. }
  909.  
  910. static enum pipe_error
  911. u_vbuf_upload_buffers(struct u_vbuf *mgr,
  912.                       int start_vertex, unsigned num_vertices,
  913.                       int start_instance, unsigned num_instances)
  914. {
  915.    unsigned i;
  916.    unsigned nr_velems = mgr->ve->count;
  917.    struct pipe_vertex_element *velems =
  918.          mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
  919.    unsigned start_offset[PIPE_MAX_ATTRIBS];
  920.    unsigned end_offset[PIPE_MAX_ATTRIBS];
  921.    uint32_t buffer_mask = 0;
  922.  
  923.    /* Determine how much data needs to be uploaded. */
  924.    for (i = 0; i < nr_velems; i++) {
  925.       struct pipe_vertex_element *velem = &velems[i];
  926.       unsigned index = velem->vertex_buffer_index;
  927.       struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
  928.       unsigned instance_div, first, size, index_bit;
  929.  
  930.       /* Skip the buffers generated by translate. */
  931.       if (index == mgr->fallback_vbs[VB_VERTEX] ||
  932.           index == mgr->fallback_vbs[VB_INSTANCE] ||
  933.           index == mgr->fallback_vbs[VB_CONST]) {
  934.          continue;
  935.       }
  936.  
  937.       if (!vb->user_buffer) {
  938.          continue;
  939.       }
  940.  
  941.       instance_div = velem->instance_divisor;
  942.       first = vb->buffer_offset + velem->src_offset;
  943.  
  944.       if (!vb->stride) {
  945.          /* Constant attrib. */
  946.          size = mgr->ve->src_format_size[i];
  947.       } else if (instance_div) {
  948.          /* Per-instance attrib. */
  949.          unsigned count = (num_instances + instance_div - 1) / instance_div;
  950.          first += vb->stride * start_instance;
  951.          size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
  952.       } else {
  953.          /* Per-vertex attrib. */
  954.          first += vb->stride * start_vertex;
  955.          size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
  956.       }
  957.  
  958.       index_bit = 1 << index;
  959.  
  960.       /* Update offsets. */
  961.       if (!(buffer_mask & index_bit)) {
  962.          start_offset[index] = first;
  963.          end_offset[index] = first + size;
  964.       } else {
  965.          if (first < start_offset[index])
  966.             start_offset[index] = first;
  967.          if (first + size > end_offset[index])
  968.             end_offset[index] = first + size;
  969.       }
  970.  
  971.       buffer_mask |= index_bit;
  972.    }
  973.  
  974.    /* Upload buffers. */
  975.    while (buffer_mask) {
  976.       unsigned start, end;
  977.       struct pipe_vertex_buffer *real_vb;
  978.       const uint8_t *ptr;
  979.       enum pipe_error err;
  980.  
  981.       i = u_bit_scan(&buffer_mask);
  982.  
  983.       start = start_offset[i];
  984.       end = end_offset[i];
  985.       assert(start < end);
  986.  
  987.       real_vb = &mgr->real_vertex_buffer[i];
  988.       ptr = mgr->vertex_buffer[i].user_buffer;
  989.  
  990.       err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
  991.                           &real_vb->buffer_offset, &real_vb->buffer);
  992.       if (err != PIPE_OK)
  993.          return err;
  994.  
  995.       real_vb->buffer_offset -= start;
  996.    }
  997.  
  998.    return PIPE_OK;
  999. }
  1000.  
  1001. static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
  1002. {
  1003.    /* See if there are any per-vertex attribs which will be uploaded or
  1004.     * translated. Use bitmasks to get the info instead of looping over vertex
  1005.     * elements. */
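   /* I.e. TRUE if some nonzero-stride buffer used by a non-instanced element
    * is a user buffer, is incompatible, or is referenced by an incompatible
    * element, and so needs the [min_index, max_index] range computed. */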
  1006.    return (mgr->ve->used_vb_mask &
  1007.            ((mgr->user_vb_mask | mgr->incompatible_vb_mask |
  1008.              mgr->ve->incompatible_vb_mask_any) &
  1009.             mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0;
  1010. }
  1011.  
  1012. static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr)
  1013. {
  1014.    /* Return true if there are hw buffers which don't need to be translated.
  1015.     *
  1016.     * We could query whether each buffer is busy, but that would
  1017.     * be way more costly than this. */
  1018.    return (mgr->ve->used_vb_mask &
  1019.            (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask &
  1020.             mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any &
  1021.             mgr->nonzero_stride_vb_mask)) != 0;
  1022. }
  1023.  
  1024. static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
  1025.                                     struct pipe_index_buffer *ib,
  1026.                                     boolean primitive_restart,
  1027.                                     unsigned restart_index,
  1028.                                     unsigned start, unsigned count,
  1029.                                     int *out_min_index,
  1030.                                     int *out_max_index)
  1031. {
  1032.    struct pipe_transfer *transfer = NULL;
  1033.    const void *indices;
  1034.    unsigned i;
  1035.  
  1036.    if (ib->user_buffer) {
  1037.       indices = (uint8_t*)ib->user_buffer +
  1038.                 ib->offset + start * ib->index_size;
  1039.    } else {
  1040.       indices = pipe_buffer_map_range(pipe, ib->buffer,
  1041.                                       ib->offset + start * ib->index_size,
  1042.                                       count * ib->index_size,
  1043.                                       PIPE_TRANSFER_READ, &transfer);
  1044.    }
  1045.  
  1046.    switch (ib->index_size) {
  1047.    case 4: {
  1048.       const unsigned *ui_indices = (const unsigned*)indices;
  1049.       unsigned max_ui = 0;
  1050.       unsigned min_ui = ~0U;
  1051.       if (primitive_restart) {
  1052.          for (i = 0; i < count; i++) {
  1053.             if (ui_indices[i] != restart_index) {
  1054.                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
  1055.                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
  1056.             }
  1057.          }
  1058.       }
  1059.       else {
  1060.          for (i = 0; i < count; i++) {
  1061.             if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
  1062.             if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
  1063.          }
  1064.       }
  1065.       *out_min_index = min_ui;
  1066.       *out_max_index = max_ui;
  1067.       break;
  1068.    }
  1069.    case 2: {
  1070.       const unsigned short *us_indices = (const unsigned short*)indices;
  1071.       unsigned max_us = 0;
  1072.       unsigned min_us = ~0U;
  1073.       if (primitive_restart) {
  1074.          for (i = 0; i < count; i++) {
  1075.             if (us_indices[i] != restart_index) {
  1076.                if (us_indices[i] > max_us) max_us = us_indices[i];
  1077.                if (us_indices[i] < min_us) min_us = us_indices[i];
  1078.             }
  1079.          }
  1080.       }
  1081.       else {
  1082.          for (i = 0; i < count; i++) {
  1083.             if (us_indices[i] > max_us) max_us = us_indices[i];
  1084.             if (us_indices[i] < min_us) min_us = us_indices[i];
  1085.          }
  1086.       }
  1087.       *out_min_index = min_us;
  1088.       *out_max_index = max_us;
  1089.       break;
  1090.    }
  1091.    case 1: {
  1092.       const unsigned char *ub_indices = (const unsigned char*)indices;
  1093.       unsigned max_ub = 0;
  1094.       unsigned min_ub = ~0U;
  1095.       if (primitive_restart) {
  1096.          for (i = 0; i < count; i++) {
  1097.             if (ub_indices[i] != restart_index) {
  1098.                if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
  1099.                if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
  1100.             }
  1101.          }
  1102.       }
  1103.       else {
  1104.          for (i = 0; i < count; i++) {
  1105.             if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
  1106.             if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
  1107.          }
  1108.       }
  1109.       *out_min_index = min_ub;
  1110.       *out_max_index = max_ub;
  1111.       break;
  1112.    }
  1113.    default:
  1114.       assert(0);
  1115.       *out_min_index = 0;
  1116.       *out_max_index = 0;
  1117.    }
  1118.  
  1119.    if (transfer) {
  1120.       pipe_buffer_unmap(pipe, transfer);
  1121.    }
  1122. }
  1123.  
  1124. static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
  1125. {
  1126.    struct pipe_context *pipe = mgr->pipe;
  1127.    unsigned start_slot, count;
  1128.  
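   /* Bind the smallest contiguous slot range covering all dirty bits:
    * ffs() gives the lowest dirty slot, and util_last_bit() of the shifted
    * mask gives how many slots up to and including the highest dirty one.
    * E.g. dirty_real_vb_mask = 0b011010 -> start_slot = 1, count = 4. */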
  1129.    start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
  1130.    count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
  1131.  
  1132.    pipe->set_vertex_buffers(pipe, start_slot, count,
  1133.                             mgr->real_vertex_buffer + start_slot);
  1134.    mgr->dirty_real_vb_mask = 0;
  1135. }
  1136.  
  1137. void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
  1138. {
  1139.    struct pipe_context *pipe = mgr->pipe;
  1140.    int start_vertex, min_index;
  1141.    unsigned num_vertices;
  1142.    boolean unroll_indices = FALSE;
  1143.    uint32_t used_vb_mask = mgr->ve->used_vb_mask;
  1144.    uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
  1145.    uint32_t incompatible_vb_mask = mgr->incompatible_vb_mask & used_vb_mask;
  1146.    struct pipe_draw_info new_info;
  1147.  
  1148.    /* Normal draw. No fallback and no user buffers. */
  1149.    if (!incompatible_vb_mask &&
  1150.        !mgr->ve->incompatible_elem_mask &&
  1151.        !user_vb_mask) {
  1152.  
  1153.       /* Set vertex buffers if needed. */
  1154.       if (mgr->dirty_real_vb_mask & used_vb_mask) {
  1155.          u_vbuf_set_driver_vertex_buffers(mgr);
  1156.       }
  1157.  
  1158.       pipe->draw_vbo(pipe, info);
  1159.       return;
  1160.    }
  1161.  
  1162.    new_info = *info;
  1163.  
  1164.    /* Fallback. We need to know all the parameters. */
  1165.    if (new_info.indirect) {
  1166.       struct pipe_transfer *transfer = NULL;
  1167.       int *data;
  1168.  
  1169.       if (new_info.indexed) {
  1170.          data = pipe_buffer_map_range(pipe, new_info.indirect,
  1171.                                       new_info.indirect_offset, 20,
  1172.                                       PIPE_TRANSFER_READ, &transfer);
  1173.          new_info.index_bias = data[3];
  1174.          new_info.start_instance = data[4];
  1175.       }
  1176.       else {
  1177.          data = pipe_buffer_map_range(pipe, new_info.indirect,
  1178.                                       new_info.indirect_offset, 16,
  1179.                                       PIPE_TRANSFER_READ, &transfer);
  1180.          new_info.start_instance = data[3];
  1181.       }
  1182.  
  1183.       new_info.count = data[0];
  1184.       new_info.instance_count = data[1];
  1185.       new_info.start = data[2];
  1186.       pipe_buffer_unmap(pipe, transfer);
  1187.       new_info.indirect = NULL;
  1188.    }
  1189.  
  1190.    if (new_info.indexed) {
  1191.       /* See if anything needs to be done for per-vertex attribs. */
  1192.       if (u_vbuf_need_minmax_index(mgr)) {
  1193.          int max_index;
  1194.  
  1195.          if (new_info.max_index != ~0) {
  1196.             min_index = new_info.min_index;
  1197.             max_index = new_info.max_index;
  1198.          } else {
  1199.             u_vbuf_get_minmax_index(mgr->pipe, &mgr->index_buffer,
  1200.                                     new_info.primitive_restart,
  1201.                                     new_info.restart_index, new_info.start,
  1202.                                     new_info.count, &min_index, &max_index);
  1203.          }
  1204.  
  1205.          assert(min_index <= max_index);
  1206.  
  1207.          start_vertex = min_index + new_info.index_bias;
  1208.          num_vertices = max_index + 1 - min_index;
  1209.  
  1210.          /* Primitive restart doesn't work when unrolling indices.
  1211.           * We would have to split this draw call into several draws. */
  1212.          /* Use some heuristic to see if unrolling indices improves
  1213.           * performance. */
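         /* E.g. one triangle with indices {0, 1, 10000} (the case from the
          * file header): num_vertices is about 10001 while count is only 3,
          * so translating 3 unrolled vertices beats uploading ~10001. */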
  1214.          if (!new_info.primitive_restart &&
  1215.              num_vertices > new_info.count*2 &&
  1216.              num_vertices - new_info.count > 32 &&
  1217.              !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
  1218.             unroll_indices = TRUE;
  1219.             user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
  1220.                               mgr->ve->noninstance_vb_mask_any);
  1221.          }
  1222.       } else {
  1223.          /* Nothing to do for per-vertex attribs. */
  1224.          start_vertex = 0;
  1225.          num_vertices = 0;
  1226.          min_index = 0;
  1227.       }
  1228.    } else {
  1229.       start_vertex = new_info.start;
  1230.       num_vertices = new_info.count;
  1231.       min_index = 0;
  1232.    }
  1233.  
  1234.    /* Translate vertices with non-native layouts or formats. */
  1235.    if (unroll_indices ||
  1236.        incompatible_vb_mask ||
  1237.        mgr->ve->incompatible_elem_mask) {
  1238.       if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
  1239.                                   new_info.start_instance,
  1240.                                   new_info.instance_count, new_info.start,
  1241.                                   new_info.count, min_index, unroll_indices)) {
  1242.          debug_warn_once("u_vbuf_translate_begin() failed");
  1243.          return;
  1244.       }
  1245.  
  1246.       if (unroll_indices) {
  1247.          new_info.indexed = FALSE;
  1248.          new_info.index_bias = 0;
  1249.          new_info.min_index = 0;
  1250.          new_info.max_index = new_info.count - 1;
  1251.          new_info.start = 0;
  1252.       }
  1253.  
  1254.       user_vb_mask &= ~(incompatible_vb_mask |
  1255.                         mgr->ve->incompatible_vb_mask_all);
  1256.    }
  1257.  
  1258.    /* Upload user buffers. */
  1259.    if (user_vb_mask) {
  1260.       if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
  1261.                                 new_info.start_instance,
  1262.                                 new_info.instance_count) != PIPE_OK) {
  1263.          debug_warn_once("u_vbuf_upload_buffers() failed");
  1264.          return;
  1265.       }
  1266.  
  1267.       mgr->dirty_real_vb_mask |= user_vb_mask;
  1268.    }
  1269.  
  1270.    /*
  1271.    if (unroll_indices) {
  1272.       printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
  1273.              start_vertex, num_vertices);
  1274.       util_dump_draw_info(stdout, info);
  1275.       printf("\n");
  1276.    }
  1277.  
  1278.    unsigned i;
  1279.    for (i = 0; i < mgr->nr_vertex_buffers; i++) {
  1280.       printf("input %i: ", i);
  1281.       util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
  1282.       printf("\n");
  1283.    }
  1284.    for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
  1285.       printf("real %i: ", i);
  1286.       util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
  1287.       printf("\n");
  1288.    }
  1289.    */
  1290.  
  1291.    u_upload_unmap(mgr->uploader);
  1292.    u_vbuf_set_driver_vertex_buffers(mgr);
  1293.  
  1294.    pipe->draw_vbo(pipe, &new_info);
  1295.  
  1296.    if (mgr->using_translate) {
  1297.       u_vbuf_translate_end(mgr);
  1298.    }
  1299. }
  1300.  
  1301. void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
  1302. {
  1303.    assert(!mgr->ve_saved);
  1304.    mgr->ve_saved = mgr->ve;
  1305. }
  1306.  
  1307. void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
  1308. {
  1309.    if (mgr->ve != mgr->ve_saved) {
  1310.       struct pipe_context *pipe = mgr->pipe;
  1311.  
  1312.       mgr->ve = mgr->ve_saved;
  1313.       pipe->bind_vertex_elements_state(pipe,
  1314.                                        mgr->ve ? mgr->ve->driver_cso : NULL);
  1315.    }
  1316.    mgr->ve_saved = NULL;
  1317. }
  1318.  
  1319. void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr)
  1320. {
  1321.    struct pipe_vertex_buffer *vb =
  1322.          &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot];
  1323.  
  1324.    pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, vb->buffer);
  1325.    memcpy(&mgr->aux_vertex_buffer_saved, vb, sizeof(*vb));
  1326. }
  1327.  
  1328. void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr)
  1329. {
  1330.    u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1,
  1331.                              &mgr->aux_vertex_buffer_saved);
  1332.    pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);
  1333. }
  1334.