  1. /**************************************************************************
  2.  *
  3.  * Copyright 2011 Marek Olšák <maraeo@gmail.com>
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * This module uploads user buffers and translates the vertex buffers which
  30.  * contain incompatible vertices (i.e. not supported by the driver/hardware)
  31.  * into compatible ones, based on the Gallium CAPs.
  32.  *
  33.  * It does not upload index buffers.
  34.  *
  35.  * The module makes heavy use of bitmasks for per-buffer and
  36.  * per-vertex-element flags, so that it never has to loop over the list of
  37.  * buffers just to check for a non-zero stride, a user buffer, an
  38.  * unsupported format, etc.
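       *
       * For example, iterating over only the buffers selected by such a mask
       * uses the u_bit_scan() pattern seen throughout this file (an
       * illustrative sketch only; "some_vb_mask" is a placeholder):
       *
       *    uint32_t mask = some_vb_mask;      (one bit per buffer slot)
       *    while (mask) {
       *       unsigned i = u_bit_scan(&mask); (pops the lowest set bit)
       *       ... process vertex buffer i ...
       *    }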
  39.  *
  40.  * There are 3 categories of vertex elements, which are processed
       * separately (a short sketch follows the list):
  41.  * - per-vertex attribs (stride != 0, instance_divisor == 0)
  42.  * - instanced attribs (stride != 0, instance_divisor > 0)
  43.  * - constant attribs (stride == 0)
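       *
       * A purely illustrative sketch of what each category looks like through
       * the Gallium API; "vb" stands for the bound pipe_vertex_buffer array
       * and "ve" for the pipe_vertex_element array, with made-up values:
       *
       *    vb[0].stride = 16;  ve[0].vertex_buffer_index = 0;  (per-vertex)
       *                        ve[0].instance_divisor = 0;
       *    vb[1].stride = 16;  ve[1].vertex_buffer_index = 1;  (instanced)
       *                        ve[1].instance_divisor = 1;
       *    vb[2].stride = 0;   ve[2].vertex_buffer_index = 2;  (constant)
       *                        ve[2].instance_divisor = 0;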
  44.  *
  45.  * All needed uploads and translations are performed for every draw command,
  46.  * but only the subset of vertices needed by that draw command is uploaded
  47.  * or translated. (The module never translates whole buffers.)
  48.  *
  49.  *
  50.  * The module consists of two main parts:
  51.  *
  52.  *
  53.  * 1) Translate (u_vbuf_translate_begin/end)
  54.  *
  55.  * This is pretty much a vertex fetch fallback. It translates vertices from
  56.  * one vertex buffer to another in an unused vertex buffer slot. It does
  57.  * whatever is needed to make the vertices readable by the hardware (changes
  58.  * vertex formats and aligns offsets and strides). The translate module is
  59.  * used here.
  60.  *
  61.  * Each of the 3 categories is translated to a separate buffer.
  62.  * Only the [min_index, max_index] range is translated. For instanced attribs,
  63.  * the range is [start_instance, start_instance+instance_count]. For constant
  64.  * attribs, the range is [0, 1].
  65.  *
  66.  *
  67.  * 2) User buffer uploading (u_vbuf_upload_buffers)
  68.  *
  69.  * Only the [min_index, max_index] range is uploaded (just like Translate)
  70.  * with a single memcpy.
  71.  *
  72.  * This method works best for non-indexed draw operations, and for indexed
  73.  * draw operations where the [min_index, max_index] range is not much
  74.  * larger than the vertex count.
  75.  *
  76.  * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
  77.  * the per-vertex attribs are uploaded via the translate module, all packed
  78.  * into one vertex buffer, and the indexed draw call is turned into
  79.  * a non-indexed one in the process. This adds complexity to the translate
  80.  * path, but it prevents badly behaved applications from dragging the frame
  81.  * rate down.
  82.  *
  83.  *
  84.  * If there is nothing to do, it forwards every command to the driver.
  85.  * The module also has its own CSO cache of vertex element states.
  86.  */
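
      /* A minimal usage sketch, not part of this file: a caller sitting above
       * the driver creates the manager once and then routes vertex buffer,
       * index buffer, vertex element and draw calls through it, so that u_vbuf
       * can upload/translate what is needed before the real draw.  The names
       * my_layer, my_layer_init, my_layer_draw and MY_AUX_VB_SLOT are invented
       * for illustration only.
       *
       *    struct my_layer {
       *       struct pipe_context *pipe;
       *       struct u_vbuf *vbuf;
       *    };
       *
       *    static void my_layer_init(struct my_layer *l,
       *                              struct pipe_context *pipe)
       *    {
       *       struct u_vbuf_caps caps;
       *
       *       u_vbuf_get_caps(pipe->screen, &caps);
       *       l->pipe = pipe;
       *       l->vbuf = u_vbuf_create(pipe, &caps, MY_AUX_VB_SLOT);
       *    }
       *
       *    static void my_layer_draw(struct my_layer *l,
       *                              const struct pipe_draw_info *info)
       *    {
       *       u_vbuf_draw_vbo(l->vbuf, info);
       *    }
       *
       * u_vbuf_draw_vbo uploads and translates whatever the draw needs and
       * then calls pipe->draw_vbo itself, so the caller must not invoke the
       * driver's draw hook again.  Vertex buffers, the index buffer and vertex
       * elements are likewise set with u_vbuf_set_vertex_buffers,
       * u_vbuf_set_index_buffer and u_vbuf_set_vertex_elements rather than
       * directly on the pipe.
       */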
  87.  
  88. #include "util/u_vbuf.h"
  89.  
  90. #include "util/u_dump.h"
  91. #include "util/u_format.h"
  92. #include "util/u_inlines.h"
  93. #include "util/u_memory.h"
  94. #include "util/u_upload_mgr.h"
  95. #include "translate/translate.h"
  96. #include "translate/translate_cache.h"
  97. #include "cso_cache/cso_cache.h"
  98. #include "cso_cache/cso_hash.h"
  99.  
  100. struct u_vbuf_elements {
  101.    unsigned count;
  102.    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
  103.  
  104.    unsigned src_format_size[PIPE_MAX_ATTRIBS];
  105.  
  106.    /* If (velem[i].src_format != native_format[i]), the vertex buffer
  107.     * referenced by the vertex element cannot be used for rendering and
  108.     * its vertex data must be translated to native_format[i]. */
  109.    enum pipe_format native_format[PIPE_MAX_ATTRIBS];
  110.    unsigned native_format_size[PIPE_MAX_ATTRIBS];
  111.  
  112.    /* Which buffers are used by the vertex element state. */
  113.    uint32_t used_vb_mask;
  114.    /* An element is flagged incompatible for either of two reasons:
  115.     * - src_format != native_format, as discussed above;
  116.     * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
  117.    uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib  */
  118.    /* Which buffers are referenced by at least one incompatible vertex
  119.     * element. */
  120.    uint32_t incompatible_vb_mask_any;
  121.    /* Which buffers are referenced by incompatible vertex elements only. */
  122.    uint32_t incompatible_vb_mask_all;
  123.    /* Which buffers are referenced by at least one compatible vertex
  124.     * element. */
  125.    uint32_t compatible_vb_mask_any;
  126.    /* Which buffers are referenced by compatible vertex elements only. */
  127.    uint32_t compatible_vb_mask_all;
  128.  
  129.    /* Which buffers are referenced by at least one non-instanced vertex
  130.     * element. */
  131.    uint32_t noninstance_vb_mask_any;
  132.  
  133.    void *driver_cso;
  134. };
  135.  
  136. enum {
  137.    VB_VERTEX = 0,
  138.    VB_INSTANCE = 1,
  139.    VB_CONST = 2,
  140.    VB_NUM = 3
  141. };
  142.  
  143. struct u_vbuf {
  144.    struct u_vbuf_caps caps;
  145.  
  146.    struct pipe_context *pipe;
  147.    struct translate_cache *translate_cache;
  148.    struct cso_cache *cso_cache;
  149.    struct u_upload_mgr *uploader;
  150.  
  151.    /* This is what was set in set_vertex_buffers.
  152.     * May contain user buffers. */
  153.    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
  154.    uint32_t enabled_vb_mask;
  155.  
  156.    /* The aux vertex buffer slot and the binding saved from it. */
  157.    unsigned aux_vertex_buffer_slot;
  158.    struct pipe_vertex_buffer aux_vertex_buffer_saved;
  159.  
  160.    /* Vertex buffers for the driver.
  161.     * There are usually no user buffers. */
  162.    struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
  163.    uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
  164.                                    call of set_vertex_buffers */
  165.  
  166.    /* The index buffer. */
  167.    struct pipe_index_buffer index_buffer;
  168.  
  169.    /* Vertex elements. */
  170.    struct u_vbuf_elements *ve, *ve_saved;
  171.  
  172.    /* Vertex elements used for the translate fallback. */
  173.    struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
  174.    /* TRUE while the vertex element state created for the translate
  175.     * fallback is bound (and therefore used for rendering). */
  176.    boolean using_translate;
  177.    /* The vertex buffer slot indices where translated vertices are stored,
  178.     * one slot per category (VB_VERTEX, VB_INSTANCE, VB_CONST). */
  179.    unsigned fallback_vbs[VB_NUM];
  180.  
  181.    /* Which buffers are user buffers. */
  182.    uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
  183.    /* Which buffers are incompatible (unaligned). */
  184.    uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
  185.    /* Which buffers have a non-zero stride. */
  186.    uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
  187. };
  188.  
  189. static void *
  190. u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
  191.                               const struct pipe_vertex_element *attribs);
  192. static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);
  193.  
  194.  
  195. void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps)
  196. {
  197.    caps->format_fixed32 =
  198.       screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER,
  199.                                   0, PIPE_BIND_VERTEX_BUFFER);
  200.  
  201.    caps->format_float16 =
  202.       screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER,
  203.                                   0, PIPE_BIND_VERTEX_BUFFER);
  204.  
  205.    caps->format_float64 =
  206.       screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER,
  207.                                   0, PIPE_BIND_VERTEX_BUFFER);
  208.  
  209.    caps->format_norm32 =
  210.       screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER,
  211.                                   0, PIPE_BIND_VERTEX_BUFFER) &&
  212.       screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER,
  213.                                   0, PIPE_BIND_VERTEX_BUFFER);
  214.  
  215.    caps->format_scaled32 =
  216.       screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER,
  217.                                   0, PIPE_BIND_VERTEX_BUFFER) &&
  218.       screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER,
  219.                                   0, PIPE_BIND_VERTEX_BUFFER);
  220.  
  221.    caps->buffer_offset_unaligned =
  222.       !screen->get_param(screen,
  223.                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
  224.  
  225.    caps->buffer_stride_unaligned =
  226.       !screen->get_param(screen,
  227.                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
  228.  
  229.    caps->velem_src_offset_unaligned =
  230.       !screen->get_param(screen,
  231.                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
  232.  
  233.    caps->user_vertex_buffers =
  234.       screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
  235. }
  236.  
  237. struct u_vbuf *
  238. u_vbuf_create(struct pipe_context *pipe,
  239.               struct u_vbuf_caps *caps, unsigned aux_vertex_buffer_index)
  240. {
  241.    struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);
  242.  
  243.    mgr->caps = *caps;
  244.    mgr->aux_vertex_buffer_slot = aux_vertex_buffer_index;
  245.    mgr->pipe = pipe;
  246.    mgr->cso_cache = cso_cache_create();
  247.    mgr->translate_cache = translate_cache_create();
  248.    memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
  249.  
  250.    mgr->uploader = u_upload_create(pipe, 1024 * 1024, 4,
  251.                                    PIPE_BIND_VERTEX_BUFFER);
  252.  
  253.    return mgr;
  254. }
  255.  
  256. /* u_vbuf uses its own caching for vertex elements, because it needs to keep
  257.  * its own preprocessed state per vertex element CSO. */
  258. static struct u_vbuf_elements *
  259. u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
  260.                                     const struct pipe_vertex_element *states)
  261. {
  262.    struct pipe_context *pipe = mgr->pipe;
  263.    unsigned key_size, hash_key;
  264.    struct cso_hash_iter iter;
  265.    struct u_vbuf_elements *ve;
  266.    struct cso_velems_state velems_state;
  267.  
  268.    /* The count must be included in the stored state data too. */
  269.    key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
  270.    velems_state.count = count;
  271.    memcpy(velems_state.velems, states,
  272.           sizeof(struct pipe_vertex_element) * count);
  273.    hash_key = cso_construct_key((void*)&velems_state, key_size);
  274.    iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
  275.                                   (void*)&velems_state, key_size);
  276.  
  277.    if (cso_hash_iter_is_null(iter)) {
  278.       struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
  279.       memcpy(&cso->state, &velems_state, key_size);
  280.       cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
  281.       cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
  282.       cso->context = (void*)mgr;
  283.  
  284.       iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
  285.       ve = cso->data;
  286.    } else {
  287.       ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
  288.    }
  289.  
  290.    assert(ve);
  291.  
  292.    if (ve != mgr->ve)
  293.       pipe->bind_vertex_elements_state(pipe, ve->driver_cso);
  294.    return ve;
  295. }
  296.  
  297. void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
  298.                                const struct pipe_vertex_element *states)
  299. {
  300.    mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
  301. }
  302.  
  303. void u_vbuf_destroy(struct u_vbuf *mgr)
  304. {
  305.    struct pipe_screen *screen = mgr->pipe->screen;
  306.    unsigned i;
  307.    unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
  308.                                               PIPE_SHADER_CAP_MAX_INPUTS);
  309.  
  310.    mgr->pipe->set_index_buffer(mgr->pipe, NULL);
  311.    pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
  312.  
  313.    mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);
  314.  
  315.    for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
  316.       pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL);
  317.    }
  318.    for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
  319.       pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL);
  320.    }
  321.    pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);
  322.  
  323.    translate_cache_destroy(mgr->translate_cache);
  324.    u_upload_destroy(mgr->uploader);
  325.    cso_cache_delete(mgr->cso_cache);
  326.    FREE(mgr);
  327. }
  328.  
  329. static enum pipe_error
  330. u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
  331.                          unsigned vb_mask, unsigned out_vb,
  332.                          int start_vertex, unsigned num_vertices,
  333.                          int start_index, unsigned num_indices, int min_index,
  334.                          boolean unroll_indices)
  335. {
  336.    struct translate *tr;
  337.    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
  338.    struct pipe_resource *out_buffer = NULL;
  339.    uint8_t *out_map;
  340.    unsigned out_offset, mask;
  341.    enum pipe_error err;
  342.  
  343.    /* Get a translate object. */
  344.    tr = translate_cache_find(mgr->translate_cache, key);
  345.  
  346.    /* Map buffers we want to translate. */
  347.    mask = vb_mask;
  348.    while (mask) {
  349.       struct pipe_vertex_buffer *vb;
  350.       unsigned offset;
  351.       uint8_t *map;
  352.       unsigned i = u_bit_scan(&mask);
  353.  
  354.       vb = &mgr->vertex_buffer[i];
  355.       offset = vb->buffer_offset + vb->stride * start_vertex;
  356.  
  357.       if (vb->user_buffer) {
  358.          map = (uint8_t*)vb->user_buffer + offset;
  359.       } else {
  360.          unsigned size = vb->stride ? num_vertices * vb->stride
  361.                                     : sizeof(double)*4;
  362.  
  363.          if (offset+size > vb->buffer->width0) {
  364.             size = vb->buffer->width0 - offset;
  365.          }
  366.  
  367.          map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
  368.                                      PIPE_TRANSFER_READ, &vb_transfer[i]);
  369.       }
  370.  
  371.       /* Subtract min_index so that indexing with the index buffer works. */
  372.       if (unroll_indices) {
  373.          map -= vb->stride * min_index;
  374.       }
  375.  
  376.       tr->set_buffer(tr, i, map, vb->stride, ~0);
  377.    }
  378.  
  379.    /* Translate. */
  380.    if (unroll_indices) {
  381.       struct pipe_index_buffer *ib = &mgr->index_buffer;
  382.       struct pipe_transfer *transfer = NULL;
  383.       unsigned offset = ib->offset + start_index * ib->index_size;
  384.       uint8_t *map;
  385.  
  386.       assert((ib->buffer || ib->user_buffer) && ib->index_size);
  387.  
  388.       /* Create and map the output buffer. */
  389.       err = u_upload_alloc(mgr->uploader, 0,
  390.                            key->output_stride * num_indices,
  391.                            &out_offset, &out_buffer,
  392.                            (void**)&out_map);
  393.       if (err != PIPE_OK)
  394.          return err;
  395.  
  396.       if (ib->user_buffer) {
  397.          map = (uint8_t*)ib->user_buffer + offset;
  398.       } else {
  399.          map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
  400.                                      num_indices * ib->index_size,
  401.                                      PIPE_TRANSFER_READ, &transfer);
  402.       }
  403.  
  404.       switch (ib->index_size) {
  405.       case 4:
  406.          tr->run_elts(tr, (unsigned*)map, num_indices, 0, 0, out_map);
  407.          break;
  408.       case 2:
  409.          tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, 0, out_map);
  410.          break;
  411.       case 1:
  412.          tr->run_elts8(tr, map, num_indices, 0, 0, out_map);
  413.          break;
  414.       }
  415.  
  416.       if (transfer) {
  417.          pipe_buffer_unmap(mgr->pipe, transfer);
  418.       }
  419.    } else {
  420.       /* Create and map the output buffer. */
  421.       err = u_upload_alloc(mgr->uploader,
  422.                            key->output_stride * start_vertex,
  423.                            key->output_stride * num_vertices,
  424.                            &out_offset, &out_buffer,
  425.                            (void**)&out_map);
  426.       if (err != PIPE_OK)
  427.          return err;
  428.  
  429.       out_offset -= key->output_stride * start_vertex;
  430.  
  431.       tr->run(tr, 0, num_vertices, 0, 0, out_map);
  432.    }
  433.  
  434.    /* Unmap all buffers. */
  435.    mask = vb_mask;
  436.    while (mask) {
  437.       unsigned i = u_bit_scan(&mask);
  438.  
  439.       if (vb_transfer[i]) {
  440.          pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
  441.       }
  442.    }
  443.  
  444.    /* Setup the new vertex buffer. */
  445.    mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
  446.    mgr->real_vertex_buffer[out_vb].stride = key->output_stride;
  447.  
  448.    /* Move the buffer reference. */
  449.    pipe_resource_reference(
  450.       &mgr->real_vertex_buffer[out_vb].buffer, NULL);
  451.    mgr->real_vertex_buffer[out_vb].buffer = out_buffer;
  452.  
  453.    return PIPE_OK;
  454. }
  455.  
  456. static boolean
  457. u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
  458.                                     unsigned mask[VB_NUM])
  459. {
  460.    unsigned type;
  461.    unsigned fallback_vbs[VB_NUM];
  462.    /* Set the bit for each buffer slot that is incompatible or not enabled. */
  463.    uint32_t unused_vb_mask =
  464.       mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
  465.       ~mgr->enabled_vb_mask;
  466.  
  467.    memset(fallback_vbs, ~0, sizeof(fallback_vbs));
  468.  
  469.    /* Find free slots for each type if needed. */
  470.    for (type = 0; type < VB_NUM; type++) {
  471.       if (mask[type]) {
  472.          uint32_t index;
  473.  
  474.          if (!unused_vb_mask) {
  475.             return FALSE;
  476.          }
  477.  
  478.          index = ffs(unused_vb_mask) - 1;
  479.          fallback_vbs[type] = index;
                /* Claim this slot so the next category gets a different one. */
                unused_vb_mask &= ~(1 << index);
  480.          /*printf("found slot=%i for type=%i\n", index, type);*/
  481.       }
  482.    }
  483.  
  484.    for (type = 0; type < VB_NUM; type++) {
  485.       if (mask[type]) {
  486.          mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
  487.       }
  488.    }
  489.  
  490.    memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
  491.    return TRUE;
  492. }
  493.  
  494. static boolean
  495. u_vbuf_translate_begin(struct u_vbuf *mgr,
  496.                        int start_vertex, unsigned num_vertices,
  497.                        int start_instance, unsigned num_instances,
  498.                        int start_index, unsigned num_indices, int min_index,
  499.                        boolean unroll_indices)
  500. {
  501.    unsigned mask[VB_NUM] = {0};
  502.    struct translate_key key[VB_NUM];
  503.    unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
  504.    unsigned i, type;
  505.    unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
  506.                                    mgr->ve->used_vb_mask;
  507.  
  508.    int start[VB_NUM] = {
  509.       start_vertex,     /* VERTEX */
  510.       start_instance,   /* INSTANCE */
  511.       0                 /* CONST */
  512.    };
  513.  
  514.    unsigned num[VB_NUM] = {
  515.       num_vertices,     /* VERTEX */
  516.       num_instances,    /* INSTANCE */
  517.       1                 /* CONST */
  518.    };
  519.  
  520.    memset(key, 0, sizeof(key));
  521.    memset(elem_index, ~0, sizeof(elem_index));
  522.  
  523.    /* See if there are vertex attribs of each type to translate and
  524.     * which ones. */
  525.    for (i = 0; i < mgr->ve->count; i++) {
  526.       unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;
  527.  
  528.       if (!mgr->vertex_buffer[vb_index].stride) {
  529.          if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  530.              !(incompatible_vb_mask & (1 << vb_index))) {
  531.             continue;
  532.          }
  533.          mask[VB_CONST] |= 1 << vb_index;
  534.       } else if (mgr->ve->ve[i].instance_divisor) {
  535.          if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  536.              !(incompatible_vb_mask & (1 << vb_index))) {
  537.             continue;
  538.          }
  539.          mask[VB_INSTANCE] |= 1 << vb_index;
  540.       } else {
  541.          if (!unroll_indices &&
  542.              !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  543.              !(incompatible_vb_mask & (1 << vb_index))) {
  544.             continue;
  545.          }
  546.          mask[VB_VERTEX] |= 1 << vb_index;
  547.       }
  548.    }
  549.  
  550.    assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
  551.  
  552.    /* Find free vertex buffer slots. */
  553.    if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
  554.       return FALSE;
  555.    }
  556.  
  557.    /* Initialize the translate keys. */
  558.    for (i = 0; i < mgr->ve->count; i++) {
  559.       struct translate_key *k;
  560.       struct translate_element *te;
  561.       unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
  562.       bit = 1 << vb_index;
  563.  
  564.       if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
  565.           !(incompatible_vb_mask & (1 << vb_index)) &&
  566.           (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
  567.          continue;
  568.       }
  569.  
  570.       /* Set 'type' to the category we are going to translate:
  571.        * vertex, instance, or constant attribs. */
  572.       for (type = 0; type < VB_NUM; type++) {
  573.          if (mask[type] & bit) {
  574.             break;
  575.          }
  576.       }
  577.       assert(type < VB_NUM);
  578.       assert(translate_is_output_format_supported(mgr->ve->native_format[i]));
  579.       /*printf("velem=%i type=%i\n", i, type);*/
  580.  
  581.       /* Add the vertex element. */
  582.       k = &key[type];
  583.       elem_index[type][i] = k->nr_elements;
  584.  
  585.       te = &k->element[k->nr_elements];
  586.       te->type = TRANSLATE_ELEMENT_NORMAL;
  587.       te->instance_divisor = 0;
  588.       te->input_buffer = vb_index;
  589.       te->input_format = mgr->ve->ve[i].src_format;
  590.       te->input_offset = mgr->ve->ve[i].src_offset;
  591.       te->output_format = mgr->ve->native_format[i];
  592.       te->output_offset = k->output_stride;
  593.  
  594.       k->output_stride += mgr->ve->native_format_size[i];
  595.       k->nr_elements++;
  596.    }
  597.  
  598.    /* Translate buffers. */
  599.    for (type = 0; type < VB_NUM; type++) {
  600.       if (key[type].nr_elements) {
  601.          enum pipe_error err;
  602.          err = u_vbuf_translate_buffers(mgr, &key[type], mask[type],
  603.                                         mgr->fallback_vbs[type],
  604.                                         start[type], num[type],
  605.                                         start_index, num_indices, min_index,
  606.                                         unroll_indices && type == VB_VERTEX);
  607.          if (err != PIPE_OK)
  608.             return FALSE;
  609.  
  610.          /* Fixup the stride for constant attribs. */
  611.          if (type == VB_CONST) {
  612.             mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
  613.          }
  614.       }
  615.    }
  616.  
  617.    /* Setup new vertex elements. */
  618.    for (i = 0; i < mgr->ve->count; i++) {
  619.       for (type = 0; type < VB_NUM; type++) {
  620.          if (elem_index[type][i] < key[type].nr_elements) {
  621.             struct translate_element *te = &key[type].element[elem_index[type][i]];
  622.             mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
  623.             mgr->fallback_velems[i].src_format = te->output_format;
  624.             mgr->fallback_velems[i].src_offset = te->output_offset;
  625.             mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];
  626.  
  627.             /* elem_index[type][i] can only be set for one type. */
  628.             assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0);
  629.             assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0);
  630.             break;
  631.          }
  632.       }
  633.       /* No translating, just copy the original vertex element over. */
  634.       if (type == VB_NUM) {
  635.          memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
  636.                 sizeof(struct pipe_vertex_element));
  637.       }
  638.    }
  639.  
  640.    u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
  641.                                        mgr->fallback_velems);
  642.    mgr->using_translate = TRUE;
  643.    return TRUE;
  644. }
  645.  
  646. static void u_vbuf_translate_end(struct u_vbuf *mgr)
  647. {
  648.    unsigned i;
  649.  
  650.    /* Restore vertex elements. */
  651.    mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
  652.    mgr->using_translate = FALSE;
  653.  
  654.    /* Unreference the now-unused VBOs. */
  655.    for (i = 0; i < VB_NUM; i++) {
  656.       unsigned vb = mgr->fallback_vbs[i];
  657.       if (vb != ~0) {
  658.          pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer, NULL);
  659.          mgr->fallback_vbs[i] = ~0;
  660.  
  661.          /* This will cause the buffer to be unbound in the driver later. */
  662.          mgr->dirty_real_vb_mask |= 1 << vb;
  663.       }
  664.    }
  665. }
  666.  
  667. #define FORMAT_REPLACE(what, withwhat) \
  668.     case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break
  669.  
  670. static void *
  671. u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
  672.                               const struct pipe_vertex_element *attribs)
  673. {
  674.    struct pipe_context *pipe = mgr->pipe;
  675.    unsigned i;
  676.    struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
  677.    struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
  678.    uint32_t used_buffers = 0;
  679.  
  680.    ve->count = count;
  681.  
  682.    memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
  683.    memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);
  684.  
  685.    /* Set the best native format in case the original format is not
  686.     * supported. */
  687.    for (i = 0; i < count; i++) {
  688.       enum pipe_format format = ve->ve[i].src_format;
  689.  
  690.       ve->src_format_size[i] = util_format_get_blocksize(format);
  691.  
  692.       used_buffers |= 1 << ve->ve[i].vertex_buffer_index;
  693.  
  694.       if (!ve->ve[i].instance_divisor) {
  695.          ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
  696.       }
  697.  
  698.       /* Choose a native format.
  699.        * For now we don't care about the alignment, that's going to
  700.        * be sorted out later. */
  701.       if (!mgr->caps.format_fixed32) {
  702.          switch (format) {
  703.             FORMAT_REPLACE(R32_FIXED,           R32_FLOAT);
  704.             FORMAT_REPLACE(R32G32_FIXED,        R32G32_FLOAT);
  705.             FORMAT_REPLACE(R32G32B32_FIXED,     R32G32B32_FLOAT);
  706.             FORMAT_REPLACE(R32G32B32A32_FIXED,  R32G32B32A32_FLOAT);
  707.             default:;
  708.          }
  709.       }
  710.       if (!mgr->caps.format_float16) {
  711.          switch (format) {
  712.             FORMAT_REPLACE(R16_FLOAT,           R32_FLOAT);
  713.             FORMAT_REPLACE(R16G16_FLOAT,        R32G32_FLOAT);
  714.             FORMAT_REPLACE(R16G16B16_FLOAT,     R32G32B32_FLOAT);
  715.             FORMAT_REPLACE(R16G16B16A16_FLOAT,  R32G32B32A32_FLOAT);
  716.             default:;
  717.          }
  718.       }
  719.       if (!mgr->caps.format_float64) {
  720.          switch (format) {
  721.             FORMAT_REPLACE(R64_FLOAT,           R32_FLOAT);
  722.             FORMAT_REPLACE(R64G64_FLOAT,        R32G32_FLOAT);
  723.             FORMAT_REPLACE(R64G64B64_FLOAT,     R32G32B32_FLOAT);
  724.             FORMAT_REPLACE(R64G64B64A64_FLOAT,  R32G32B32A32_FLOAT);
  725.             default:;
  726.          }
  727.       }
  728.       if (!mgr->caps.format_norm32) {
  729.          switch (format) {
  730.             FORMAT_REPLACE(R32_UNORM,           R32_FLOAT);
  731.             FORMAT_REPLACE(R32G32_UNORM,        R32G32_FLOAT);
  732.             FORMAT_REPLACE(R32G32B32_UNORM,     R32G32B32_FLOAT);
  733.             FORMAT_REPLACE(R32G32B32A32_UNORM,  R32G32B32A32_FLOAT);
  734.             FORMAT_REPLACE(R32_SNORM,           R32_FLOAT);
  735.             FORMAT_REPLACE(R32G32_SNORM,        R32G32_FLOAT);
  736.             FORMAT_REPLACE(R32G32B32_SNORM,     R32G32B32_FLOAT);
  737.             FORMAT_REPLACE(R32G32B32A32_SNORM,  R32G32B32A32_FLOAT);
  738.             default:;
  739.          }
  740.       }
  741.       if (!mgr->caps.format_scaled32) {
  742.          switch (format) {
  743.             FORMAT_REPLACE(R32_USCALED,         R32_FLOAT);
  744.             FORMAT_REPLACE(R32G32_USCALED,      R32G32_FLOAT);
  745.             FORMAT_REPLACE(R32G32B32_USCALED,   R32G32B32_FLOAT);
  746.             FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
  747.             FORMAT_REPLACE(R32_SSCALED,         R32_FLOAT);
  748.             FORMAT_REPLACE(R32G32_SSCALED,      R32G32_FLOAT);
  749.             FORMAT_REPLACE(R32G32B32_SSCALED,   R32G32B32_FLOAT);
  750.             FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
  751.             default:;
  752.          }
  753.       }
  754.  
  755.       driver_attribs[i].src_format = format;
  756.       ve->native_format[i] = format;
  757.       ve->native_format_size[i] =
  758.             util_format_get_blocksize(ve->native_format[i]);
  759.  
  760.       if (ve->ve[i].src_format != format ||
  761.           (!mgr->caps.velem_src_offset_unaligned &&
  762.            ve->ve[i].src_offset % 4 != 0)) {
  763.          ve->incompatible_elem_mask |= 1 << i;
  764.          ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
  765.       } else {
  766.          ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
  767.       }
  768.    }
  769.  
  770.    ve->used_vb_mask = used_buffers;
  771.    ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
  772.    ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
  773.  
  774.    /* Align the format sizes to 4 bytes (one dword) if needed. */
  775.    if (!mgr->caps.velem_src_offset_unaligned) {
  776.       for (i = 0; i < count; i++) {
  777.          ve->native_format_size[i] = align(ve->native_format_size[i], 4);
  778.       }
  779.    }
  780.  
  781.    ve->driver_cso =
  782.       pipe->create_vertex_elements_state(pipe, count, driver_attribs);
  783.    return ve;
  784. }
  785.  
  786. static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
  787. {
  788.    struct pipe_context *pipe = mgr->pipe;
  789.    struct u_vbuf_elements *ve = cso;
  790.  
  791.    pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
  792.    FREE(ve);
  793. }
  794.  
  795. void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
  796.                                unsigned start_slot, unsigned count,
  797.                                const struct pipe_vertex_buffer *bufs)
  798. {
  799.    unsigned i;
  800.    /* which buffers are enabled */
  801.    uint32_t enabled_vb_mask = 0;
  802.    /* which buffers are in user memory */
  803.    uint32_t user_vb_mask = 0;
  804.    /* which buffers are incompatible with the driver */
  805.    uint32_t incompatible_vb_mask = 0;
  806.    /* which buffers have a non-zero stride */
  807.    uint32_t nonzero_stride_vb_mask = 0;
  808.    uint32_t mask = ~(((1ull << count) - 1) << start_slot);
  809.  
  810.    /* Zero out the bits we are going to rewrite completely. */
  811.    mgr->user_vb_mask &= mask;
  812.    mgr->incompatible_vb_mask &= mask;
  813.    mgr->nonzero_stride_vb_mask &= mask;
  814.    mgr->enabled_vb_mask &= mask;
  815.  
  816.    if (!bufs) {
  817.       struct pipe_context *pipe = mgr->pipe;
  818.       /* Unbind. */
  819.       mgr->dirty_real_vb_mask &= mask;
  820.  
  821.       for (i = 0; i < count; i++) {
  822.          unsigned dst_index = start_slot + i;
  823.  
  824.          pipe_resource_reference(&mgr->vertex_buffer[dst_index].buffer, NULL);
  825.          pipe_resource_reference(&mgr->real_vertex_buffer[dst_index].buffer,
  826.                                  NULL);
  827.       }
  828.  
  829.       pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
  830.       return;
  831.    }
  832.  
  833.    for (i = 0; i < count; i++) {
  834.       unsigned dst_index = start_slot + i;
  835.       const struct pipe_vertex_buffer *vb = &bufs[i];
  836.       struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
  837.       struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];
  838.  
  839.       if (!vb->buffer && !vb->user_buffer) {
  840.          pipe_resource_reference(&orig_vb->buffer, NULL);
  841.          pipe_resource_reference(&real_vb->buffer, NULL);
  842.          real_vb->user_buffer = NULL;
  843.          continue;
  844.       }
  845.  
  846.       pipe_resource_reference(&orig_vb->buffer, vb->buffer);
  847.       orig_vb->user_buffer = vb->user_buffer;
  848.  
  849.       real_vb->buffer_offset = orig_vb->buffer_offset = vb->buffer_offset;
  850.       real_vb->stride = orig_vb->stride = vb->stride;
  851.  
  852.       if (vb->stride) {
  853.          nonzero_stride_vb_mask |= 1 << dst_index;
  854.       }
  855.       enabled_vb_mask |= 1 << dst_index;
  856.  
  857.       if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
  858.           (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
  859.          incompatible_vb_mask |= 1 << dst_index;
  860.          pipe_resource_reference(&real_vb->buffer, NULL);
  861.          continue;
  862.       }
  863.  
  864.       if (!mgr->caps.user_vertex_buffers && vb->user_buffer) {
  865.          user_vb_mask |= 1 << dst_index;
  866.          pipe_resource_reference(&real_vb->buffer, NULL);
  867.          continue;
  868.       }
  869.  
  870.       pipe_resource_reference(&real_vb->buffer, vb->buffer);
  871.       real_vb->user_buffer = vb->user_buffer;
  872.    }
  873.  
  874.    mgr->user_vb_mask |= user_vb_mask;
  875.    mgr->incompatible_vb_mask |= incompatible_vb_mask;
  876.    mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
  877.    mgr->enabled_vb_mask |= enabled_vb_mask;
  878.  
  879.    /* All changed buffers are marked as dirty, even the NULL ones,
  880.     * which will cause the NULL buffers to be unbound in the driver later. */
  881.    mgr->dirty_real_vb_mask |= ~mask;
  882. }
  883.  
  884. void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
  885.                              const struct pipe_index_buffer *ib)
  886. {
  887.    struct pipe_context *pipe = mgr->pipe;
  888.  
  889.    if (ib) {
  890.       assert(ib->offset % ib->index_size == 0);
  891.       pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer);
  892.       memcpy(&mgr->index_buffer, ib, sizeof(*ib));
  893.    } else {
  894.       pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
  895.    }
  896.  
  897.    pipe->set_index_buffer(pipe, ib);
  898. }
  899.  
  900. static enum pipe_error
  901. u_vbuf_upload_buffers(struct u_vbuf *mgr,
  902.                       int start_vertex, unsigned num_vertices,
  903.                       int start_instance, unsigned num_instances)
  904. {
  905.    unsigned i;
  906.    unsigned nr_velems = mgr->ve->count;
  907.    struct pipe_vertex_element *velems =
  908.          mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
  909.    unsigned start_offset[PIPE_MAX_ATTRIBS];
  910.    unsigned end_offset[PIPE_MAX_ATTRIBS];
  911.    uint32_t buffer_mask = 0;
  912.  
  913.    /* Determine how much data needs to be uploaded. */
  914.    for (i = 0; i < nr_velems; i++) {
  915.       struct pipe_vertex_element *velem = &velems[i];
  916.       unsigned index = velem->vertex_buffer_index;
  917.       struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
  918.       unsigned instance_div, first, size, index_bit;
  919.  
  920.       /* Skip the buffers generated by translate. */
  921.       if (index == mgr->fallback_vbs[VB_VERTEX] ||
  922.           index == mgr->fallback_vbs[VB_INSTANCE] ||
  923.           index == mgr->fallback_vbs[VB_CONST]) {
  924.          continue;
  925.       }
  926.  
  927.       if (!vb->user_buffer) {
  928.          continue;
  929.       }
  930.  
  931.       instance_div = velem->instance_divisor;
  932.       first = vb->buffer_offset + velem->src_offset;
  933.  
  934.       if (!vb->stride) {
  935.          /* Constant attrib. */
  936.          size = mgr->ve->src_format_size[i];
  937.       } else if (instance_div) {
  938.          /* Per-instance attrib. */
  939.          unsigned count = (num_instances + instance_div - 1) / instance_div;
  940.          first += vb->stride * start_instance;
  941.          size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
  942.       } else {
  943.          /* Per-vertex attrib. */
  944.          first += vb->stride * start_vertex;
  945.          size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
  946.       }
  947.  
  948.       index_bit = 1 << index;
  949.  
  950.       /* Update offsets. */
  951.       if (!(buffer_mask & index_bit)) {
  952.          start_offset[index] = first;
  953.          end_offset[index] = first + size;
  954.       } else {
  955.          if (first < start_offset[index])
  956.             start_offset[index] = first;
  957.          if (first + size > end_offset[index])
  958.             end_offset[index] = first + size;
  959.       }
  960.  
  961.       buffer_mask |= index_bit;
  962.    }
  963.  
  964.    /* Upload buffers. */
  965.    while (buffer_mask) {
  966.       unsigned start, end;
  967.       struct pipe_vertex_buffer *real_vb;
  968.       const uint8_t *ptr;
  969.       enum pipe_error err;
  970.  
  971.       i = u_bit_scan(&buffer_mask);
  972.  
  973.       start = start_offset[i];
  974.       end = end_offset[i];
  975.       assert(start < end);
  976.  
  977.       real_vb = &mgr->real_vertex_buffer[i];
  978.       ptr = mgr->vertex_buffer[i].user_buffer;
  979.  
  980.       err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
  981.                           &real_vb->buffer_offset, &real_vb->buffer);
  982.       if (err != PIPE_OK)
  983.          return err;
  984.  
  985.       real_vb->buffer_offset -= start;
  986.    }
  987.  
  988.    return PIPE_OK;
  989. }
  990.  
  991. static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
  992. {
  993.    /* See if there are any per-vertex attribs which will be uploaded or
  994.     * translated. Use bitmasks to get the info instead of looping over vertex
  995.     * elements. */
  996.    return (mgr->ve->used_vb_mask &
  997.            ((mgr->user_vb_mask | mgr->incompatible_vb_mask |
  998.              mgr->ve->incompatible_vb_mask_any) &
  999.             mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0;
  1000. }
  1001.  
  1002. static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr)
  1003. {
  1004.    /* Return true if there are hw buffers which don't need to be translated.
  1005.     *
  1006.     * We could query whether each buffer is busy, but that would
  1007.     * be way more costly than this. */
  1008.    return (mgr->ve->used_vb_mask &
  1009.            (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask &
  1010.             mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any &
  1011.             mgr->nonzero_stride_vb_mask)) != 0;
  1012. }
  1013.  
  1014. static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
  1015.                                     struct pipe_index_buffer *ib,
  1016.                                     const struct pipe_draw_info *info,
  1017.                                     int *out_min_index,
  1018.                                     int *out_max_index)
  1019. {
  1020.    struct pipe_transfer *transfer = NULL;
  1021.    const void *indices;
  1022.    unsigned i;
  1023.    unsigned restart_index = info->restart_index;
  1024.  
  1025.    if (ib->user_buffer) {
  1026.       indices = (uint8_t*)ib->user_buffer +
  1027.                 ib->offset + info->start * ib->index_size;
  1028.    } else {
  1029.       indices = pipe_buffer_map_range(pipe, ib->buffer,
  1030.                                       ib->offset + info->start * ib->index_size,
  1031.                                       info->count * ib->index_size,
  1032.                                       PIPE_TRANSFER_READ, &transfer);
  1033.    }
  1034.  
  1035.    switch (ib->index_size) {
  1036.    case 4: {
  1037.       const unsigned *ui_indices = (const unsigned*)indices;
  1038.       unsigned max_ui = 0;
  1039.       unsigned min_ui = ~0U;
  1040.       if (info->primitive_restart) {
  1041.          for (i = 0; i < info->count; i++) {
  1042.             if (ui_indices[i] != restart_index) {
  1043.                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
  1044.                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
  1045.             }
  1046.          }
  1047.       }
  1048.       else {
  1049.          for (i = 0; i < info->count; i++) {
  1050.             if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
  1051.             if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
  1052.          }
  1053.       }
  1054.       *out_min_index = min_ui;
  1055.       *out_max_index = max_ui;
  1056.       break;
  1057.    }
  1058.    case 2: {
  1059.       const unsigned short *us_indices = (const unsigned short*)indices;
  1060.       unsigned max_us = 0;
  1061.       unsigned min_us = ~0U;
  1062.       if (info->primitive_restart) {
  1063.          for (i = 0; i < info->count; i++) {
  1064.             if (us_indices[i] != restart_index) {
  1065.                if (us_indices[i] > max_us) max_us = us_indices[i];
  1066.                if (us_indices[i] < min_us) min_us = us_indices[i];
  1067.             }
  1068.          }
  1069.       }
  1070.       else {
  1071.          for (i = 0; i < info->count; i++) {
  1072.             if (us_indices[i] > max_us) max_us = us_indices[i];
  1073.             if (us_indices[i] < min_us) min_us = us_indices[i];
  1074.          }
  1075.       }
  1076.       *out_min_index = min_us;
  1077.       *out_max_index = max_us;
  1078.       break;
  1079.    }
  1080.    case 1: {
  1081.       const unsigned char *ub_indices = (const unsigned char*)indices;
  1082.       unsigned max_ub = 0;
  1083.       unsigned min_ub = ~0U;
  1084.       if (info->primitive_restart) {
  1085.          for (i = 0; i < info->count; i++) {
  1086.             if (ub_indices[i] != restart_index) {
  1087.                if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
  1088.                if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
  1089.             }
  1090.          }
  1091.       }
  1092.       else {
  1093.          for (i = 0; i < info->count; i++) {
  1094.             if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
  1095.             if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
  1096.          }
  1097.       }
  1098.       *out_min_index = min_ub;
  1099.       *out_max_index = max_ub;
  1100.       break;
  1101.    }
  1102.    default:
  1103.       assert(0);
  1104.       *out_min_index = 0;
  1105.       *out_max_index = 0;
  1106.    }
  1107.  
  1108.    if (transfer) {
  1109.       pipe_buffer_unmap(pipe, transfer);
  1110.    }
  1111. }
  1112.  
  1113. static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
  1114. {
  1115.    struct pipe_context *pipe = mgr->pipe;
  1116.    unsigned start_slot, count;
  1117.  
  1118.    start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
  1119.    count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
  1120.  
  1121.    pipe->set_vertex_buffers(pipe, start_slot, count,
  1122.                             mgr->real_vertex_buffer + start_slot);
  1123.    mgr->dirty_real_vb_mask = 0;
  1124. }
  1125.  
  1126. void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
  1127. {
  1128.    struct pipe_context *pipe = mgr->pipe;
  1129.    int start_vertex, min_index;
  1130.    unsigned num_vertices;
  1131.    boolean unroll_indices = FALSE;
  1132.    uint32_t used_vb_mask = mgr->ve->used_vb_mask;
  1133.    uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
  1134.    uint32_t incompatible_vb_mask = mgr->incompatible_vb_mask & used_vb_mask;
  1135.  
  1136.    /* Normal draw. No fallback and no user buffers. */
  1137.    if (!incompatible_vb_mask &&
  1138.        !mgr->ve->incompatible_elem_mask &&
  1139.        !user_vb_mask) {
  1140.  
  1141.       /* Set vertex buffers if needed. */
  1142.       if (mgr->dirty_real_vb_mask & used_vb_mask) {
  1143.          u_vbuf_set_driver_vertex_buffers(mgr);
  1144.       }
  1145.  
  1146.       pipe->draw_vbo(pipe, info);
  1147.       return;
  1148.    }
  1149.  
  1150.    if (info->indexed) {
  1151.       /* See if anything needs to be done for per-vertex attribs. */
  1152.       if (u_vbuf_need_minmax_index(mgr)) {
  1153.          int max_index;
  1154.  
  1155.          if (info->max_index != ~0) {
  1156.             min_index = info->min_index;
  1157.             max_index = info->max_index;
  1158.          } else {
  1159.             u_vbuf_get_minmax_index(mgr->pipe, &mgr->index_buffer, info,
  1160.                                     &min_index, &max_index);
  1161.          }
  1162.  
  1163.          assert(min_index <= max_index);
  1164.  
  1165.          start_vertex = min_index + info->index_bias;
  1166.          num_vertices = max_index + 1 - min_index;
  1167.  
  1168.          /* Primitive restart doesn't work when unrolling indices; we would
  1169.           * have to split this draw into several draws. */
  1170.          /* Use a heuristic to decide whether unrolling indices improves
  1171.           * performance. */
  1172.          if (!info->primitive_restart &&
  1173.              num_vertices > info->count*2 &&
  1174.              num_vertices-info->count > 32 &&
  1175.              !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
  1176.             /*printf("num_vertices=%i count=%i\n", num_vertices, info->count);*/
  1177.             unroll_indices = TRUE;
  1178.             user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
  1179.                               mgr->ve->noninstance_vb_mask_any);
  1180.          }
  1181.       } else {
  1182.          /* Nothing to do for per-vertex attribs. */
  1183.          start_vertex = 0;
  1184.          num_vertices = 0;
  1185.          min_index = 0;
  1186.       }
  1187.    } else {
  1188.       start_vertex = info->start;
  1189.       num_vertices = info->count;
  1190.       min_index = 0;
  1191.    }
  1192.  
  1193.    /* Translate vertices with non-native layouts or formats. */
  1194.    if (unroll_indices ||
  1195.        incompatible_vb_mask ||
  1196.        mgr->ve->incompatible_elem_mask) {
  1197.       if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
  1198.                                   info->start_instance, info->instance_count,
  1199.                                   info->start, info->count, min_index,
  1200.                                   unroll_indices)) {
  1201.          debug_warn_once("u_vbuf_translate_begin() failed");
  1202.          return;
  1203.       }
  1204.  
  1205.       user_vb_mask &= ~(incompatible_vb_mask |
  1206.                         mgr->ve->incompatible_vb_mask_all);
  1207.    }
  1208.  
  1209.    /* Upload user buffers. */
  1210.    if (user_vb_mask) {
  1211.       if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
  1212.                                 info->start_instance,
  1213.                                 info->instance_count) != PIPE_OK) {
  1214.          debug_warn_once("u_vbuf_upload_buffers() failed");
  1215.          return;
  1216.       }
  1217.  
  1218.       mgr->dirty_real_vb_mask |= user_vb_mask;
  1219.    }
  1220.  
  1221.    /*
  1222.    if (unroll_indices) {
  1223.       printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
  1224.              start_vertex, num_vertices);
  1225.       util_dump_draw_info(stdout, info);
  1226.       printf("\n");
  1227.    }
  1228.  
  1229.    unsigned i;
  1230.    for (i = 0; i < mgr->nr_vertex_buffers; i++) {
  1231.       printf("input %i: ", i);
  1232.       util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
  1233.       printf("\n");
  1234.    }
  1235.    for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
  1236.       printf("real %i: ", i);
  1237.       util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
  1238.       printf("\n");
  1239.    }
  1240.    */
  1241.  
  1242.    u_upload_unmap(mgr->uploader);
  1243.    u_vbuf_set_driver_vertex_buffers(mgr);
  1244.  
  1245.    if (unlikely(unroll_indices)) {
  1246.       struct pipe_draw_info new_info = *info;
  1247.       new_info.indexed = FALSE;
  1248.       new_info.index_bias = 0;
  1249.       new_info.min_index = 0;
  1250.       new_info.max_index = info->count - 1;
  1251.       new_info.start = 0;
  1252.  
  1253.       pipe->draw_vbo(pipe, &new_info);
  1254.    } else {
  1255.       pipe->draw_vbo(pipe, info);
  1256.    }
  1257.  
  1258.    if (mgr->using_translate) {
  1259.       u_vbuf_translate_end(mgr);
  1260.    }
  1261. }
  1262.  
  1263. void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
  1264. {
  1265.    assert(!mgr->ve_saved);
  1266.    mgr->ve_saved = mgr->ve;
  1267. }
  1268.  
  1269. void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
  1270. {
  1271.    if (mgr->ve != mgr->ve_saved) {
  1272.       struct pipe_context *pipe = mgr->pipe;
  1273.  
  1274.       mgr->ve = mgr->ve_saved;
  1275.       pipe->bind_vertex_elements_state(pipe,
  1276.                                        mgr->ve ? mgr->ve->driver_cso : NULL);
  1277.    }
  1278.    mgr->ve_saved = NULL;
  1279. }
  1280.  
  1281. void u_vbuf_save_aux_vertex_buffer_slot(struct u_vbuf *mgr)
  1282. {
  1283.    struct pipe_vertex_buffer *vb =
  1284.          &mgr->vertex_buffer[mgr->aux_vertex_buffer_slot];
  1285.  
  1286.    pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, vb->buffer);
  1287.    memcpy(&mgr->aux_vertex_buffer_saved, vb, sizeof(*vb));
  1288. }
  1289.  
  1290. void u_vbuf_restore_aux_vertex_buffer_slot(struct u_vbuf *mgr)
  1291. {
  1292.    u_vbuf_set_vertex_buffers(mgr, mgr->aux_vertex_buffer_slot, 1,
  1293.                              &mgr->aux_vertex_buffer_saved);
  1294.    pipe_resource_reference(&mgr->aux_vertex_buffer_saved.buffer, NULL);
  1295. }
  1296.