
  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2014 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "genhw/genhw.h"
  29. #include "util/u_dual_blend.h"
  30. #include "util/u_framebuffer.h"
  31. #include "util/u_half.h"
  32. #include "util/u_resource.h"
  33.  
  34. #include "ilo_buffer.h"
  35. #include "ilo_format.h"
  36. #include "ilo_image.h"
  37. #include "ilo_state_3d.h"
  38. #include "../ilo_shader.h"
  39.  
  40. static void
  41. ve_init_cso(const struct ilo_dev *dev,
  42.             const struct pipe_vertex_element *state,
  43.             unsigned vb_index,
  44.             struct ilo_ve_cso *cso)
  45. {
  46.    int comp[4] = {
  47.       GEN6_VFCOMP_STORE_SRC,
  48.       GEN6_VFCOMP_STORE_SRC,
  49.       GEN6_VFCOMP_STORE_SRC,
  50.       GEN6_VFCOMP_STORE_SRC,
  51.    };
  52.    int format;
  53.  
  54.    ILO_DEV_ASSERT(dev, 6, 8);
  55.  
  56.    switch (util_format_get_nr_components(state->src_format)) {
  57. case 1: comp[1] = GEN6_VFCOMP_STORE_0; /* fall through */
  58. case 2: comp[2] = GEN6_VFCOMP_STORE_0; /* fall through */
  59.    case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
  60.                      GEN6_VFCOMP_STORE_1_INT :
  61.                      GEN6_VFCOMP_STORE_1_FP;
  62.    }
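
   /*
    * For example, a two-component format such as PIPE_FORMAT_R32G32_FLOAT
    * enters at case 2 and falls through: comp[] ends up as { STORE_SRC,
    * STORE_SRC, STORE_0, STORE_1_FP }, so the missing Z reads back as 0.0f
    * and the missing W as 1.0f.
    */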
  63.  
  64.    format = ilo_format_translate_vertex(dev, state->src_format);
  65.  
  66.    STATIC_ASSERT(Elements(cso->payload) >= 2);
  67.    cso->payload[0] =
  68.       vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
  69.       GEN6_VE_DW0_VALID |
  70.       format << GEN6_VE_DW0_FORMAT__SHIFT |
  71.       state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
  72.  
  73.    cso->payload[1] =
  74.          comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
  75.          comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
  76.          comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
  77.          comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
  78. }
  79.  
  80. void
  81. ilo_gpe_init_ve(const struct ilo_dev *dev,
  82.                 unsigned num_states,
  83.                 const struct pipe_vertex_element *states,
  84.                 struct ilo_ve_state *ve)
  85. {
  86.    unsigned i;
  87.  
  88.    ILO_DEV_ASSERT(dev, 6, 8);
  89.  
  90.    ve->count = num_states;
  91.    ve->vb_count = 0;
  92.  
  93.    for (i = 0; i < num_states; i++) {
  94.       const unsigned pipe_idx = states[i].vertex_buffer_index;
  95.       const unsigned instance_divisor = states[i].instance_divisor;
  96.       unsigned hw_idx;
  97.  
  98.       /*
  99.        * map the pipe vb to the hardware vb, which has a fixed instance
  100.        * divisor
  101.        */
  102.       for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
  103.          if (ve->vb_mapping[hw_idx] == pipe_idx &&
  104.              ve->instance_divisors[hw_idx] == instance_divisor)
  105.             break;
  106.       }
  107.  
  108.       /* create one if there is no matching hardware vb */
  109.       if (hw_idx >= ve->vb_count) {
  110.          hw_idx = ve->vb_count++;
  111.  
  112.          ve->vb_mapping[hw_idx] = pipe_idx;
  113.          ve->instance_divisors[hw_idx] = instance_divisor;
  114.       }
  115.  
  116.       ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
  117.    }
  118. }
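
/*
 * To illustrate the mapping above: two elements that both use pipe vertex
 * buffer 0, one with instance_divisor 0 and one with instance_divisor 1,
 * end up with two hardware VBs (vb_mapping[] = { 0, 0 },
 * instance_divisors[] = { 0, 1 }), because a hardware VB carries a single
 * fixed divisor.
 */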
  119.  
  120. void
  121. ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
  122.                         struct ilo_ve_cso *cso)
  123. {
  124.    int format;
  125.  
  126.    ILO_DEV_ASSERT(dev, 6, 8);
  127.  
  128.    /*
  129.     * From the Sandy Bridge PRM, volume 2 part 1, page 94:
  130.     *
  131.     *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
  132.     *        valid VERTEX_ELEMENT structure.
  133.     *
  134.     *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
  135.     *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
  136.     *
  137.     *      - The Source Element Format must be set to the UINT format.
  138.     *
  139.     *      - [DevSNB]: Edge Flags are not supported for QUADLIST
  140.     *        primitives.  Software may elect to convert QUADLIST primitives
  141.     *        to some set of corresponding edge-flag-supported primitive
  142.     *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
  143.     */
  144.    cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
  145.  
  146.    /*
  147.     * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
  148.     * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
  149.     * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
  150.     *
  151.     * Since all the hardware cares about is whether the flags are zero or not,
  152.     * we can treat them as the corresponding _UINT formats.
  153.     */
  154.    format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
  155.    cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
  156.  
  157.    switch (format) {
  158.    case GEN6_FORMAT_R32_FLOAT:
  159.       format = GEN6_FORMAT_R32_UINT;
  160.       break;
  161.    case GEN6_FORMAT_R8_USCALED:
  162.       format = GEN6_FORMAT_R8_UINT;
  163.       break;
  164.    default:
  165.       break;
  166.    }
  167.  
  168.    cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);
  169.  
  170.    cso->payload[1] =
  171.          GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
  172.          GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
  173.          GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
  174.          GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
  175. }
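
/*
 * For example, an edge flag array specified with glEdgeFlagPointer() arrives
 * here as R8_USCALED and leaves as R8_UINT with only component 0 stored,
 * which is enough for the hardware to distinguish zero from non-zero flags.
 */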
  176.  
  177. void
  178. ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
  179.                           int comp0, int comp1, int comp2, int comp3,
  180.                           struct ilo_ve_cso *cso)
  181. {
  182.    ILO_DEV_ASSERT(dev, 6, 8);
  183.  
  184.    STATIC_ASSERT(Elements(cso->payload) >= 2);
  185.  
  186.    assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
  187.           comp1 != GEN6_VFCOMP_STORE_SRC &&
  188.           comp2 != GEN6_VFCOMP_STORE_SRC &&
  189.           comp3 != GEN6_VFCOMP_STORE_SRC);
  190.  
  191.    cso->payload[0] = GEN6_VE_DW0_VALID;
  192.    cso->payload[1] =
  193.          comp0 << GEN6_VE_DW1_COMP0__SHIFT |
  194.          comp1 << GEN6_VE_DW1_COMP1__SHIFT |
  195.          comp2 << GEN6_VE_DW1_COMP2__SHIFT |
  196.          comp3 << GEN6_VE_DW1_COMP3__SHIFT;
  197. }
  198.  
  199. void
  200. ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
  201.                     const struct ilo_shader_state *vs,
  202.                     struct ilo_shader_cso *cso)
  203. {
  204.    int start_grf, vue_read_len, sampler_count, max_threads;
  205.    uint32_t dw2, dw4, dw5;
  206.  
  207.    ILO_DEV_ASSERT(dev, 6, 8);
  208.  
  209.    start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
  210.    vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
  211.    sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
  212.  
  213.    /*
  214.     * From the Sandy Bridge PRM, volume 2 part 1, page 135:
  215.     *
  216.     *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
  217.     *      128-bit vertex elements to be passed into the payload for each
  218.     *      vertex."
  219.     *
  220.     *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
  221.     *      data to be read and passed to the thread."
  222.     */
  223.    vue_read_len = (vue_read_len + 1) / 2;
  224.    if (!vue_read_len)
  225.       vue_read_len = 1;
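
   /*
    * For example, a VS whose ILO_KERNEL_INPUT_COUNT is 5 reads
    * (5 + 1) / 2 = 3 pairs of 128-bit entries, while a VS with no inputs
    * still programs a read length of 1 to satisfy the PRM requirement quoted
    * above.
    */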
  226.  
  227.    max_threads = dev->thread_count;
  228.    if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
  229.       max_threads *= 2;
  230.  
  231.    dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
  232.    dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
  233.  
  234.    dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
  235.          vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
  236.          0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
  237.  
  238.    dw5 = GEN6_VS_DW5_STATISTICS |
  239.          GEN6_VS_DW5_VS_ENABLE;
  240.  
  241.    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
  242.       dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
  243.    else
  244.       dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
  245.  
  246.    STATIC_ASSERT(Elements(cso->payload) >= 3);
  247.    cso->payload[0] = dw2;
  248.    cso->payload[1] = dw4;
  249.    cso->payload[2] = dw5;
  250. }
  251.  
  252. static void
  253. gs_init_cso_gen6(const struct ilo_dev *dev,
  254.                  const struct ilo_shader_state *gs,
  255.                  struct ilo_shader_cso *cso)
  256. {
  257.    int start_grf, vue_read_len, max_threads;
  258.    uint32_t dw2, dw4, dw5, dw6;
  259.  
  260.    ILO_DEV_ASSERT(dev, 6, 6);
  261.  
  262.    if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
  263.       start_grf = ilo_shader_get_kernel_param(gs,
  264.             ILO_KERNEL_URB_DATA_START_REG);
  265.  
  266.       vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
  267.    }
  268.    else {
  269.       start_grf = ilo_shader_get_kernel_param(gs,
  270.             ILO_KERNEL_VS_GEN6_SO_START_REG);
  271.  
  272.       vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
  273.    }
  274.  
  275.    /*
  276.     * From the Sandy Bridge PRM, volume 2 part 1, page 153:
  277.     *
  278.     *     "Specifies the amount of URB data read and passed in the thread
  279.     *      payload for each Vertex URB entry, in 256-bit register increments.
  280.     *
  281.     *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
  282.     *      0 indicating no Vertex URB data to be read and passed to the
  283.     *      thread."
  284.     */
  285.    vue_read_len = (vue_read_len + 1) / 2;
  286.    if (!vue_read_len)
  287.       vue_read_len = 1;
  288.  
  289.    /*
  290.     * From the Sandy Bridge PRM, volume 2 part 1, page 154:
  291.     *
  292.     *     "Maximum Number of Threads valid range is [0,27] when Rendering
  293.     *      Enabled bit is set."
  294.     *
  295.     * From the Sandy Bridge PRM, volume 2 part 1, page 173:
  296.     *
  297.     *     "Programming Note: If the GS stage is enabled, software must always
  298.     *      allocate at least one GS URB Entry. This is true even if the GS
  299.     *      thread never needs to output vertices to the pipeline, e.g., when
  300.     *      only performing stream output. This is an artifact of the need to
  301.     *      pass the GS thread an initial destination URB handle."
  302.     *
  303.     * As such, we always enable rendering, and limit the number of threads.
  304.     */
  305.    if (dev->gt == 2) {
  306.       /* maximum is 60, but limited to 28 */
  307.       max_threads = 28;
  308.    }
  309.    else {
  310.       /* maximum is 24, but limited to 21 (see brwCreateContext()) */
  311.       max_threads = 21;
  312.    }
  313.  
  314.    dw2 = GEN6_THREADDISP_SPF;
  315.  
  316.    dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
  317.          0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
  318.          start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
  319.  
  320.    dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
  321.          GEN6_GS_DW5_STATISTICS |
  322.          GEN6_GS_DW5_SO_STATISTICS |
  323.          GEN6_GS_DW5_RENDER_ENABLE;
  324.  
  325.    /*
  326.     * we cannot make use of GEN6_GS_REORDER because it will reorder
  327.     * triangle strips according to D3D rules (triangle 2N+1 uses vertices
  328.     * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
  329.     * (2N+2, 2N+1, 2N+3)).
  330.     */
  331.    dw6 = GEN6_GS_DW6_GS_ENABLE;
  332.  
  333.    if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
  334.       dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
  335.  
  336.    if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
  337.       const uint32_t svbi_post_inc =
  338.          ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
  339.  
  340.       dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
  341.       if (svbi_post_inc) {
  342.          dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
  343.                 svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
  344.       }
  345.    }
  346.  
  347.    STATIC_ASSERT(Elements(cso->payload) >= 4);
  348.    cso->payload[0] = dw2;
  349.    cso->payload[1] = dw4;
  350.    cso->payload[2] = dw5;
  351.    cso->payload[3] = dw6;
  352. }
  353.  
  354. static void
  355. gs_init_cso_gen7(const struct ilo_dev *dev,
  356.                  const struct ilo_shader_state *gs,
  357.                  struct ilo_shader_cso *cso)
  358. {
  359.    int start_grf, vue_read_len, sampler_count, max_threads;
  360.    uint32_t dw2, dw4, dw5;
  361.  
  362.    ILO_DEV_ASSERT(dev, 7, 7.5);
  363.  
  364.    start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
  365.    vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
  366.    sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
  367.  
  368.    /* in pairs */
  369.    vue_read_len = (vue_read_len + 1) / 2;
  370.  
  371.    switch (ilo_dev_gen(dev)) {
  372.    case ILO_GEN(7.5):
  373.       max_threads = (dev->gt >= 2) ? 256 : 70;
  374.       break;
  375.    case ILO_GEN(7):
  376.       max_threads = (dev->gt == 2) ? 128 : 36;
  377.       break;
  378.    default:
  379.       max_threads = 1;
  380.       break;
  381.    }
  382.  
  383.    dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
  384.    dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
  385.  
  386.    dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
  387.          GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
  388.          0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
  389.          start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
  390.  
  391.    dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
  392.          GEN7_GS_DW5_STATISTICS |
  393.          GEN7_GS_DW5_GS_ENABLE;
  394.  
  395.    STATIC_ASSERT(Elements(cso->payload) >= 3);
  396.    cso->payload[0] = dw2;
  397.    cso->payload[1] = dw4;
  398.    cso->payload[2] = dw5;
  399. }
  400.  
  401. void
  402. ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
  403.                     const struct ilo_shader_state *gs,
  404.                     struct ilo_shader_cso *cso)
  405. {
  406.    if (ilo_dev_gen(dev) >= ILO_GEN(7))
  407.       gs_init_cso_gen7(dev, gs, cso);
  408.    else
  409.       gs_init_cso_gen6(dev, gs, cso);
  410. }
  411.  
  412. static void
  413. view_init_null_gen6(const struct ilo_dev *dev,
  414.                     unsigned width, unsigned height,
  415.                     unsigned depth, unsigned level,
  416.                     struct ilo_view_surface *surf)
  417. {
  418.    uint32_t *dw;
  419.  
  420.    ILO_DEV_ASSERT(dev, 6, 6);
  421.  
  422.    assert(width >= 1 && height >= 1 && depth >= 1);
  423.  
  424.    /*
  425.     * From the Sandy Bridge PRM, volume 4 part 1, page 71:
  426.     *
  427.     *     "A null surface will be used in instances where an actual surface is
  428.     *      not bound. When a write message is generated to a null surface, no
  429.     *      actual surface is written to. When a read message (including any
  430.     *      sampling engine message) is generated to a null surface, the result
  431.     *      is all zeros. Note that a null surface type is allowed to be used
  432.     *      with all messages, even if it is not specifically indicated as
  433.     *      supported. All of the remaining fields in surface state are ignored
  434.     *      for null surfaces, with the following exceptions:
  435.     *
  436.     *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
  437.     *          depth buffer's corresponding state for all render target
  438.     *          surfaces, including null.
  439.     *        * Surface Format must be R8G8B8A8_UNORM."
  440.     *
  441.     * From the Sandy Bridge PRM, volume 4 part 1, page 82:
  442.     *
  443.     *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
  444.     *      true"
  445.     */
  446.  
  447.    STATIC_ASSERT(Elements(surf->payload) >= 6);
  448.    dw = surf->payload;
  449.  
  450.    dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
  451.            GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
  452.  
  453.    dw[1] = 0;
  454.  
  455.    dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
  456.            (width  - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
  457.            level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
  458.  
  459.    dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
  460.            GEN6_TILING_X;
  461.  
  462.    dw[4] = 0;
  463.    dw[5] = 0;
  464. }
  465.  
  466. static void
  467. view_init_for_buffer_gen6(const struct ilo_dev *dev,
  468.                           const struct ilo_buffer *buf,
  469.                           unsigned offset, unsigned size,
  470.                           unsigned struct_size,
  471.                           enum pipe_format elem_format,
  472.                           bool is_rt, bool render_cache_rw,
  473.                           struct ilo_view_surface *surf)
  474. {
  475.    const int elem_size = util_format_get_blocksize(elem_format);
  476.    int width, height, depth, pitch;
  477.    int surface_format, num_entries;
  478.    uint32_t *dw;
  479.  
  480.    ILO_DEV_ASSERT(dev, 6, 6);
  481.  
  482.    /*
  483.     * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
  484.     * structure in a buffer.
  485.     */
  486.  
  487.    surface_format = ilo_format_translate_color(dev, elem_format);
  488.  
  489.    num_entries = size / struct_size;
  490.    /* see if there is enough space to fit another element */
  491.    if (size % struct_size >= elem_size)
  492.       num_entries++;
  493.  
  494.    /*
  495.     * From the Sandy Bridge PRM, volume 4 part 1, page 76:
  496.     *
  497.     *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
  498.     *      Address) specifies the base address of first element of the
  499.     *      surface. The surface is interpreted as a simple array of that
  500.     *      single element type. The address must be naturally-aligned to the
  501.     *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
  502.     *      must be 16-byte aligned).
  503.     *
  504.     *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
  505.     *      the base address of the first element of the surface, computed in
  506.     *      software by adding the surface base address to the byte offset of
  507.     *      the element in the buffer."
  508.     */
  509.    if (is_rt)
  510.       assert(offset % elem_size == 0);
  511.  
  512.    /*
  513.     * From the Sandy Bridge PRM, volume 4 part 1, page 77:
  514.     *
  515.     *     "For buffer surfaces, the number of entries in the buffer ranges
  516.     *      from 1 to 2^27."
  517.     */
  518.    assert(num_entries >= 1 && num_entries <= 1 << 27);
  519.  
  520.    /*
  521.     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  522.     *
  523.     *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
  524.     *      indicates the size of the structure."
  525.     */
  526.    pitch = struct_size;
  527.  
  528.    pitch--;
  529.    num_entries--;
  530.    /* bits [6:0] */
  531.    width  = (num_entries & 0x0000007f);
  532.    /* bits [19:7] */
  533.    height = (num_entries & 0x000fff80) >> 7;
  534.    /* bits [26:20] */
  535.    depth  = (num_entries & 0x07f00000) >> 20;
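
   /*
    * For example, a buffer with 1000 entries is programmed as
    * num_entries - 1 = 999 = 0x3e7: width 0x67 (bits [6:0]), height 0x7
    * (bits [19:7]), and depth 0 (bits [26:20]).
    */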
  536.  
  537.    STATIC_ASSERT(Elements(surf->payload) >= 6);
  538.    dw = surf->payload;
  539.  
  540.    dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
  541.            surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
  542.    if (render_cache_rw)
  543.       dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
  544.  
  545.    dw[1] = offset;
  546.  
  547.    dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
  548.            width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
  549.  
  550.    dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
  551.            pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
  552.  
  553.    dw[4] = 0;
  554.    dw[5] = 0;
  555. }
  556.  
  557. static void
  558. view_init_for_image_gen6(const struct ilo_dev *dev,
  559.                          const struct ilo_image *img,
  560.                          enum pipe_texture_target target,
  561.                          enum pipe_format format,
  562.                          unsigned first_level,
  563.                          unsigned num_levels,
  564.                          unsigned first_layer,
  565.                          unsigned num_layers,
  566.                          bool is_rt,
  567.                          struct ilo_view_surface *surf)
  568. {
  569.    int surface_type, surface_format;
  570.    int width, height, depth, pitch, lod;
  571.    uint32_t *dw;
  572.  
  573.    ILO_DEV_ASSERT(dev, 6, 6);
  574.  
  575.    surface_type = ilo_gpe_gen6_translate_texture(target);
  576.    assert(surface_type != GEN6_SURFTYPE_BUFFER);
  577.  
  578.    if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
  579.       format = PIPE_FORMAT_Z32_FLOAT;
  580.  
  581.    if (is_rt)
  582.       surface_format = ilo_format_translate_render(dev, format);
  583.    else
  584.       surface_format = ilo_format_translate_texture(dev, format);
  585.    assert(surface_format >= 0);
  586.  
  587.    width = img->width0;
  588.    height = img->height0;
  589.    depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
  590.    pitch = img->bo_stride;
  591.  
  592.    if (surface_type == GEN6_SURFTYPE_CUBE) {
  593.       /*
  594.        * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  595.        *
  596.        *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
  597.        *      range of this field (Depth) is [0,84], indicating the number of
  598.        *      cube array elements (equal to the number of underlying 2D array
  599.        *      elements divided by 6). For other surfaces, this field must be
  600.        *      zero."
  601.        *
  602.        * When is_rt is true, we treat the texture as a 2D one to avoid the
  603.        * restriction.
  604.        */
  605.       if (is_rt) {
  606.          surface_type = GEN6_SURFTYPE_2D;
  607.       }
  608.       else {
  609.          assert(num_layers % 6 == 0);
  610.          depth = num_layers / 6;
  611.       }
  612.    }
  613.  
  614.    /* sanity check the size */
  615.    assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
  616.    switch (surface_type) {
  617.    case GEN6_SURFTYPE_1D:
  618.       assert(width <= 8192 && height == 1 && depth <= 512);
  619.       assert(first_layer < 512 && num_layers <= 512);
  620.       break;
  621.    case GEN6_SURFTYPE_2D:
  622.       assert(width <= 8192 && height <= 8192 && depth <= 512);
  623.       assert(first_layer < 512 && num_layers <= 512);
  624.       break;
  625.    case GEN6_SURFTYPE_3D:
  626.       assert(width <= 2048 && height <= 2048 && depth <= 2048);
  627.       assert(first_layer < 2048 && num_layers <= 512);
  628.       if (!is_rt)
  629.          assert(first_layer == 0);
  630.       break;
  631.    case GEN6_SURFTYPE_CUBE:
  632.       assert(width <= 8192 && height <= 8192 && depth <= 85);
  633.       assert(width == height);
  634.       assert(first_layer < 512 && num_layers <= 512);
  635.       if (is_rt)
  636.          assert(first_layer == 0);
  637.       break;
  638.    default:
  639.       assert(!"unexpected surface type");
  640.       break;
  641.    }
  642.  
  643.    /* non-full array spacing is supported only on GEN7+ */
  644.    assert(img->walk != ILO_IMAGE_WALK_LOD);
  645.    /* non-interleaved samples are supported only on GEN7+ */
  646.    if (img->sample_count > 1)
  647.       assert(img->interleaved_samples);
  648.  
  649.    if (is_rt) {
  650.       assert(num_levels == 1);
  651.       lod = first_level;
  652.    }
  653.    else {
  654.       lod = num_levels - 1;
  655.    }
  656.  
  657.    /*
  658.     * From the Sandy Bridge PRM, volume 4 part 1, page 76:
  659.     *
  660.     *     "Linear render target surface base addresses must be element-size
  661.     *      aligned, for non-YUV surface formats, or a multiple of 2
  662.     *      element-sizes for YUV surface formats. Other linear surfaces have
  663.     *      no alignment requirements (byte alignment is sufficient.)"
  664.     *
  665.     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  666.     *
  667.     *     "For linear render target surfaces, the pitch must be a multiple
  668.     *      of the element size for non-YUV surface formats. Pitch must be a
  669.     *      multiple of 2 * element size for YUV surface formats."
  670.     *
  671.     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
  672.     *
  673.     *     "For linear surfaces, this field (X Offset) must be zero"
  674.     */
  675.    if (img->tiling == GEN6_TILING_NONE) {
  676.       if (is_rt) {
  677.          const int elem_size = util_format_get_blocksize(format);
  678.          assert(pitch % elem_size == 0);
  679.       }
  680.    }
  681.  
  682.    STATIC_ASSERT(Elements(surf->payload) >= 6);
  683.    dw = surf->payload;
  684.  
  685.    dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
  686.            surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
  687.            GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
  688.  
  689.    if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
  690.       dw[0] |= 1 << 9 |
  691.                GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
  692.    }
  693.  
  694.    if (is_rt)
  695.       dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
  696.  
  697.    dw[1] = 0;
  698.  
  699.    dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
  700.            (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
  701.            lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
  702.  
  703.    assert(img->tiling != GEN8_TILING_W);
  704.    dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
  705.            (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
  706.            img->tiling;
  707.  
  708.    dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
  709.            first_layer << 17 |
  710.            (num_layers - 1) << 8 |
  711.            ((img->sample_count > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
  712.                                       GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
  713.  
  714.    dw[5] = 0;
  715.  
  716.    assert(img->align_j == 2 || img->align_j == 4);
  717.    if (img->align_j == 4)
  718.       dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
  719. }
  720.  
  721. static void
  722. view_init_null_gen7(const struct ilo_dev *dev,
  723.                     unsigned width, unsigned height,
  724.                     unsigned depth, unsigned level,
  725.                     struct ilo_view_surface *surf)
  726. {
  727.    uint32_t *dw;
  728.  
  729.    ILO_DEV_ASSERT(dev, 7, 8);
  730.  
  731.    assert(width >= 1 && height >= 1 && depth >= 1);
  732.  
  733.    /*
  734.     * From the Ivy Bridge PRM, volume 4 part 1, page 62:
  735.     *
  736.     *     "A null surface is used in instances where an actual surface is not
  737.     *      bound. When a write message is generated to a null surface, no
  738.     *      actual surface is written to. When a read message (including any
  739.     *      sampling engine message) is generated to a null surface, the result
  740.     *      is all zeros.  Note that a null surface type is allowed to be used
  741.     *      with all messages, even if it is not specifically indicated as
  742.     *      supported. All of the remaining fields in surface state are ignored
  743.     *      for null surfaces, with the following exceptions:
  744.     *
  745.     *      * Width, Height, Depth, LOD, and Render Target View Extent fields
  746.     *        must match the depth buffer's corresponding state for all render
  747.     *        target surfaces, including null.
  748.     *      * All sampling engine and data port messages support null surfaces
  749.     *        with the above behavior, even if not mentioned as specifically
  750.     *        supported, except for the following:
  751.     *        * Data Port Media Block Read/Write messages.
  752.     *      * The Surface Type of a surface used as a render target (accessed
  753.     *        via the Data Port's Render Target Write message) must be the same
  754.     *        as the Surface Type of all other render targets and of the depth
  755.     *        buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
  756.     *        buffer or render targets are SURFTYPE_NULL."
  757.     *
  758.     * From the Ivy Bridge PRM, volume 4 part 1, page 65:
  759.     *
  760.     *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
  761.     *      true"
  762.     */
  763.  
  764.    STATIC_ASSERT(Elements(surf->payload) >= 13);
  765.    dw = surf->payload;
  766.  
  767.    dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
  768.            GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
  769.  
  770.    if (ilo_dev_gen(dev) >= ILO_GEN(8))
  771.       dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
  772.    else
  773.       dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
  774.  
  775.    dw[1] = 0;
  776.  
  777.    dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
  778.            GEN_SHIFT32(width  - 1, GEN7_SURFACE_DW2_WIDTH);
  779.  
  780.    dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
  781.  
  782.    dw[4] = 0;
  783.    dw[5] = level;
  784.  
  785.    dw[6] = 0;
  786.    dw[7] = 0;
  787.  
  788.    if (ilo_dev_gen(dev) >= ILO_GEN(8))
  789.       memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
  790. }
  791.  
  792. static void
  793. view_init_for_buffer_gen7(const struct ilo_dev *dev,
  794.                           const struct ilo_buffer *buf,
  795.                           unsigned offset, unsigned size,
  796.                           unsigned struct_size,
  797.                           enum pipe_format elem_format,
  798.                           bool is_rt, bool render_cache_rw,
  799.                           struct ilo_view_surface *surf)
  800. {
  801.    const bool typed = (elem_format != PIPE_FORMAT_NONE);
  802.    const bool structured = (!typed && struct_size > 1);
  803.    const int elem_size = (typed) ?
  804.       util_format_get_blocksize(elem_format) : 1;
  805.    int width, height, depth, pitch;
  806.    int surface_type, surface_format, num_entries;
  807.    uint32_t *dw;
  808.  
  809.    ILO_DEV_ASSERT(dev, 7, 8);
  810.  
  811.    surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
  812.  
  813.    surface_format = (typed) ?
  814.       ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
  815.  
  816.    num_entries = size / struct_size;
  817.    /* see if there is enough space to fit another element */
  818.    if (size % struct_size >= elem_size && !structured)
  819.       num_entries++;
  820.  
  821.    /*
  822.     * From the Ivy Bridge PRM, volume 4 part 1, page 67:
  823.     *
  824.     *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
  825.     *      Address) specifies the base address of first element of the
  826.     *      surface. The surface is interpreted as a simple array of that
  827.     *      single element type. The address must be naturally-aligned to the
  828.     *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
  829.     *      must be 16-byte aligned)
  830.     *
  831.     *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
  832.     *      the base address of the first element of the surface, computed in
  833.     *      software by adding the surface base address to the byte offset of
  834.     *      the element in the buffer."
  835.     */
  836.    if (is_rt)
  837.       assert(offset % elem_size == 0);
  838.  
  839.    /*
  840.     * From the Ivy Bridge PRM, volume 4 part 1, page 68:
  841.     *
  842.     *     "For typed buffer and structured buffer surfaces, the number of
  843.     *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
  844.     *      surfaces, the number of entries in the buffer is the number of
  845.     *      bytes which can range from 1 to 2^30."
  846.     */
  847.    assert(num_entries >= 1 &&
  848.           num_entries <= 1 << ((typed || structured) ? 27 : 30));
  849.  
  850.    /*
  851.     * From the Ivy Bridge PRM, volume 4 part 1, page 69:
  852.     *
  853.     *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
  854.     *      11 if the Surface Format is RAW (the size of the buffer must be a
  855.     *      multiple of 4 bytes)."
  856.     *
  857.     * From the Ivy Bridge PRM, volume 4 part 1, page 70:
  858.     *
  859.     *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
  860.     *      field (Surface Pitch) indicates the size of the structure."
  861.     *
  862.     *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
  863.     *      must be a multiple of 4 bytes."
  864.     */
  865.    if (structured)
  866.       assert(struct_size % 4 == 0);
  867.    else if (!typed)
  868.       assert(num_entries % 4 == 0);
  869.  
  870.    pitch = struct_size;
  871.  
  872.    pitch--;
  873.    num_entries--;
  874.    /* bits [6:0] */
  875.    width  = (num_entries & 0x0000007f);
  876.    /* bits [20:7] */
  877.    height = (num_entries & 0x001fff80) >> 7;
  878.    /* bits [30:21] */
  879.    depth  = (num_entries & 0x7fe00000) >> 21;
  880.    /* limit to [26:21] */
  881.    if (typed || structured)
  882.       depth &= 0x3f;
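
   /*
    * For example, a 4096-byte raw buffer (struct_size 1, elem_format
    * PIPE_FORMAT_NONE) is programmed as num_entries - 1 = 4095 = 0xfff:
    * width 0x7f (bits [6:0]), height 0x1f (bits [20:7]), and depth 0
    * (bits [30:21]).
    */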
  883.  
  884.    STATIC_ASSERT(Elements(surf->payload) >= 13);
  885.    dw = surf->payload;
  886.  
  887.    dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
  888.            surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
  889.    if (render_cache_rw)
  890.       dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
  891.  
  892.    if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
  893.       dw[8] = offset;
  894.       memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
  895.    } else {
  896.       dw[1] = offset;
  897.    }
  898.  
  899.    dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
  900.            GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
  901.  
  902.    dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
  903.            pitch;
  904.  
  905.    dw[4] = 0;
  906.    dw[5] = 0;
  907.  
  908.    dw[6] = 0;
  909.    dw[7] = 0;
  910.  
  911.    if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
  912.       dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
  913.                GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
  914.                GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
  915.                GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
  916.    }
  917. }
  918.  
  919. static void
  920. view_init_for_image_gen7(const struct ilo_dev *dev,
  921.                          const struct ilo_image *img,
  922.                          enum pipe_texture_target target,
  923.                          enum pipe_format format,
  924.                          unsigned first_level,
  925.                          unsigned num_levels,
  926.                          unsigned first_layer,
  927.                          unsigned num_layers,
  928.                          bool is_rt,
  929.                          struct ilo_view_surface *surf)
  930. {
  931.    int surface_type, surface_format;
  932.    int width, height, depth, pitch, lod;
  933.    uint32_t *dw;
  934.  
  935.    ILO_DEV_ASSERT(dev, 7, 8);
  936.  
  937.    surface_type = ilo_gpe_gen6_translate_texture(target);
  938.    assert(surface_type != GEN6_SURFTYPE_BUFFER);
  939.  
  940.    if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
  941.       format = PIPE_FORMAT_Z32_FLOAT;
  942.  
  943.    if (is_rt)
  944.       surface_format = ilo_format_translate_render(dev, format);
  945.    else
  946.       surface_format = ilo_format_translate_texture(dev, format);
  947.    assert(surface_format >= 0);
  948.  
  949.    width = img->width0;
  950.    height = img->height0;
  951.    depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
  952.    pitch = img->bo_stride;
  953.  
  954.    if (surface_type == GEN6_SURFTYPE_CUBE) {
  955.       /*
  956.        * From the Ivy Bridge PRM, volume 4 part 1, page 70:
  957.        *
  958.        *     "For SURFTYPE_CUBE: For Sampling Engine Surfaces, the range of
  959.        *      this field is [0,340], indicating the number of cube array
  960.        *      elements (equal to the number of underlying 2D array elements
  961.        *      divided by 6). For other surfaces, this field must be zero."
  962.        *
  963.        * When is_rt is true, we treat the texture as a 2D one to avoid the
  964.        * restriction.
  965.        */
  966.       if (is_rt) {
  967.          surface_type = GEN6_SURFTYPE_2D;
  968.       }
  969.       else {
  970.          assert(num_layers % 6 == 0);
  971.          depth = num_layers / 6;
  972.       }
  973.    }
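
   /*
    * For example, a cube array view with 24 layers is bound for sampling as
    * a cube surface with depth = 24 / 6 = 4, but as a render target it is
    * bound as a 2D array with depth = 24.
    */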
  974.  
  975.    /* sanity check the size */
  976.    assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
  977.    assert(first_layer < 2048 && num_layers <= 2048);
  978.    switch (surface_type) {
  979.    case GEN6_SURFTYPE_1D:
  980.       assert(width <= 16384 && height == 1 && depth <= 2048);
  981.       break;
  982.    case GEN6_SURFTYPE_2D:
  983.       assert(width <= 16384 && height <= 16384 && depth <= 2048);
  984.       break;
  985.    case GEN6_SURFTYPE_3D:
  986.       assert(width <= 2048 && height <= 2048 && depth <= 2048);
  987.       if (!is_rt)
  988.          assert(first_layer == 0);
  989.       break;
  990.    case GEN6_SURFTYPE_CUBE:
  991.       assert(width <= 16384 && height <= 16384 && depth <= 86);
  992.       assert(width == height);
  993.       if (is_rt)
  994.          assert(first_layer == 0);
  995.       break;
  996.    default:
  997.       assert(!"unexpected surface type");
  998.       break;
  999.    }
  1000.  
  1001.    if (is_rt) {
  1002.       assert(num_levels == 1);
  1003.       lod = first_level;
  1004.    }
  1005.    else {
  1006.       lod = num_levels - 1;
  1007.    }
  1008.  
  1009.    /*
  1010.     * From the Ivy Bridge PRM, volume 4 part 1, page 68:
  1011.     *
  1012.     *     "The Base Address for linear render target surfaces and surfaces
  1013.     *      accessed with the typed surface read/write data port messages must
  1014.     *      be element-size aligned, for non-YUV surface formats, or a multiple
  1015.     *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
  1016.     *      have no alignment requirements (byte alignment is sufficient)."
  1017.     *
  1018.     * From the Ivy Bridge PRM, volume 4 part 1, page 70:
  1019.     *
  1020.     *     "For linear render target surfaces and surfaces accessed with the
  1021.     *      typed data port messages, the pitch must be a multiple of the
  1022.     *      element size for non-YUV surface formats. Pitch must be a multiple
  1023.     *      of 2 * element size for YUV surface formats. For linear surfaces
  1024.     *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
  1025.     *      of 4 bytes. For other linear surfaces, the pitch can be any multiple
  1026.     *      of bytes."
  1027.     *
  1028.     * From the Ivy Bridge PRM, volume 4 part 1, page 74:
  1029.     *
  1030.     *     "For linear surfaces, this field (X Offset) must be zero."
  1031.     */
  1032.    if (img->tiling == GEN6_TILING_NONE) {
  1033.       if (is_rt) {
  1034.          const int elem_size = util_format_get_blocksize(format);
  1035.          assert(pitch % elem_size == 0);
  1036.       }
  1037.    }
  1038.  
  1039.    STATIC_ASSERT(Elements(surf->payload) >= 13);
  1040.    dw = surf->payload;
  1041.  
  1042.    dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
  1043.            surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
  1044.  
  1045.    /*
  1046.     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
  1047.     *
  1048.     *     "If this field (Surface Array) is enabled, the Surface Type must be
  1049.     *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
  1050.     *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
  1051.     *      SURFTYPE_CUBE, the Depth field must be set to zero."
  1052.     *
  1053.     * For non-3D sampler surfaces, resinfo (the sampler message) always
  1054.     * returns zero for the number of layers when this field is not set.
  1055.     */
  1056.    if (surface_type != GEN6_SURFTYPE_3D) {
  1057.       switch (target) {
  1058.       case PIPE_TEXTURE_1D_ARRAY:
  1059.       case PIPE_TEXTURE_2D_ARRAY:
  1060.       case PIPE_TEXTURE_CUBE_ARRAY:
  1061.          dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
  1062.          break;
  1063.       default:
  1064.          assert(depth == 1);
  1065.          break;
  1066.       }
  1067.    }
  1068.  
  1069.    if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
  1070.       switch (img->align_j) {
  1071.       case 4:
  1072.          dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
  1073.          break;
  1074.       case 8:
  1075.          dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
  1076.          break;
  1077.       case 16:
  1078.          dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
  1079.          break;
  1080.       default:
  1081.          assert(!"unsupported valign");
  1082.          break;
  1083.       }
  1084.  
  1085.       switch (img->align_i) {
  1086.       case 4:
  1087.          dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
  1088.          break;
  1089.       case 8:
  1090.          dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
  1091.          break;
  1092.       case 16:
  1093.          dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
  1094.          break;
  1095.       default:
  1096.          assert(!"unsupported halign");
  1097.          break;
  1098.       }
  1099.  
  1100.       dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
  1101.    } else {
  1102.       assert(img->align_i == 4 || img->align_i == 8);
  1103.       assert(img->align_j == 2 || img->align_j == 4);
  1104.  
  1105.       if (img->align_j == 4)
  1106.          dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
  1107.  
  1108.       if (img->align_i == 8)
  1109.          dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
  1110.  
  1111.       assert(img->tiling != GEN8_TILING_W);
  1112.       dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
  1113.  
  1114.       if (img->walk == ILO_IMAGE_WALK_LOD)
  1115.          dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
  1116.       else
  1117.          dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
  1118.    }
  1119.  
  1120.    if (is_rt)
  1121.       dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
  1122.  
  1123.    if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
  1124.       dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
  1125.  
  1126.    if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
  1127.       assert(img->walk_layer_height % 4 == 0);
  1128.       dw[1] = img->walk_layer_height / 4;
  1129.    } else {
  1130.       dw[1] = 0;
  1131.    }
  1132.  
  1133.    dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
  1134.            GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
  1135.  
  1136.    dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
  1137.            (pitch - 1);
  1138.  
  1139.    dw[4] = first_layer << 18 |
  1140.            (num_layers - 1) << 7;
  1141.  
  1142.    /*
  1143.     * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
  1144.     * means the samples are interleaved.  The layouts are the same when the
  1145.     * number of samples is 1.
  1146.     */
  1147.    if (img->interleaved_samples && img->sample_count > 1) {
  1148.       assert(!is_rt);
  1149.       dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
  1150.    }
  1151.    else {
  1152.       dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
  1153.    }
  1154.  
  1155.    switch (img->sample_count) {
  1156.    case 0:
  1157.    case 1:
  1158.    default:
  1159.       dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
  1160.       break;
  1161.    case 2:
  1162.       dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
  1163.       break;
  1164.    case 4:
  1165.       dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
  1166.       break;
  1167.    case 8:
  1168.       dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
  1169.       break;
  1170.    case 16:
  1171.       dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
  1172.       break;
  1173.    }
  1174.  
  1175.    dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
  1176.            lod;
  1177.  
  1178.    dw[6] = 0;
  1179.    dw[7] = 0;
  1180.  
  1181.    if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
  1182.       dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
  1183.                GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
  1184.                GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
  1185.                GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
  1186.    }
  1187.  
  1188.    if (ilo_dev_gen(dev) >= ILO_GEN(8))
  1189.       memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
  1190. }
  1191.  
  1192. void
  1193. ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
  1194.                                unsigned width, unsigned height,
  1195.                                unsigned depth, unsigned level,
  1196.                                struct ilo_view_surface *surf)
  1197. {
  1198.    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
  1199.       view_init_null_gen7(dev,
  1200.             width, height, depth, level, surf);
  1201.    } else {
  1202.       view_init_null_gen6(dev,
  1203.             width, height, depth, level, surf);
  1204.    }
  1205.  
  1206.    surf->bo = NULL;
  1207.    surf->scanout = false;
  1208. }
  1209.  
  1210. void
  1211. ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
  1212.                                      const struct ilo_buffer *buf,
  1213.                                      unsigned offset, unsigned size,
  1214.                                      unsigned struct_size,
  1215.                                      enum pipe_format elem_format,
  1216.                                      bool is_rt, bool render_cache_rw,
  1217.                                      struct ilo_view_surface *surf)
  1218. {
  1219.    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
  1220.       view_init_for_buffer_gen7(dev, buf, offset, size,
  1221.             struct_size, elem_format, is_rt, render_cache_rw, surf);
  1222.    } else {
  1223.       view_init_for_buffer_gen6(dev, buf, offset, size,
  1224.             struct_size, elem_format, is_rt, render_cache_rw, surf);
  1225.    }
  1226.  
  1227.    /* do not increment reference count */
  1228.    surf->bo = buf->bo;
  1229.    surf->scanout = false;
  1230. }
  1231.  
  1232. void
  1233. ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
  1234.                                     const struct ilo_image *img,
  1235.                                     enum pipe_texture_target target,
  1236.                                     enum pipe_format format,
  1237.                                     unsigned first_level,
  1238.                                     unsigned num_levels,
  1239.                                     unsigned first_layer,
  1240.                                     unsigned num_layers,
  1241.                                     bool is_rt,
  1242.                                     struct ilo_view_surface *surf)
  1243. {
  1244.    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
  1245.       view_init_for_image_gen7(dev, img, target, format,
  1246.             first_level, num_levels, first_layer, num_layers,
  1247.             is_rt, surf);
  1248.    } else {
  1249.       view_init_for_image_gen6(dev, img, target, format,
  1250.             first_level, num_levels, first_layer, num_layers,
  1251.             is_rt, surf);
  1252.    }
  1253.  
  1254.    surf->scanout = img->scanout;
  1255.    /* do not increment reference count */
  1256.    surf->bo = img->bo;
  1257. }
  1258.  
  1259. static void
  1260. sampler_init_border_color_gen6(const struct ilo_dev *dev,
  1261.                                const union pipe_color_union *color,
  1262.                                uint32_t *dw, int num_dwords)
  1263. {
  1264.    float rgba[4] = {
  1265.       color->f[0], color->f[1], color->f[2], color->f[3],
  1266.    };
  1267.  
  1268.    ILO_DEV_ASSERT(dev, 6, 6);
  1269.  
  1270.    assert(num_dwords >= 12);
  1271.  
  1272.    /*
  1273.     * This state is not documented in the Sandy Bridge PRM, but in the
  1274.     * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
  1275.     */
  1276.  
  1277.    /* IEEE_FP */
  1278.    dw[1] = fui(rgba[0]);
  1279.    dw[2] = fui(rgba[1]);
  1280.    dw[3] = fui(rgba[2]);
  1281.    dw[4] = fui(rgba[3]);
  1282.  
  1283.    /* FLOAT_16 */
  1284.    dw[5] = util_float_to_half(rgba[0]) |
  1285.            util_float_to_half(rgba[1]) << 16;
  1286.    dw[6] = util_float_to_half(rgba[2]) |
  1287.            util_float_to_half(rgba[3]) << 16;
  1288.  
  1289.    /* clamp to [-1.0f, 1.0f] */
  1290.    rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
  1291.    rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
  1292.    rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
  1293.    rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
  1294.  
  1295.    /* SNORM16 (cast via uint16_t so negative components do not sign-extend) */
  1296.    dw[9] =  (uint32_t) (uint16_t) util_iround(rgba[0] * 32767.0f) |
  1297.             (uint32_t) (uint16_t) util_iround(rgba[1] * 32767.0f) << 16;
  1298.    dw[10] = (uint32_t) (uint16_t) util_iround(rgba[2] * 32767.0f) |
  1299.             (uint32_t) (uint16_t) util_iround(rgba[3] * 32767.0f) << 16;
  1300.  
  1301.    /* SNORM8 (likewise masked to 8 bits per component) */
  1302.    dw[11] = (uint32_t) (uint8_t) util_iround(rgba[0] * 127.0f) |
  1303.             (uint32_t) (uint8_t) util_iround(rgba[1] * 127.0f) << 8 |
  1304.             (uint32_t) (uint8_t) util_iround(rgba[2] * 127.0f) << 16 |
  1305.             (uint32_t) (uint8_t) util_iround(rgba[3] * 127.0f) << 24;
  1306.  
  1307.    /* clamp to [0.0f, 1.0f] */
  1308.    rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
  1309.    rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
  1310.    rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
  1311.    rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
  1312.  
  1313.    /* UNORM8 */
  1314.    dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
  1315.            (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
  1316.            (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
  1317.            (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
  1318.  
  1319.    /* UNORM16 */
  1320.    dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
  1321.            (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
  1322.    dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
  1323.            (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
  1324. }
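
/*
 * For example, an opaque black border color (0.0, 0.0, 0.0, 1.0) packs as
 * dw[0] = 0xff000000 (UNORM8), dw[4] = 0x3f800000 (IEEE_FP alpha),
 * dw[6] = 0x3c000000 (FLOAT_16 alpha), dw[8] = 0xffff0000 (UNORM16),
 * dw[10] = 0x7fff0000 (SNORM16), dw[11] = 0x7f000000 (SNORM8), with the
 * remaining dwords zero.
 */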
  1325.  
  1326. /**
  1327.  * Translate a pipe texture mipfilter to the matching hardware mipfilter.
  1328.  */
  1329. static int
  1330. gen6_translate_tex_mipfilter(unsigned filter)
  1331. {
  1332.    switch (filter) {
  1333.    case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
  1334.    case PIPE_TEX_MIPFILTER_LINEAR:  return GEN6_MIPFILTER_LINEAR;
  1335.    case PIPE_TEX_MIPFILTER_NONE:    return GEN6_MIPFILTER_NONE;
  1336.    default:
  1337.       assert(!"unknown mipfilter");
  1338.       return GEN6_MIPFILTER_NONE;
  1339.    }
  1340. }
  1341.  
  1342. /**
  1343.  * Translate a pipe texture filter to the matching hardware mapfilter.
  1344.  */
  1345. static int
  1346. gen6_translate_tex_filter(unsigned filter)
  1347. {
  1348.    switch (filter) {
  1349.    case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
  1350.    case PIPE_TEX_FILTER_LINEAR:  return GEN6_MAPFILTER_LINEAR;
  1351.    default:
  1352.       assert(!"unknown sampler filter");
  1353.       return GEN6_MAPFILTER_NEAREST;
  1354.    }
  1355. }
  1356.  
  1357. /**
  1358.  * Translate a pipe texture coordinate wrapping mode to the matching hardware
  1359.  * wrapping mode.
  1360.  */
  1361. static int
  1362. gen6_translate_tex_wrap(unsigned wrap)
  1363. {
  1364.    switch (wrap) {
  1365.    case PIPE_TEX_WRAP_CLAMP:              return GEN8_TEXCOORDMODE_HALF_BORDER;
  1366.    case PIPE_TEX_WRAP_REPEAT:             return GEN6_TEXCOORDMODE_WRAP;
  1367.    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:      return GEN6_TEXCOORDMODE_CLAMP;
  1368.    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:    return GEN6_TEXCOORDMODE_CLAMP_BORDER;
  1369.    case PIPE_TEX_WRAP_MIRROR_REPEAT:      return GEN6_TEXCOORDMODE_MIRROR;
  1370.    case PIPE_TEX_WRAP_MIRROR_CLAMP:
  1371.    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
  1372.    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
  1373.    default:
  1374.       assert(!"unknown sampler wrap mode");
  1375.       return GEN6_TEXCOORDMODE_WRAP;
  1376.    }
  1377. }
  1378.  
  1379. /**
  1380.  * Translate a pipe shadow compare function to the matching hardware shadow
  1381.  * function.
  1382.  */
  1383. static int
  1384. gen6_translate_shadow_func(unsigned func)
  1385. {
  1386.    /*
  1387.     * For PIPE_FUNC_x, the reference value is on the left-hand side of the
  1388.     * comparison, and 1.0 is returned when the comparison is true.
  1389.     *
  1390.     * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
  1391.     * the comparison, and 0.0 is returned when the comparison is true.
  1392.     */
  1393.    switch (func) {
  1394.    case PIPE_FUNC_NEVER:      return GEN6_COMPAREFUNCTION_ALWAYS;
  1395.    case PIPE_FUNC_LESS:       return GEN6_COMPAREFUNCTION_LEQUAL;
  1396.    case PIPE_FUNC_EQUAL:      return GEN6_COMPAREFUNCTION_NOTEQUAL;
  1397.    case PIPE_FUNC_LEQUAL:     return GEN6_COMPAREFUNCTION_LESS;
  1398.    case PIPE_FUNC_GREATER:    return GEN6_COMPAREFUNCTION_GEQUAL;
  1399.    case PIPE_FUNC_NOTEQUAL:   return GEN6_COMPAREFUNCTION_EQUAL;
  1400.    case PIPE_FUNC_GEQUAL:     return GEN6_COMPAREFUNCTION_GREATER;
  1401.    case PIPE_FUNC_ALWAYS:     return GEN6_COMPAREFUNCTION_NEVER;
  1402.    default:
  1403.       assert(!"unknown shadow compare function");
  1404.       return GEN6_COMPAREFUNCTION_NEVER;
  1405.    }
  1406. }
  1407.  
  1408. void
  1409. ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
  1410.                          const struct pipe_sampler_state *state,
  1411.                          struct ilo_sampler_cso *sampler)
  1412. {
  1413.    int mip_filter, min_filter, mag_filter, max_aniso;
  1414.    int lod_bias, max_lod, min_lod;
  1415.    int wrap_s, wrap_t, wrap_r, wrap_cube;
  1416.    uint32_t dw0, dw1, dw3;
  1417.  
  1418.    ILO_DEV_ASSERT(dev, 6, 8);
  1419.  
  1420.    memset(sampler, 0, sizeof(*sampler));
  1421.  
  1422.    mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
  1423.    min_filter = gen6_translate_tex_filter(state->min_img_filter);
  1424.    mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
  1425.  
  1426.    sampler->anisotropic = state->max_anisotropy;
  1427.  
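         /*
          * max_aniso is the encoded maximum anisotropy ratio: (ratio / 2) - 1
          * for ratios in [2, 16] (e.g. 16 maps to 7), clamped to
          * GEN6_ANISORATIO_2 or GEN6_ANISORATIO_16 outside that range.
          */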
  1428.    if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
  1429.       max_aniso = state->max_anisotropy / 2 - 1;
  1430.    else if (state->max_anisotropy > 16)
  1431.       max_aniso = GEN6_ANISORATIO_16;
  1432.    else
  1433.       max_aniso = GEN6_ANISORATIO_2;
  1434.  
  1435.    /*
  1437.     * Here is how the hardware calculates per-pixel LOD, from my reading of the
  1438.     * PRMs:
  1439.     *
  1440.     *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
  1441.     *     other ways.  The number of texels is measured using level
  1442.     *     SurfMinLod.
  1443.     *  2) Bias is added to LOD.
  1444.     *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
  1445.     *     compared with Base to determine whether magnification or
  1446.     *     minification is needed.  (if preclamp is disabled, LOD is compared
  1447.     *     with Base before clamping)
  1448.     *  4) If magnification is needed, or no mipmapping is requested, LOD is
  1449.     *     set to floor(MinLod).
  1450.     *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
  1451.     *
  1452.     * With Gallium interface, Base is always zero and
  1453.     * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
  1454.     */
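         /*
          * LodBias, MinLod, and MaxLod are fixed-point fields: S4.8/U4.8 on
          * Gen7+ and S4.6/U4.6 on Gen6.  For example, a lod_bias of -1.5f on
          * Gen7+ scales to -384, and masking with 0x1fff keeps its 13-bit
          * two's-complement encoding, 0x1e80.
          */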
  1455.    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
  1456.       const float scale = 256.0f;
  1457.  
  1458.       /* [-16.0, 16.0) in S4.8 */
  1459.       lod_bias = (int)
  1460.          (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
  1461.       lod_bias &= 0x1fff;
  1462.  
  1463.       /* [0.0, 14.0] in U4.8 */
  1464.       max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
  1465.       min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
  1466.    }
  1467.    else {
  1468.       const float scale = 64.0f;
  1469.  
  1470.       /* [-16.0, 16.0) in S4.6 */
  1471.       lod_bias = (int)
  1472.          (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
  1473.       lod_bias &= 0x7ff;
  1474.  
  1475.       /* [0.0, 13.0] in U4.6 */
  1476.       max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
  1477.       min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
  1478.    }
  1479.  
  1480.    /*
  1481.     * We want LOD to be clamped to determine magnification/minification, and
  1482.     * get set to zero when it is magnification or when mipmapping is disabled.
  1483.     * The hardware would set LOD to floor(MinLod) and that is a problem when
  1484.     * MinLod is greater than or equal to 1.0f.
  1485.     *
  1486.     * With Base being zero, it is always minification when MinLod is non-zero.
  1487.     * To achieve our goal, we just need to set MinLod to zero and set
  1488.     * MagFilter to MinFilter when mipmapping is disabled.
  1489.     */
  1490.    if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
  1491.       min_lod = 0;
  1492.       mag_filter = min_filter;
  1493.    }
  1494.  
  1495.    /* determine wrap s/t/r */
  1496.    wrap_s = gen6_translate_tex_wrap(state->wrap_s);
  1497.    wrap_t = gen6_translate_tex_wrap(state->wrap_t);
  1498.    wrap_r = gen6_translate_tex_wrap(state->wrap_r);
  1499.    if (ilo_dev_gen(dev) < ILO_GEN(8)) {
  1500.       /*
  1501.        * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
  1502.        * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering,
  1503.        * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
  1504.        * additionally clamping the texture coordinates to [0.0, 1.0].
  1505.        *
  1506.        * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8.  The
  1507.        * clamping has to be taken care of in the shaders.  There are two
  1508.        * filters here, but let the minification filter have the say.
  1509.        */
  1510.       const bool clamp_is_to_edge =
  1511.          (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
  1512.  
  1513.       if (clamp_is_to_edge) {
  1514.          if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
  1515.             wrap_s = GEN6_TEXCOORDMODE_CLAMP;
  1516.          if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
  1517.             wrap_t = GEN6_TEXCOORDMODE_CLAMP;
  1518.          if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
  1519.             wrap_r = GEN6_TEXCOORDMODE_CLAMP;
  1520.       } else {
  1521.          if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
  1522.             wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
  1523.             sampler->saturate_s = true;
  1524.          }
  1525.          if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
  1526.             wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
  1527.             sampler->saturate_t = true;
  1528.          }
  1529.          if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
  1530.             wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
  1531.             sampler->saturate_r = true;
  1532.          }
  1533.       }
  1534.    }
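         /*
          * The saturate_s/t/r flags set above record that the corresponding
          * coordinate still has to be clamped to [0.0, 1.0] in the shader;
          * the border-clamp mode alone does not reproduce PIPE_TEX_WRAP_CLAMP
          * with linear filtering.
          */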
  1535.  
  1536.    /*
  1537.     * From the Sandy Bridge PRM, volume 4 part 1, page 107:
  1538.     *
  1539.     *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
  1540.     *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
  1541.     *      must have the same Address Control mode."
  1542.     *
  1543.     * From the Ivy Bridge PRM, volume 4 part 1, page 96:
  1544.     *
  1545.     *     "This field (Cube Surface Control Mode) must be set to
  1546.     *      CUBECTRLMODE_PROGRAMMED"
  1547.     *
  1548.     * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
  1549.     * map filtering.
  1550.     */
  1551.    if (state->seamless_cube_map &&
  1552.        (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
  1553.         state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
  1554.       wrap_cube = GEN6_TEXCOORDMODE_CUBE;
  1555.    }
  1556.    else {
  1557.       wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
  1558.    }
  1559.  
  1560.    if (!state->normalized_coords) {
  1561.       /*
  1562.        * From the Ivy Bridge PRM, volume 4 part 1, page 98:
  1563.        *
  1564.        *     "The following state must be set as indicated if this field
  1565.        *      (Non-normalized Coordinate Enable) is enabled:
  1566.        *
  1567.        *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
  1568.        *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
  1569.        *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
  1570.        *      - Mag Mode Filter must be MAPFILTER_NEAREST or
  1571.        *        MAPFILTER_LINEAR.
  1572.        *      - Min Mode Filter must be MAPFILTER_NEAREST or
  1573.        *        MAPFILTER_LINEAR.
  1574.        *      - Mip Mode Filter must be MIPFILTER_NONE.
  1575.        *      - Min LOD must be 0.
  1576.        *      - Max LOD must be 0.
  1577.        *      - MIP Count must be 0.
  1578.        *      - Surface Min LOD must be 0.
  1579.        *      - Texture LOD Bias must be 0."
  1580.        */
  1581.       assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
  1582.              wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
  1583.       assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
  1584.              wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
  1585.       assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
  1586.              wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
  1587.  
  1588.       assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
  1589.              mag_filter == GEN6_MAPFILTER_LINEAR);
  1590.       assert(min_filter == GEN6_MAPFILTER_NEAREST ||
  1591.              min_filter == GEN6_MAPFILTER_LINEAR);
  1592.  
  1593.       /* work around a bug in util_blitter */
  1594.       mip_filter = GEN6_MIPFILTER_NONE;
  1595.  
  1596.       assert(mip_filter == GEN6_MIPFILTER_NONE);
  1597.    }
  1598.  
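         /*
          * Pack SAMPLER_STATE.  payload[0..2] collect dw0, dw1, and dw3,
          * while the filter and wrap variants (dw_filter, dw_filter_aniso,
          * dw_wrap, dw_wrap_1d, dw_wrap_cube) are kept separately so the one
          * matching the texture being sampled can be chosen at emit time.
          */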
  1599.    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
  1600.       dw0 = 1 << 28 |
  1601.             mip_filter << 20 |
  1602.             lod_bias << 1;
  1603.  
  1604.       sampler->dw_filter = mag_filter << 17 |
  1605.                            min_filter << 14;
  1606.  
  1607.       sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
  1608.                                  GEN6_MAPFILTER_ANISOTROPIC << 14 |
  1609.                                  1;
  1610.  
  1611.       dw1 = min_lod << 20 |
  1612.             max_lod << 8;
  1613.  
  1614.       if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
  1615.          dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
  1616.  
  1617.       dw3 = max_aniso << 19;
  1618.  
  1619.       /* round the coordinates for linear filtering */
  1620.       if (min_filter != GEN6_MAPFILTER_NEAREST) {
  1621.          dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
  1622.                  GEN6_SAMPLER_DW3_V_MIN_ROUND |
  1623.                  GEN6_SAMPLER_DW3_R_MIN_ROUND);
  1624.       }
  1625.       if (mag_filter != GEN6_MAPFILTER_NEAREST) {
  1626.          dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
  1627.                  GEN6_SAMPLER_DW3_V_MAG_ROUND |
  1628.                  GEN6_SAMPLER_DW3_R_MAG_ROUND);
  1629.       }
  1630.  
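            /* Non-normalized Coordinate Enable (see the PRM quote above) */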
  1631.       if (!state->normalized_coords)
  1632.          dw3 |= 1 << 10;
  1633.  
  1634.       sampler->dw_wrap = wrap_s << 6 |
  1635.                          wrap_t << 3 |
  1636.                          wrap_r;
  1637.  
  1638.       /*
  1639.        * As noted in the classic i965 driver, the HW may still reference
  1640.        * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
  1641.        * mode.
  1642.        */
  1643.       sampler->dw_wrap_1d = wrap_s << 6 |
  1644.                             GEN6_TEXCOORDMODE_WRAP << 3 |
  1645.                             GEN6_TEXCOORDMODE_WRAP;
  1646.  
  1647.       sampler->dw_wrap_cube = wrap_cube << 6 |
  1648.                               wrap_cube << 3 |
  1649.                               wrap_cube;
  1650.  
  1651.       STATIC_ASSERT(Elements(sampler->payload) >= 7);
  1652.  
  1653.       sampler->payload[0] = dw0;
  1654.       sampler->payload[1] = dw1;
  1655.       sampler->payload[2] = dw3;
  1656.  
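            /*
             * The Gallium border color is copied verbatim into payload[3..6]
             * here; Gen6 instead needs the packed variants produced by
             * sampler_init_border_color_gen6() in the other branch.
             */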
  1657.       memcpy(&sampler->payload[3],
  1658.             state->border_color.ui, sizeof(state->border_color.ui));
  1659.    }
  1660.    else {
  1661.       dw0 = 1 << 28 |
  1662.             mip_filter << 20 |
  1663.             lod_bias << 3;
  1664.  
  1665.       if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
  1666.          dw0 |= gen6_translate_shadow_func(state->compare_func);
  1667.  
  1668.       sampler->dw_filter = (min_filter != mag_filter) << 27 |
  1669.                            mag_filter << 17 |
  1670.                            min_filter << 14;
  1671.  
  1672.       sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
  1673.                                  GEN6_MAPFILTER_ANISOTROPIC << 14;
  1674.  
  1675.       dw1 = min_lod << 22 |
  1676.             max_lod << 12;
  1677.  
  1678.       sampler->dw_wrap = wrap_s << 6 |
  1679.                          wrap_t << 3 |
  1680.                          wrap_r;
  1681.  
  1682.       sampler->dw_wrap_1d = wrap_s << 6 |
  1683.                             GEN6_TEXCOORDMODE_WRAP << 3 |
  1684.                             GEN6_TEXCOORDMODE_WRAP;
  1685.  
  1686.       sampler->dw_wrap_cube = wrap_cube << 6 |
  1687.                               wrap_cube << 3 |
  1688.                               wrap_cube;
  1689.  
  1690.       dw3 = max_aniso << 19;
  1691.  
  1692.       /* round the coordinates for linear filtering */
  1693.       if (min_filter != GEN6_MAPFILTER_NEAREST) {
  1694.          dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
  1695.                  GEN6_SAMPLER_DW3_V_MIN_ROUND |
  1696.                  GEN6_SAMPLER_DW3_R_MIN_ROUND);
  1697.       }
  1698.       if (mag_filter != GEN6_MAPFILTER_NEAREST) {
  1699.          dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
  1700.                  GEN6_SAMPLER_DW3_V_MAG_ROUND |
  1701.                  GEN6_SAMPLER_DW3_R_MAG_ROUND);
  1702.       }
  1703.  
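            /* Non-normalized Coordinate Enable, as on Gen7+ above */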
  1704.       if (!state->normalized_coords)
  1705.          dw3 |= 1;
  1706.  
  1707.       STATIC_ASSERT(Elements(sampler->payload) >= 15);
  1708.  
  1709.       sampler->payload[0] = dw0;
  1710.       sampler->payload[1] = dw1;
  1711.       sampler->payload[2] = dw3;
  1712.  
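            /*
             * Gen6 needs the border color pre-packed in multiple formats;
             * sampler_init_border_color_gen6() fills 12 dwords starting at
             * payload[3] (the UNORM16 packing at the top of this listing is
             * part of it).
             */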
  1713.       sampler_init_border_color_gen6(dev,
  1714.             &state->border_color, &sampler->payload[3], 12);
  1715.    }
  1716. }
  1717.