/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

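/**
 * Translate a GL texture target into the BRW_SURFACE_* surface type used
 * in SURFACE_STATE.  Rectangle and external textures are handled as
 * ordinary 2D surfaces.
 */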
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}

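/**
 * Translate an I915_TILING_* mode into the tiling bits of SURFACE_STATE
 * (linear, X-tiled, or Y-tiled).
 */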
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


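/**
 * Translate a sample count into the SURFACE_STATE "Number of Multisamples"
 * encoding.  Any multisampled surface is encoded as 4x, presumably because
 * 4x is the only multisampled count supported by the hardware this code
 * targets.
 */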
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

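   /* Compose the format-derived swizzles computed above with the
    * application's EXT_texture_swizzle state, yielding a single swizzle
    * that accounts for both.
    */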
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}


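/**
 * Construct SURFACE_STATE for a buffer texture (GL_TEXTURE_BUFFER); these
 * are laid out as SURFTYPE_BUFFER surfaces rather than miptrees.
 */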
static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *binding_table,
                                  unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   uint32_t *surf;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
   gl_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   if (bo) {
      surf[1] = bo->offset; /* reloc */

      /* Emit relocation to surface contents. */
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              binding_table[surf_index] + 4,
                              bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);

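      /* A buffer surface expresses its size as an element count split
       * across the Width (bits 6:0), Height (bits 19:7), and Depth
       * (bits 26:20) fields, for a 27-bit total.
       */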
      int w = intel_obj->Base.Size / texel_size;
      surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
                 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
      surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
                 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
   } else {
      surf[1] = 0;
      surf[2] = 0;
      surf[3] = 0;
   }

   surf[4] = 0;
   surf[5] = 0;
}

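/**
 * Construct SURFACE_STATE for a texture unit's currently bound texture,
 * deferring to brw_update_buffer_texture_surface() for buffer textures.
 */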
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) <<
              BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants
 * will be read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}


/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
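   /* As with other buffer surfaces, the entry count is split across the
    * Width (7 bits), Height (13 bits), and Depth (7 bits) fields.
    */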
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

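/**
 * Construct a null SURFACE_STATE for an unbound render target, so that
 * color writes to it are discarded (see the PRM quote below, and the Gen6
 * multisampling workaround in the body).
 */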
static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So we work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[unit] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}


/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB (no alpha bits in the visual).
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[unit] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}


/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
                                                  ctx->DrawBuffer->_ColorDrawBuffers[i],
                                                  ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR.  These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

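/**
 * Create a constant buffer surface for each uniform block the given shader
 * uses, so that UBO contents can be fetched with data port reads.
 */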
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        uint32_t *surf_offsets)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * glBindBufferBase wants and be a correct implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

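/**
 * Upload UBO surfaces for the currently bound fragment shader, if any.
 */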
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/**
 * Constructs the binding table for the WM surface state, which maps unit
 * numbers to surface state objects.
 */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      gen7_create_shader_time_surface(brw,
                                      &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
   }

   /* Might want to calculate nr_surfaces first, to avoid taking up so much
    * space for the binding table.
    */
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
                          32, &brw->wm.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
      bind[i] = brw->wm.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}

const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = 0
   },
   .emit = brw_upload_wm_binding_table,
};

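/**
 * Install the gen4/5 surface state functions in the context's vtable;
 * newer generations install their own variants instead.
 */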
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}