Subversion Repositories Kolibri OS

Rev

Rev 4358 | Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2013 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. extern "C" {
  25. #include "main/teximage.h"
  26. #include "main/blend.h"
  27. #include "main/fbobject.h"
  28. #include "main/renderbuffer.h"
  29. }
  30.  
  31. #include "glsl/ralloc.h"
  32.  
  33. #include "intel_fbo.h"
  34.  
  35. #include "brw_blorp.h"
  36. #include "brw_context.h"
  37. #include "brw_eu.h"
  38. #include "brw_state.h"
  39.  
  40. #define FILE_DEBUG_FLAG DEBUG_BLORP
  41.  
  /**
   * Program-cache key selecting a variant of the constant-color WM program.
   *
   * The key is handed to the program cache by address and size (see
   * brw_blorp_const_color_params::get_wm_prog()), and the code that fills it
   * in zeroes the whole structure with memset() before setting any field.
   */
  struct brw_blorp_const_color_prog_key
  {
     /* True when the shader should use the "replicated data" render target
      * write message; false selects the regular SIMD16 message.
      */
     bool use_simd16_replicated_data;

     /* Explicit padding.  NOTE(review): presumably keeps the key a fixed
      * four bytes with no undefined bytes for the cache -- confirm.
      */
     bool pad[3];
  };
  47.  
  48. /**
  49.  * Parameters for a blorp operation where the fragment shader outputs a
  50.  * constant color.  This is used for both fast color clears and color
  51.  * resolves.
  52.  */
  53. class brw_blorp_const_color_params : public brw_blorp_params
  54. {
  55. public:
  56.    virtual uint32_t get_wm_prog(struct brw_context *brw,
  57.                                 brw_blorp_prog_data **prog_data) const;
  58.  
  59. protected:
  60.    brw_blorp_const_color_prog_key wm_prog_key;
  61. };
  62.  
  63. class brw_blorp_clear_params : public brw_blorp_const_color_params
  64. {
  65. public:
  66.    brw_blorp_clear_params(struct brw_context *brw,
  67.                           struct gl_framebuffer *fb,
  68.                           struct gl_renderbuffer *rb,
  69.                           GLubyte *color_mask,
  70.                           bool partial_clear);
  71. };
  72.  
  73.  
  74. /**
  75.  * Parameters for a blorp operation that performs a "render target resolve".
  76.  * This is used to resolve pending fast clear pixels before a color buffer is
  77.  * used for texturing, ReadPixels, or scanout.
  78.  */
  79. class brw_blorp_rt_resolve_params : public brw_blorp_const_color_params
  80. {
  81. public:
  82.    brw_blorp_rt_resolve_params(struct brw_context *brw,
  83.                                struct intel_mipmap_tree *mt);
  84. };
  85.  
  86.  
  87. class brw_blorp_const_color_program
  88. {
  89. public:
  90.    brw_blorp_const_color_program(struct brw_context *brw,
  91.                                  const brw_blorp_const_color_prog_key *key);
  92.    ~brw_blorp_const_color_program();
  93.  
  94.    const GLuint *compile(struct brw_context *brw, GLuint *program_size);
  95.  
  96.    brw_blorp_prog_data prog_data;
  97.  
  98. private:
  99.    void alloc_regs();
  100.  
  101.    void *mem_ctx;
  102.    struct brw_context *brw;
  103.    const brw_blorp_const_color_prog_key *key;
  104.    struct brw_compile func;
  105.  
  106.    /* Thread dispatch header */
  107.    struct brw_reg R0;
  108.  
  109.    /* Pixel X/Y coordinates (always in R1). */
  110.    struct brw_reg R1;
  111.  
  112.    /* Register with push constants (a single vec4) */
  113.    struct brw_reg clear_rgba;
  114.  
  115.    /* MRF used for render target writes */
  116.    GLuint base_mrf;
  117. };
  118.  
  119. brw_blorp_const_color_program::brw_blorp_const_color_program(
  120.       struct brw_context *brw,
  121.       const brw_blorp_const_color_prog_key *key)
  122.    : mem_ctx(ralloc_context(NULL)),
  123.      brw(brw),
  124.      key(key),
  125.      R0(),
  126.      R1(),
  127.      clear_rgba(),
  128.      base_mrf(0)
  129. {
  130.    brw_init_compile(brw, &func, mem_ctx);
  131. }
  132.  
  133. brw_blorp_const_color_program::~brw_blorp_const_color_program()
  134. {
  135.    ralloc_free(mem_ctx);
  136. }
  137.  
  138.  
  139. /**
  140.  * Determine if fast color clear supports the given clear color.
  141.  *
  142.  * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
  143.  * moment we only support floating point, unorm, and snorm buffers.
  144.  */
  145. static bool
  146. is_color_fast_clear_compatible(struct brw_context *brw,
  147.                                gl_format format,
  148.                                const union gl_color_union *color)
  149. {
  150.    if (_mesa_is_format_integer_color(format))
  151.       return false;
  152.  
  153.    for (int i = 0; i < 4; i++) {
  154.       if (color->f[i] != 0.0 && color->f[i] != 1.0) {
  155.          perf_debug("Clear color unsupported by fast color clear.  "
  156.                     "Falling back to slow clear.\n");
  157.          return false;
  158.       }
  159.    }
  160.    return true;
  161. }
  162.  
  163.  
  164. /**
  165.  * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
  166.  * SURFACE_STATE.
  167.  */
  168. static uint32_t
  169. compute_fast_clear_color_bits(const union gl_color_union *color)
  170. {
  171.    uint32_t bits = 0;
  172.    for (int i = 0; i < 4; i++) {
  173.       if (color->f[i] != 0.0)
  174.          bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
  175.    }
  176.    return bits;
  177. }
  178.  
  179.  
  180. brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
  181.                                                struct gl_framebuffer *fb,
  182.                                                struct gl_renderbuffer *rb,
  183.                                                GLubyte *color_mask,
  184.                                                bool partial_clear)
  185. {
  186.    struct gl_context *ctx = &brw->ctx;
  187.    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
  188.  
  189.    dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer);
  190.  
  191.    /* Override the surface format according to the context's sRGB rules. */
  192.    gl_format format = _mesa_get_render_format(ctx, irb->mt->format);
  193.    dst.brw_surfaceformat = brw->render_target_format[format];
  194.  
  195.    x0 = fb->_Xmin;
  196.    x1 = fb->_Xmax;
  197.    if (rb->Name != 0) {
  198.       y0 = fb->_Ymin;
  199.       y1 = fb->_Ymax;
  200.    } else {
  201.       y0 = rb->Height - fb->_Ymax;
  202.       y1 = rb->Height - fb->_Ymin;
  203.    }
  204.  
  205.    float *push_consts = (float *)&wm_push_consts;
  206.  
  207.    push_consts[0] = ctx->Color.ClearColor.f[0];
  208.    push_consts[1] = ctx->Color.ClearColor.f[1];
  209.    push_consts[2] = ctx->Color.ClearColor.f[2];
  210.    push_consts[3] = ctx->Color.ClearColor.f[3];
  211.  
  212.    use_wm_prog = true;
  213.  
  214.    memset(&wm_prog_key, 0, sizeof(wm_prog_key));
  215.  
  216.    wm_prog_key.use_simd16_replicated_data = true;
  217.  
  218.    /* From the SNB PRM (Vol4_Part1):
  219.     *
  220.     *     "Replicated data (Message Type = 111) is only supported when
  221.     *      accessing tiled memory.  Using this Message Type to access linear
  222.     *      (untiled) memory is UNDEFINED."
  223.     */
  224.    if (irb->mt->region->tiling == I915_TILING_NONE)
  225.       wm_prog_key.use_simd16_replicated_data = false;
  226.  
  227.    /* Constant color writes ignore everyting in blend and color calculator
  228.     * state.  This is not documented.
  229.     */
  230.    for (int i = 0; i < 4; i++) {
  231.       if (!color_mask[i]) {
  232.          color_write_disable[i] = true;
  233.          wm_prog_key.use_simd16_replicated_data = false;
  234.       }
  235.    }
  236.  
  237.    /* If we can do this as a fast color clear, do so. */
  238.    if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
  239.        wm_prog_key.use_simd16_replicated_data &&
  240.        is_color_fast_clear_compatible(brw, format, &ctx->Color.ClearColor)) {
  241.       memset(push_consts, 0xff, 4*sizeof(float));
  242.       fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
  243.  
  244.       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
  245.        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
  246.        *
  247.        *     Clear pass must have a clear rectangle that must follow alignment
  248.        *     rules in terms of pixels and lines as shown in the table
  249.        *     below. Further, the clear-rectangle height and width must be
  250.        *     multiple of the following dimensions. If the height and width of
  251.        *     the render target being cleared do not meet these requirements,
  252.        *     an MCS buffer can be created such that it follows the requirement
  253.        *     and covers the RT.
  254.        *
  255.        * The alignment size in the table that follows is related to the
  256.        * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
  257.        * with X alignment multiplied by 16 and Y alignment multiplied by 32.
  258.        */
  259.       unsigned x_align, y_align;
  260.       intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
  261.       x_align *= 16;
  262.       y_align *= 32;
  263.  
  264.          /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
  265.        * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
  266.        * Clear of Non-MultiSampled Render Target Restrictions":
  267.        *
  268.        *   Clear rectangle must be aligned to two times the number of pixels in
  269.        *   the table shown below due to 16x16 hashing across the slice.
  270.           */
  271.          x0 = ROUND_DOWN_TO(x0, 2 * x_align);
  272.          y0 = ROUND_DOWN_TO(y0, 2 * y_align);
  273.          x1 = ALIGN(x1, 2 * x_align);
  274.          y1 = ALIGN(y1, 2 * y_align);
  275.  
  276.       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
  277.        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
  278.        *
  279.        *     In order to optimize the performance MCS buffer (when bound to 1X
  280.        *     RT) clear similarly to MCS buffer clear for MSRT case, clear rect
  281.        *     is required to be scaled by the following factors in the
  282.        *     horizontal and vertical directions:
  283.        *
  284.        * The X and Y scale down factors in the table that follows are each
  285.        * equal to half the alignment value computed above.
  286.        */
  287.       unsigned x_scaledown = x_align / 2;
  288.       unsigned y_scaledown = y_align / 2;
  289.       x0 /= x_scaledown;
  290.       y0 /= y_scaledown;
  291.       x1 /= x_scaledown;
  292.       y1 /= y_scaledown;
  293.    }
  294. }
  295.  
  296.  
  297. brw_blorp_rt_resolve_params::brw_blorp_rt_resolve_params(
  298.       struct brw_context *brw,
  299.       struct intel_mipmap_tree *mt)
  300. {
  301.    dst.set(brw, mt, 0 /* level */, 0 /* layer */);
  302.  
  303.    /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
  304.     *
  305.     *     A rectangle primitive must be scaled down by the following factors
  306.     *     with respect to render target being resolved.
  307.     *
  308.     * The scaledown factors in the table that follows are related to the
  309.     * alignment size returned by intel_get_non_msrt_mcs_alignment(), but with
  310.     * X and Y alignment each divided by 2.
  311.     */
  312.    unsigned x_align, y_align;
  313.    intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
  314.    unsigned x_scaledown = x_align / 2;
  315.    unsigned y_scaledown = y_align / 2;
  316.    x0 = y0 = 0;
  317.    x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
  318.    y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
  319.  
  320.    fast_clear_op = GEN7_FAST_CLEAR_OP_RESOLVE;
  321.  
  322.    /* Note: there is no need to initialize push constants because it doesn't
  323.     * matter what data gets dispatched to the render target.  However, we must
  324.     * ensure that the fragment shader delivers the data using the "replicated
  325.     * color" message.
  326.     */
  327.    use_wm_prog = true;
  328.    memset(&wm_prog_key, 0, sizeof(wm_prog_key));
  329.    wm_prog_key.use_simd16_replicated_data = true;
  330. }
  331.  
  332.  
  333. uint32_t
  334. brw_blorp_const_color_params::get_wm_prog(struct brw_context *brw,
  335.                                           brw_blorp_prog_data **prog_data)
  336.    const
  337. {
  338.    uint32_t prog_offset = 0;
  339.    if (!brw_search_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
  340.                          &this->wm_prog_key, sizeof(this->wm_prog_key),
  341.                          &prog_offset, prog_data)) {
  342.       brw_blorp_const_color_program prog(brw, &this->wm_prog_key);
  343.       GLuint program_size;
  344.       const GLuint *program = prog.compile(brw, &program_size);
  345.       brw_upload_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
  346.                        &this->wm_prog_key, sizeof(this->wm_prog_key),
  347.                        program, program_size,
  348.                        &prog.prog_data, sizeof(prog.prog_data),
  349.                        &prog_offset, prog_data);
  350.    }
  351.    return prog_offset;
  352. }
  353.  
  354. void
  355. brw_blorp_const_color_program::alloc_regs()
  356. {
  357.    int reg = 0;
  358.    this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
  359.    this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
  360.  
  361.    prog_data.first_curbe_grf = reg;
  362.    clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
  363.    reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
  364.  
  365.    /* Make sure we didn't run out of registers */
  366.    assert(reg <= GEN7_MRF_HACK_START);
  367.  
  368.    this->base_mrf = 2;
  369. }
  370.  
  371. const GLuint *
  372. brw_blorp_const_color_program::compile(struct brw_context *brw,
  373.                                        GLuint *program_size)
  374. {
  375.    /* Set up prog_data */
  376.    memset(&prog_data, 0, sizeof(prog_data));
  377.    prog_data.persample_msaa_dispatch = false;
  378.  
  379.    alloc_regs();
  380.  
  381.    brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  382.  
  383.    struct brw_reg mrf_rt_write =
  384.       retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
  385.  
  386.    uint32_t mlen, msg_type;
  387.    if (key->use_simd16_replicated_data) {
  388.       /* The message payload is a single register with the low 4 floats/ints
  389.        * filled with the constant clear color.
  390.        */
  391.       brw_set_mask_control(&func, BRW_MASK_DISABLE);
  392.       brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
  393.       brw_set_mask_control(&func, BRW_MASK_ENABLE);
  394.  
  395.       msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
  396.       mlen = 1;
  397.    } else {
  398.       for (int i = 0; i < 4; i++) {
  399.          /* The message payload is pairs of registers for 16 pixels each of r,
  400.           * g, b, and a.
  401.           */
  402.          brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
  403.          brw_MOV(&func,
  404.                  brw_message_reg(base_mrf + i * 2),
  405.                  brw_vec1_grf(clear_rgba.nr, i));
  406.          brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  407.       }
  408.  
  409.       msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
  410.       mlen = 8;
  411.    }
  412.  
  413.    /* Now write to the render target and terminate the thread */
  414.    brw_fb_WRITE(&func,
  415.                 16 /* dispatch_width */,
  416.                 base_mrf /* msg_reg_nr */,
  417.                 mrf_rt_write /* src0 */,
  418.                 msg_type,
  419.                 BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
  420.                 mlen,
  421.                 0 /* response_length */,
  422.                 true /* eot */,
  423.                 false /* header present */);
  424.  
  425.    if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
  426.       printf("Native code for BLORP clear:\n");
  427.       brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
  428.       printf("\n");
  429.    }
  430.    return brw_get_program(&func, program_size);
  431. }
  432.  
  433. extern "C" {
  434. bool
  435. brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
  436.                       bool partial_clear)
  437. {
  438.    struct gl_context *ctx = &brw->ctx;
  439.  
  440.    /* The constant color clear code doesn't work for multisampled surfaces, so
  441.     * we need to support falling back to other clear mechanisms.
  442.     * Unfortunately, our clear code is based on a bitmask that doesn't
  443.     * distinguish individual color attachments, so we walk the attachments to
  444.     * see if any require fallback, and fall back for all if any of them need
  445.     * to.
  446.     */
  447.    for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
  448.       struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
  449.       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
  450.  
  451.       if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE)
  452.          return false;
  453.    }
  454.  
  455.    for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
  456.       struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
  457.       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
  458.  
  459.       /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
  460.        * the framebuffer can be complete with some attachments missing.  In
  461.        * this case the _ColorDrawBuffers pointer will be NULL.
  462.        */
  463.       if (rb == NULL)
  464.          continue;
  465.  
  466.       brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
  467.                                     partial_clear);
  468.  
  469.       bool is_fast_clear =
  470.          (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
  471.       if (is_fast_clear) {
  472.          /* Record the clear color in the miptree so that it will be
  473.           * programmed in SURFACE_STATE by later rendering and resolve
  474.           * operations.
  475.           */
  476.          uint32_t new_color_value =
  477.             compute_fast_clear_color_bits(&ctx->Color.ClearColor);
  478.          if (irb->mt->fast_clear_color_value != new_color_value) {
  479.             irb->mt->fast_clear_color_value = new_color_value;
  480.             brw->state.dirty.brw |= BRW_NEW_SURFACES;
  481.          }
  482.  
  483.          /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
  484.           * redundant and can be skipped.
  485.           */
  486.          if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
  487.             continue;
  488.  
  489.          /* If the MCS buffer hasn't been allocated yet, we need to allocate
  490.           * it now.
  491.           */
  492.          if (!irb->mt->mcs_mt) {
  493.             if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) {
  494.                /* MCS allocation failed--probably this will only happen in
  495.                 * out-of-memory conditions.  But in any case, try to recover
  496.                 * by falling back to a non-blorp clear technique.
  497.                 */
  498.                return false;
  499.             }
  500.             brw->state.dirty.brw |= BRW_NEW_SURFACES;
  501.          }
  502.       }
  503.  
  504.       DBG("%s to mt %p level %d layer %d\n", __FUNCTION__,
  505.           irb->mt, irb->mt_level, irb->mt_layer);
  506.  
  507.       brw_blorp_exec(brw, &params);
  508.  
  509.       if (is_fast_clear) {
  510.          /* Now that the fast clear has occurred, put the buffer in
  511.           * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
  512.           * clears.
  513.           */
  514.          irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
  515.       }
  516.    }
  517.  
  518.    return true;
  519. }
  520.  
  521. void
  522. brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt)
  523. {
  524.    DBG("%s to mt %p\n", __FUNCTION__, mt);
  525.  
  526.    brw_blorp_rt_resolve_params params(brw, mt);
  527.    brw_blorp_exec(brw, &params);
  528.    mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
  529. }
  530.  
  531. } /* extern "C" */
  532.