Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2013 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. extern "C" {
  25. #include "main/teximage.h"
  26. #include "main/blend.h"
  27. #include "main/fbobject.h"
  28. #include "main/renderbuffer.h"
  29. }
  30.  
  31. #include "glsl/ralloc.h"
  32.  
  33. #include "intel_fbo.h"
  34.  
  35. #include "brw_blorp.h"
  36. #include "brw_context.h"
  37. #include "brw_eu.h"
  38. #include "brw_state.h"
  39.  
  40. #define FILE_DEBUG_FLAG DEBUG_BLORP
  41.  
  /**
   * Program-cache key for the constant-color fragment program.
   *
   * The params constructors in this file memset() the whole key to zero before
   * filling it in, and get_wm_prog() hands the key to the program cache
   * together with sizeof(key), so every byte of the struct is significant.
   */
  struct brw_blorp_const_color_prog_key
  {
     /* When true, compile() emits the single-register "replicated data"
      * render target write; otherwise it emits the full SIMD16 payload.
      */
     bool use_simd16_replicated_data;

     /* Explicit padding so the key has no unnamed trailing bytes. */
     bool pad[3];
  };
  47.  
  48. /**
  49.  * Parameters for a blorp operation where the fragment shader outputs a
  50.  * constant color.  This is used for both fast color clears and color
  51.  * resolves.
  52.  */
  53. class brw_blorp_const_color_params : public brw_blorp_params
  54. {
  55. public:
  56.    virtual uint32_t get_wm_prog(struct brw_context *brw,
  57.                                 brw_blorp_prog_data **prog_data) const;
  58.  
  59. protected:
  60.    brw_blorp_const_color_prog_key wm_prog_key;
  61. };
  62.  
  63. class brw_blorp_clear_params : public brw_blorp_const_color_params
  64. {
  65. public:
  66.    brw_blorp_clear_params(struct brw_context *brw,
  67.                           struct gl_framebuffer *fb,
  68.                           struct gl_renderbuffer *rb,
  69.                           GLubyte *color_mask,
  70.                           bool partial_clear);
  71. };
  72.  
  73.  
  74. /**
  75.  * Parameters for a blorp operation that performs a "render target resolve".
  76.  * This is used to resolve pending fast clear pixels before a color buffer is
  77.  * used for texturing, ReadPixels, or scanout.
  78.  */
  79. class brw_blorp_rt_resolve_params : public brw_blorp_const_color_params
  80. {
  81. public:
  82.    brw_blorp_rt_resolve_params(struct brw_context *brw,
  83.                                struct intel_mipmap_tree *mt);
  84. };
  85.  
  86.  
  87. class brw_blorp_const_color_program
  88. {
  89. public:
  90.    brw_blorp_const_color_program(struct brw_context *brw,
  91.                                  const brw_blorp_const_color_prog_key *key);
  92.    ~brw_blorp_const_color_program();
  93.  
  94.    const GLuint *compile(struct brw_context *brw, GLuint *program_size);
  95.  
  96.    brw_blorp_prog_data prog_data;
  97.  
  98. private:
  99.    void alloc_regs();
  100.  
  101.    void *mem_ctx;
  102.    struct brw_context *brw;
  103.    const brw_blorp_const_color_prog_key *key;
  104.    struct brw_compile func;
  105.  
  106.    /* Thread dispatch header */
  107.    struct brw_reg R0;
  108.  
  109.    /* Pixel X/Y coordinates (always in R1). */
  110.    struct brw_reg R1;
  111.  
  112.    /* Register with push constants (a single vec4) */
  113.    struct brw_reg clear_rgba;
  114.  
  115.    /* MRF used for render target writes */
  116.    GLuint base_mrf;
  117. };
  118.  
  119. brw_blorp_const_color_program::brw_blorp_const_color_program(
  120.       struct brw_context *brw,
  121.       const brw_blorp_const_color_prog_key *key)
  122.    : mem_ctx(ralloc_context(NULL)),
  123.      brw(brw),
  124.      key(key),
  125.      R0(),
  126.      R1(),
  127.      clear_rgba(),
  128.      base_mrf(0)
  129. {
  130.    brw_init_compile(brw, &func, mem_ctx);
  131. }
  132.  
  133. brw_blorp_const_color_program::~brw_blorp_const_color_program()
  134. {
  135.    ralloc_free(mem_ctx);
  136. }
  137.  
  138.  
  139. /**
  140.  * Determine if fast color clear supports the given clear color.
  141.  *
  142.  * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
  143.  * moment we only support floating point, unorm, and snorm buffers.
  144.  */
  145. static bool
  146. is_color_fast_clear_compatible(struct brw_context *brw,
  147.                                gl_format format,
  148.                                const union gl_color_union *color)
  149. {
  150.    if (_mesa_is_format_integer_color(format))
  151.       return false;
  152.  
  153.    for (int i = 0; i < 4; i++) {
  154.       if (color->f[i] != 0.0 && color->f[i] != 1.0) {
  155.          perf_debug("Clear color unsupported by fast color clear.  "
  156.                     "Falling back to slow clear.\n");
  157.          return false;
  158.       }
  159.    }
  160.    return true;
  161. }
  162.  
  163.  
  164. /**
  165.  * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
  166.  * SURFACE_STATE.
  167.  */
  168. static uint32_t
  169. compute_fast_clear_color_bits(const union gl_color_union *color)
  170. {
  171.    uint32_t bits = 0;
  172.    for (int i = 0; i < 4; i++) {
  173.       if (color->f[i] != 0.0)
  174.          bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
  175.    }
  176.    return bits;
  177. }
  178.  
  179.  
  180. brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
  181.                                                struct gl_framebuffer *fb,
  182.                                                struct gl_renderbuffer *rb,
  183.                                                GLubyte *color_mask,
  184.                                                bool partial_clear)
  185. {
  186.    struct gl_context *ctx = &brw->ctx;
  187.    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
  188.  
  189.    dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer);
  190.  
  191.    /* Override the surface format according to the context's sRGB rules. */
  192.    gl_format format = _mesa_get_render_format(ctx, irb->mt->format);
  193.    dst.brw_surfaceformat = brw->render_target_format[format];
  194.  
  195.    x0 = fb->_Xmin;
  196.    x1 = fb->_Xmax;
  197.    if (rb->Name != 0) {
  198.       y0 = fb->_Ymin;
  199.       y1 = fb->_Ymax;
  200.    } else {
  201.       y0 = rb->Height - fb->_Ymax;
  202.       y1 = rb->Height - fb->_Ymin;
  203.    }
  204.  
  205.    float *push_consts = (float *)&wm_push_consts;
  206.  
  207.    push_consts[0] = ctx->Color.ClearColor.f[0];
  208.    push_consts[1] = ctx->Color.ClearColor.f[1];
  209.    push_consts[2] = ctx->Color.ClearColor.f[2];
  210.    push_consts[3] = ctx->Color.ClearColor.f[3];
  211.  
  212.    use_wm_prog = true;
  213.  
  214.    memset(&wm_prog_key, 0, sizeof(wm_prog_key));
  215.  
  216.    wm_prog_key.use_simd16_replicated_data = true;
  217.  
  218.    /* From the SNB PRM (Vol4_Part1):
  219.     *
  220.     *     "Replicated data (Message Type = 111) is only supported when
  221.     *      accessing tiled memory.  Using this Message Type to access linear
  222.     *      (untiled) memory is UNDEFINED."
  223.     */
  224.    if (irb->mt->region->tiling == I915_TILING_NONE)
  225.       wm_prog_key.use_simd16_replicated_data = false;
  226.  
  227.    /* Constant color writes ignore everyting in blend and color calculator
  228.     * state.  This is not documented.
  229.     */
  230.    for (int i = 0; i < 4; i++) {
  231.       if (!color_mask[i]) {
  232.          color_write_disable[i] = true;
  233.          wm_prog_key.use_simd16_replicated_data = false;
  234.       }
  235.    }
  236.  
  237.    /* If we can do this as a fast color clear, do so. */
  238.    if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
  239.        wm_prog_key.use_simd16_replicated_data &&
  240.        is_color_fast_clear_compatible(brw, format, &ctx->Color.ClearColor)) {
  241.       memset(push_consts, 0xff, 4*sizeof(float));
  242.       fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
  243.  
  244.       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
  245.        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
  246.        *
  247.        *     Clear pass must have a clear rectangle that must follow alignment
  248.        *     rules in terms of pixels and lines as shown in the table
  249.        *     below. Further, the clear-rectangle height and width must be
  250.        *     multiple of the following dimensions. If the height and width of
  251.        *     the render target being cleared do not meet these requirements,
  252.        *     an MCS buffer can be created such that it follows the requirement
  253.        *     and covers the RT.
  254.        *
  255.        * The alignment size in the table that follows is related to the
  256.        * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
  257.        * with X alignment multiplied by 16 and Y alignment multiplied by 32.
  258.        */
  259.       unsigned x_align, y_align;
  260.       intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
  261.       x_align *= 16;
  262.       y_align *= 32;
  263.  
  264.       if (brw->is_haswell && brw->gt == 3) {
  265.          /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
  266.           * Backend > MCS Buffer for Render Target(s) [DevIVB+]:
  267.           * [DevHSW:GT3]: Clear rectangle must be aligned to two times the
  268.           * number of pixels in the table shown below...
  269.           * x_align, y_align values computed above are the relevant entries
  270.           * in the referred table.
  271.           */
  272.          x0 = ROUND_DOWN_TO(x0, 2 * x_align);
  273.          y0 = ROUND_DOWN_TO(y0, 2 * y_align);
  274.          x1 = ALIGN(x1, 2 * x_align);
  275.          y1 = ALIGN(y1, 2 * y_align);
  276.       } else {
  277.          x0 = ROUND_DOWN_TO(x0,  x_align);
  278.          y0 = ROUND_DOWN_TO(y0, y_align);
  279.          x1 = ALIGN(x1, x_align);
  280.          y1 = ALIGN(y1, y_align);
  281.       }
  282.  
  283.       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
  284.        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
  285.        *
  286.        *     In order to optimize the performance MCS buffer (when bound to 1X
  287.        *     RT) clear similarly to MCS buffer clear for MSRT case, clear rect
  288.        *     is required to be scaled by the following factors in the
  289.        *     horizontal and vertical directions:
  290.        *
  291.        * The X and Y scale down factors in the table that follows are each
  292.        * equal to half the alignment value computed above.
  293.        */
  294.       unsigned x_scaledown = x_align / 2;
  295.       unsigned y_scaledown = y_align / 2;
  296.       x0 /= x_scaledown;
  297.       y0 /= y_scaledown;
  298.       x1 /= x_scaledown;
  299.       y1 /= y_scaledown;
  300.    }
  301. }
  302.  
  303.  
  304. brw_blorp_rt_resolve_params::brw_blorp_rt_resolve_params(
  305.       struct brw_context *brw,
  306.       struct intel_mipmap_tree *mt)
  307. {
  308.    dst.set(brw, mt, 0 /* level */, 0 /* layer */);
  309.  
  310.    /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
  311.     *
  312.     *     A rectangle primitive must be scaled down by the following factors
  313.     *     with respect to render target being resolved.
  314.     *
  315.     * The scaledown factors in the table that follows are related to the
  316.     * alignment size returned by intel_get_non_msrt_mcs_alignment(), but with
  317.     * X and Y alignment each divided by 2.
  318.     */
  319.    unsigned x_align, y_align;
  320.    intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
  321.    unsigned x_scaledown = x_align / 2;
  322.    unsigned y_scaledown = y_align / 2;
  323.    x0 = y0 = 0;
  324.    x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
  325.    y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
  326.  
  327.    fast_clear_op = GEN7_FAST_CLEAR_OP_RESOLVE;
  328.  
  329.    /* Note: there is no need to initialize push constants because it doesn't
  330.     * matter what data gets dispatched to the render target.  However, we must
  331.     * ensure that the fragment shader delivers the data using the "replicated
  332.     * color" message.
  333.     */
  334.    use_wm_prog = true;
  335.    memset(&wm_prog_key, 0, sizeof(wm_prog_key));
  336.    wm_prog_key.use_simd16_replicated_data = true;
  337. }
  338.  
  339.  
  340. uint32_t
  341. brw_blorp_const_color_params::get_wm_prog(struct brw_context *brw,
  342.                                           brw_blorp_prog_data **prog_data)
  343.    const
  344. {
  345.    uint32_t prog_offset = 0;
  346.    if (!brw_search_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
  347.                          &this->wm_prog_key, sizeof(this->wm_prog_key),
  348.                          &prog_offset, prog_data)) {
  349.       brw_blorp_const_color_program prog(brw, &this->wm_prog_key);
  350.       GLuint program_size;
  351.       const GLuint *program = prog.compile(brw, &program_size);
  352.       brw_upload_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
  353.                        &this->wm_prog_key, sizeof(this->wm_prog_key),
  354.                        program, program_size,
  355.                        &prog.prog_data, sizeof(prog.prog_data),
  356.                        &prog_offset, prog_data);
  357.    }
  358.    return prog_offset;
  359. }
  360.  
  361. void
  362. brw_blorp_const_color_program::alloc_regs()
  363. {
  364.    int reg = 0;
  365.    this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
  366.    this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
  367.  
  368.    prog_data.first_curbe_grf = reg;
  369.    clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
  370.    reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
  371.  
  372.    /* Make sure we didn't run out of registers */
  373.    assert(reg <= GEN7_MRF_HACK_START);
  374.  
  375.    this->base_mrf = 2;
  376. }
  377.  
  378. const GLuint *
  379. brw_blorp_const_color_program::compile(struct brw_context *brw,
  380.                                        GLuint *program_size)
  381. {
  382.    /* Set up prog_data */
  383.    memset(&prog_data, 0, sizeof(prog_data));
  384.    prog_data.persample_msaa_dispatch = false;
  385.  
  386.    alloc_regs();
  387.  
  388.    brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  389.  
  390.    struct brw_reg mrf_rt_write =
  391.       retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
  392.  
  393.    uint32_t mlen, msg_type;
  394.    if (key->use_simd16_replicated_data) {
  395.       /* The message payload is a single register with the low 4 floats/ints
  396.        * filled with the constant clear color.
  397.        */
  398.       brw_set_mask_control(&func, BRW_MASK_DISABLE);
  399.       brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
  400.       brw_set_mask_control(&func, BRW_MASK_ENABLE);
  401.  
  402.       msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
  403.       mlen = 1;
  404.    } else {
  405.       for (int i = 0; i < 4; i++) {
  406.          /* The message payload is pairs of registers for 16 pixels each of r,
  407.           * g, b, and a.
  408.           */
  409.          brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
  410.          brw_MOV(&func,
  411.                  brw_message_reg(base_mrf + i * 2),
  412.                  brw_vec1_grf(clear_rgba.nr, i));
  413.          brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  414.       }
  415.  
  416.       msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
  417.       mlen = 8;
  418.    }
  419.  
  420.    /* Now write to the render target and terminate the thread */
  421.    brw_fb_WRITE(&func,
  422.                 16 /* dispatch_width */,
  423.                 base_mrf /* msg_reg_nr */,
  424.                 mrf_rt_write /* src0 */,
  425.                 msg_type,
  426.                 BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
  427.                 mlen,
  428.                 0 /* response_length */,
  429.                 true /* eot */,
  430.                 false /* header present */);
  431.  
  432.    if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
  433.       printf("Native code for BLORP clear:\n");
  434.       brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
  435.       printf("\n");
  436.    }
  437.    return brw_get_program(&func, program_size);
  438. }
  439.  
  440. extern "C" {
  441. bool
  442. brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
  443.                       bool partial_clear)
  444. {
  445.    struct gl_context *ctx = &brw->ctx;
  446.  
  447.    /* The constant color clear code doesn't work for multisampled surfaces, so
  448.     * we need to support falling back to other clear mechanisms.
  449.     * Unfortunately, our clear code is based on a bitmask that doesn't
  450.     * distinguish individual color attachments, so we walk the attachments to
  451.     * see if any require fallback, and fall back for all if any of them need
  452.     * to.
  453.     */
  454.    for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
  455.       struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
  456.       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
  457.  
  458.       if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE)
  459.          return false;
  460.    }
  461.  
  462.    for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
  463.       struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
  464.       struct intel_renderbuffer *irb = intel_renderbuffer(rb);
  465.  
  466.       /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
  467.        * the framebuffer can be complete with some attachments missing.  In
  468.        * this case the _ColorDrawBuffers pointer will be NULL.
  469.        */
  470.       if (rb == NULL)
  471.          continue;
  472.  
  473.       brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
  474.                                     partial_clear);
  475.  
  476.       bool is_fast_clear =
  477.          (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
  478.       if (is_fast_clear) {
  479.          /* Record the clear color in the miptree so that it will be
  480.           * programmed in SURFACE_STATE by later rendering and resolve
  481.           * operations.
  482.           */
  483.          uint32_t new_color_value =
  484.             compute_fast_clear_color_bits(&ctx->Color.ClearColor);
  485.          if (irb->mt->fast_clear_color_value != new_color_value) {
  486.             irb->mt->fast_clear_color_value = new_color_value;
  487.             brw->state.dirty.brw |= BRW_NEW_SURFACES;
  488.          }
  489.  
  490.          /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
  491.           * redundant and can be skipped.
  492.           */
  493.          if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
  494.             continue;
  495.  
  496.          /* If the MCS buffer hasn't been allocated yet, we need to allocate
  497.           * it now.
  498.           */
  499.          if (!irb->mt->mcs_mt) {
  500.             if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) {
  501.                /* MCS allocation failed--probably this will only happen in
  502.                 * out-of-memory conditions.  But in any case, try to recover
  503.                 * by falling back to a non-blorp clear technique.
  504.                 */
  505.                return false;
  506.             }
  507.             brw->state.dirty.brw |= BRW_NEW_SURFACES;
  508.          }
  509.       }
  510.  
  511.       DBG("%s to mt %p level %d layer %d\n", __FUNCTION__,
  512.           irb->mt, irb->mt_level, irb->mt_layer);
  513.  
  514.       brw_blorp_exec(brw, &params);
  515.  
  516.       if (is_fast_clear) {
  517.          /* Now that the fast clear has occurred, put the buffer in
  518.           * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
  519.           * clears.
  520.           */
  521.          irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
  522.       }
  523.    }
  524.  
  525.    return true;
  526. }
  527.  
  528. void
  529. brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt)
  530. {
  531.    DBG("%s to mt %p\n", __FUNCTION__, mt);
  532.  
  533.    brw_blorp_rt_resolve_params params(brw, mt);
  534.    brw_blorp_exec(brw, &params);
  535.    mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
  536. }
  537.  
  538. } /* extern "C" */
  539.