Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2008 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *    Kenneth Graunke <kenneth@whitecape.org>
  26.  */
  27.  
  28. /** @file gen6_queryobj.c
  29.  *
  30.  * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
  31.  * GL_EXT_transform_feedback, and friends) on platforms that support
  32.  * hardware contexts (Gen6+).
  33.  */
  34. #include "main/imports.h"
  35.  
  36. #include "brw_context.h"
  37. #include "brw_defines.h"
  38. #include "brw_state.h"
  39. #include "intel_batchbuffer.h"
  40. #include "intel_reg.h"
  41.  
  42. /**
  43.  * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
  44.  */
  45. static void
  46. write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
  47. {
  48.    /* Emit workaround flushes: */
  49.    if (brw->gen == 6) {
  50.       /* The timestamp write below is a non-zero post-sync op, which on
  51.        * Gen6 necessitates a CS stall.  CS stalls need stall at scoreboard
  52.        * set.  See the comments for intel_emit_post_sync_nonzero_flush().
  53.        */
  54.       BEGIN_BATCH(4);
  55.       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
  56.       OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD);
  57.       OUT_BATCH(0);
  58.       OUT_BATCH(0);
  59.       ADVANCE_BATCH();
  60.    }
  61.  
  62.    BEGIN_BATCH(5);
  63.    OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
  64.    OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
  65.    OUT_RELOC(query_bo,
  66.              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  67.              PIPE_CONTROL_GLOBAL_GTT_WRITE |
  68.              idx * sizeof(uint64_t));
  69.    OUT_BATCH(0);
  70.    OUT_BATCH(0);
  71.    ADVANCE_BATCH();
  72. }
  73.  
  74. /**
  75.  * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
  76.  */
  77. static void
  78. write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
  79. {
  80.    /* Emit Sandybridge workaround flush: */
  81.    if (brw->gen == 6)
  82.       intel_emit_post_sync_nonzero_flush(brw);
  83.  
  84.    BEGIN_BATCH(5);
  85.    OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
  86.    OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
  87.              PIPE_CONTROL_WRITE_DEPTH_COUNT);
  88.    OUT_RELOC(query_bo,
  89.              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  90.              PIPE_CONTROL_GLOBAL_GTT_WRITE |
  91.              (idx * sizeof(uint64_t)));
  92.    OUT_BATCH(0);
  93.    OUT_BATCH(0);
  94.    ADVANCE_BATCH();
  95. }
  96.  
  97. /*
  98.  * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
  99.  *
  100.  * Only TIMESTAMP and PS_DEPTH_COUNT have special PIPE_CONTROL support; other
  101.  * counters have to be read via the generic MI_STORE_REGISTER_MEM.  This
  102.  * function also performs a pipeline flush for proper synchronization.
  103.  */
  104. static void
  105. write_reg(struct brw_context *brw,
  106.           drm_intel_bo *query_bo, uint32_t reg, int idx)
  107. {
  108.    assert(brw->gen >= 6);
  109.  
  110.    intel_batchbuffer_emit_mi_flush(brw);
  111.  
  112.    /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
  113.     * read a full 64-bit register, we need to do two of them.
  114.     */
  115.    BEGIN_BATCH(3);
  116.    OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
  117.    OUT_BATCH(reg);
  118.    OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  119.              idx * sizeof(uint64_t));
  120.    ADVANCE_BATCH();
  121.  
  122.    BEGIN_BATCH(3);
  123.    OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
  124.    OUT_BATCH(reg + sizeof(uint32_t));
  125.    OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  126.              sizeof(uint32_t) + idx * sizeof(uint64_t));
  127.    ADVANCE_BATCH();
  128. }
  129.  
  130. static void
  131. write_primitives_generated(struct brw_context *brw,
  132.                            drm_intel_bo *query_bo, int idx)
  133. {
  134.    write_reg(brw, query_bo, CL_INVOCATION_COUNT, idx);
  135. }
  136.  
  137. static void
  138. write_xfb_primitives_written(struct brw_context *brw,
  139.                              drm_intel_bo *query_bo, int idx)
  140. {
  141.    if (brw->gen >= 7) {
  142.       write_reg(brw, query_bo, SO_NUM_PRIMS_WRITTEN0_IVB, idx);
  143.    } else {
  144.       write_reg(brw, query_bo, SO_NUM_PRIMS_WRITTEN, idx);
  145.    }
  146. }
  147.  
  148. /**
  149.  * Wait on the query object's BO and calculate the final result.
  150.  */
  151. static void
  152. gen6_queryobj_get_results(struct gl_context *ctx,
  153.                           struct brw_query_object *query)
  154. {
  155.    struct brw_context *brw = brw_context(ctx);
  156.  
  157.    if (query->bo == NULL)
  158.       return;
  159.  
  160.    /* If the application has requested the query result, but this batch is
  161.     * still contributing to it, flush it now so the results will be present
  162.     * when mapped.
  163.     */
  164.    if (drm_intel_bo_references(brw->batch.bo, query->bo))
  165.       intel_batchbuffer_flush(brw);
  166.  
  167.    if (unlikely(brw->perf_debug)) {
  168.       if (drm_intel_bo_busy(query->bo)) {
  169.          perf_debug("Stalling on the GPU waiting for a query object.\n");
  170.       }
  171.    }
  172.  
  173.    drm_intel_bo_map(query->bo, false);
  174.    uint64_t *results = query->bo->virtual;
  175.    switch (query->Base.Target) {
  176.    case GL_TIME_ELAPSED:
  177.       /* The query BO contains the starting and ending timestamps.
  178.        * Subtract the two and convert to nanoseconds.
  179.        */
  180.       query->Base.Result += 80 * (results[1] - results[0]);
  181.       break;
  182.  
  183.    case GL_TIMESTAMP:
  184.       /* Our timer is a clock that increments every 80ns (regardless of
  185.        * other clock scaling in the system).  The timestamp register we can
  186.        * read for glGetTimestamp() masks out the top 32 bits, so we do that
  187.        * here too to let the two counters be compared against each other.
  188.        *
  189.        * If we just multiplied that 32 bits of data by 80, it would roll
  190.        * over at a non-power-of-two, so an application couldn't use
  191.        * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
  192.        * report 36 bits and truncate at that (rolling over 5 times as often
  193.        * as the HW counter), and when the 32-bit counter rolls over, it
  194.        * happens to also be at a rollover in the reported value from near
  195.        * (1<<36) to 0.
  196.        *
  197.        * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
  198.        * rolls over every ~69 seconds.
  199.        *
  200.        * The query BO contains a single timestamp value in results[0].
  201.        */
  202.       query->Base.Result = 80 * (results[0] & 0xffffffff);
  203.       query->Base.Result &= (1ull << 36) - 1;
  204.       break;
  205.  
  206.    case GL_SAMPLES_PASSED_ARB:
  207.       /* We need to use += rather than = here since some BLT-based operations
  208.        * may have added additional samples to our occlusion query value.
  209.        */
  210.       query->Base.Result += results[1] - results[0];
  211.       break;
  212.  
  213.    case GL_ANY_SAMPLES_PASSED:
  214.    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
  215.       if (results[0] != results[1])
  216.          query->Base.Result = true;
  217.       break;
  218.  
  219.    case GL_PRIMITIVES_GENERATED:
  220.    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
  221.       query->Base.Result = results[1] - results[0];
  222.       break;
  223.  
  224.    default:
  225.       assert(!"Unrecognized query target in brw_queryobj_get_results()");
  226.       break;
  227.    }
  228.    drm_intel_bo_unmap(query->bo);
  229.  
  230.    /* Now that we've processed the data stored in the query's buffer object,
  231.     * we can release it.
  232.     */
  233.    drm_intel_bo_unreference(query->bo);
  234.    query->bo = NULL;
  235. }
  236.  
  237. /**
  238.  * Driver hook for glBeginQuery().
  239.  *
  240.  * Initializes driver structures and emits any GPU commands required to begin
  241.  * recording data for the query.
  242.  */
  243. static void
  244. gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
  245. {
  246.    struct brw_context *brw = brw_context(ctx);
  247.    struct brw_query_object *query = (struct brw_query_object *)q;
  248.  
  249.    /* Since we're starting a new query, we need to throw away old results. */
  250.    drm_intel_bo_unreference(query->bo);
  251.    query->bo = drm_intel_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
  252.  
  253.    switch (query->Base.Target) {
  254.    case GL_TIME_ELAPSED:
  255.       /* For timestamp queries, we record the starting time right away so that
  256.        * we measure the full time between BeginQuery and EndQuery.  There's
  257.        * some debate about whether this is the right thing to do.  Our decision
  258.        * is based on the following text from the ARB_timer_query extension:
  259.        *
  260.        * "(5) Should the extension measure total time elapsed between the full
  261.        *      completion of the BeginQuery and EndQuery commands, or just time
  262.        *      spent in the graphics library?
  263.        *
  264.        *  RESOLVED:  This extension will measure the total time elapsed
  265.        *  between the full completion of these commands.  Future extensions
  266.        *  may implement a query to determine time elapsed at different stages
  267.        *  of the graphics pipeline."
  268.        *
  269.        * We write a starting timestamp now (at index 0).  At EndQuery() time,
  270.        * we'll write a second timestamp (at index 1), and subtract the two to
  271.        * obtain the time elapsed.  Notably, this includes time elapsed while
  272.        * the system was doing other work, such as running other applications.
  273.        */
  274.       write_timestamp(brw, query->bo, 0);
  275.       break;
  276.  
  277.    case GL_ANY_SAMPLES_PASSED:
  278.    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
  279.    case GL_SAMPLES_PASSED_ARB:
  280.       write_depth_count(brw, query->bo, 0);
  281.       break;
  282.  
  283.    case GL_PRIMITIVES_GENERATED:
  284.       write_primitives_generated(brw, query->bo, 0);
  285.       break;
  286.  
  287.    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
  288.       write_xfb_primitives_written(brw, query->bo, 0);
  289.       break;
  290.  
  291.    default:
  292.       assert(!"Unrecognized query target in brw_begin_query()");
  293.       break;
  294.    }
  295. }
  296.  
  297. /**
  298.  * Driver hook for glEndQuery().
  299.  *
  300.  * Emits GPU commands to record a final query value, ending any data capturing.
  301.  * However, the final result isn't necessarily available until the GPU processes
  302.  * those commands.  brw_queryobj_get_results() processes the captured data to
  303.  * produce the final result.
  304.  */
  305. static void
  306. gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
  307. {
  308.    struct brw_context *brw = brw_context(ctx);
  309.    struct brw_query_object *query = (struct brw_query_object *)q;
  310.  
  311.    switch (query->Base.Target) {
  312.    case GL_TIME_ELAPSED:
  313.       write_timestamp(brw, query->bo, 1);
  314.       break;
  315.  
  316.    case GL_ANY_SAMPLES_PASSED:
  317.    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
  318.    case GL_SAMPLES_PASSED_ARB:
  319.       write_depth_count(brw, query->bo, 1);
  320.       break;
  321.  
  322.    case GL_PRIMITIVES_GENERATED:
  323.       write_primitives_generated(brw, query->bo, 1);
  324.       break;
  325.  
  326.    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
  327.       write_xfb_primitives_written(brw, query->bo, 1);
  328.       break;
  329.  
  330.    default:
  331.       assert(!"Unrecognized query target in brw_end_query()");
  332.       break;
  333.    }
  334. }
  335.  
  336. /**
  337.  * The WaitQuery() driver hook.
  338.  *
  339.  * Wait for a query result to become available and return it.  This is the
  340.  * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
  341.  */
  342. static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
  343. {
  344.    struct brw_query_object *query = (struct brw_query_object *)q;
  345.  
  346.    gen6_queryobj_get_results(ctx, query);
  347.    query->Base.Ready = true;
  348. }
  349.  
  350. /**
  351.  * The CheckQuery() driver hook.
  352.  *
  353.  * Checks whether a query result is ready yet.  If not, flushes.
  354.  * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
  355.  */
  356. static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
  357. {
  358.    struct brw_context *brw = brw_context(ctx);
  359.    struct brw_query_object *query = (struct brw_query_object *)q;
  360.  
  361.    /* From the GL_ARB_occlusion_query spec:
  362.     *
  363.     *     "Instead of allowing for an infinite loop, performing a
  364.     *      QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
  365.     *      not ready yet on the first time it is queried.  This ensures that
  366.     *      the async query will return true in finite time.
  367.     */
  368.    if (query->bo && drm_intel_bo_references(brw->batch.bo, query->bo))
  369.       intel_batchbuffer_flush(brw);
  370.  
  371.    if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
  372.       gen6_queryobj_get_results(ctx, query);
  373.       query->Base.Ready = true;
  374.    }
  375. }
  376.  
  377. /* Initialize Gen6+-specific query object functions. */
  378. void gen6_init_queryobj_functions(struct dd_function_table *functions)
  379. {
  380.    functions->BeginQuery = gen6_begin_query;
  381.    functions->EndQuery = gen6_end_query;
  382.    functions->CheckQuery = gen6_check_query;
  383.    functions->WaitQuery = gen6_wait_query;
  384. }
  385.