Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28.  
  29. #include "i915_reg.h"
  30. #include "i915_context.h"
  31. #include "i915_batch.h"
  32. #include "i915_debug.h"
  33. #include "i915_fpc.h"
  34. #include "i915_resource.h"
  35.  
  36. #include "pipe/p_context.h"
  37. #include "pipe/p_defines.h"
  38. #include "pipe/p_format.h"
  39.  
  40. #include "util/u_format.h"
  41. #include "util/u_math.h"
  42. #include "util/u_memory.h"
  43.  
  44. struct i915_tracked_hw_state {
  45.    const char *name;
  46.    void (*validate)(struct i915_context *, unsigned *batch_space);
  47.    void (*emit)(struct i915_context *);
  48.    unsigned dirty, batch_space;
  49. };
  50.  
  51.  
  52. static void
  53. validate_flush(struct i915_context *i915, unsigned *batch_space)
  54. {
  55.    *batch_space = i915->flush_dirty ? 1 : 0;
  56. }
  57.  
  58. static void
  59. emit_flush(struct i915_context *i915)
  60. {
  61.    /* Cache handling is very cheap atm. State handling can request to flushes:
  62.     * - I915_FLUSH_CACHE which is a flush everything request and
  63.     * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
  64.     * Because the cache handling is so dumb, no explicit "invalidate map cache".
  65.     * Also, the first is a strict superset of the latter, so the following logic
  66.     * works. */
  67.    if (i915->flush_dirty & I915_FLUSH_CACHE)
  68.       OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
  69.    else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
  70.       OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
  71. }
  72.  
  73. uint32_t invariant_state[] = {
  74.    _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
  75.              AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
  76.  
  77.    _3DSTATE_DFLT_DIFFUSE_CMD, 0,
  78.  
  79.    _3DSTATE_DFLT_SPEC_CMD, 0,
  80.  
  81.    _3DSTATE_DFLT_Z_CMD, 0,
  82.  
  83.    _3DSTATE_COORD_SET_BINDINGS |
  84.              CSB_TCB(0, 0) |
  85.              CSB_TCB(1, 1) |
  86.              CSB_TCB(2, 2) |
  87.              CSB_TCB(3, 3) |
  88.              CSB_TCB(4, 4) |
  89.              CSB_TCB(5, 5) |
  90.              CSB_TCB(6, 6) |
  91.              CSB_TCB(7, 7),
  92.  
  93.    _3DSTATE_RASTER_RULES_CMD |
  94.              ENABLE_POINT_RASTER_RULE |
  95.              OGL_POINT_RASTER_RULE |
  96.              ENABLE_LINE_STRIP_PROVOKE_VRTX |
  97.              ENABLE_TRI_FAN_PROVOKE_VRTX |
  98.              LINE_STRIP_PROVOKE_VRTX(1) |
  99.              TRI_FAN_PROVOKE_VRTX(2) |
  100.              ENABLE_TEXKILL_3D_4D |
  101.              TEXKILL_4D,
  102.  
  103.    _3DSTATE_DEPTH_SUBRECT_DISABLE,
  104.  
  105.    /* disable indirect state for now
  106.     */
  107.    _3DSTATE_LOAD_INDIRECT | 0, 0};
  108.  
  109. static void
  110. emit_invariant(struct i915_context *i915)
  111. {
  112.    i915_winsys_batchbuffer_write(i915->batch, invariant_state,
  113.                                  Elements(invariant_state)*sizeof(uint32_t));
  114. }
  115.  
  116. static void
  117. validate_immediate(struct i915_context *i915, unsigned *batch_space)
  118. {
  119.    unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
  120.                      1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
  121.                      1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
  122.                      1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
  123.                     i915->immediate_dirty;
  124.  
  125.    if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
  126.       i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
  127.  
  128.    *batch_space = 1 + util_bitcount(dirty);
  129. }
  130.  
  131. static void
  132. emit_immediate(struct i915_context *i915)
  133. {
  134.    /* remove unwanted bits and S7 */
  135.    unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
  136.                      1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
  137.                      1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
  138.                      1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
  139.                     i915->immediate_dirty;
  140.    int i, num = util_bitcount(dirty);
  141.    assert(num && num <= I915_MAX_IMMEDIATE);
  142.  
  143.    OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
  144.              dirty << 4 | (num - 1));
  145.  
  146.    if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
  147.       if (i915->vbo)
  148.          OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
  149.                    i915->current.immediate[I915_IMMEDIATE_S0]);
  150.       else
  151.          OUT_BATCH(0);
  152.    }
  153.  
  154.    for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
  155.       if (dirty & (1 << i)) {
  156.          /* Fixup blend function for A8 dst buffers.
  157.           * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer,
  158.           * and therefore we need to use the color factor for alphas. */
  159.          if ((i == I915_IMMEDIATE_S6) &&
  160.              (i915->current.target_fixup_format == PIPE_FORMAT_A8_UNORM)) {
  161.             uint32_t imm = i915->current.immediate[i];
  162.             uint32_t srcRGB = (imm >> S6_CBUF_SRC_BLEND_FACT_SHIFT) & BLENDFACT_MASK;
  163.             if (srcRGB == BLENDFACT_DST_ALPHA)
  164.                srcRGB = BLENDFACT_DST_COLR;
  165.             else if (srcRGB == BLENDFACT_INV_DST_ALPHA)
  166.                srcRGB = BLENDFACT_INV_DST_COLR;
  167.             imm &= ~SRC_BLND_FACT(BLENDFACT_MASK);
  168.             imm |= SRC_BLND_FACT(srcRGB);
  169.             OUT_BATCH(imm);
  170.          } else {
  171.             OUT_BATCH(i915->current.immediate[i]);
  172.          }
  173.       }
  174.    }
  175. }
  176.  
  177. static void
  178. validate_dynamic(struct i915_context *i915, unsigned *batch_space)
  179. {
  180.    *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
  181. }
  182.  
  183. static void
  184. emit_dynamic(struct i915_context *i915)
  185. {
  186.    int i;
  187.    for (i = 0; i < I915_MAX_DYNAMIC; i++) {
  188.       if (i915->dynamic_dirty & (1 << i))
  189.          OUT_BATCH(i915->current.dynamic[i]);
  190.    }
  191. }
  192.  
  193. static void
  194. validate_static(struct i915_context *i915, unsigned *batch_space)
  195. {
  196.    *batch_space = 0;
  197.  
  198.    if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
  199.       i915->validation_buffers[i915->num_validation_buffers++]
  200.          = i915->current.cbuf_bo;
  201.       *batch_space += 3;
  202.    }
  203.  
  204.    if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
  205.       i915->validation_buffers[i915->num_validation_buffers++]
  206.          = i915->current.depth_bo;
  207.       *batch_space += 3;
  208.    }
  209.  
  210.    if (i915->static_dirty & I915_DST_VARS)
  211.       *batch_space += 2;
  212.  
  213.    if (i915->static_dirty & I915_DST_RECT)
  214.       *batch_space += 5;
  215. }
  216.  
  217. static void
  218. emit_static(struct i915_context *i915)
  219. {
  220.    if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
  221.       OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
  222.       OUT_BATCH(i915->current.cbuf_flags);
  223.       OUT_RELOC(i915->current.cbuf_bo,
  224.                 I915_USAGE_RENDER,
  225.                 0);
  226.    }
  227.  
  228.    /* What happens if no zbuf??
  229.     */
  230.    if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
  231.       OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
  232.       OUT_BATCH(i915->current.depth_flags);
  233.       OUT_RELOC(i915->current.depth_bo,
  234.                 I915_USAGE_RENDER,
  235.                 0);
  236.    }
  237.  
  238.    if (i915->static_dirty & I915_DST_VARS) {
  239.       OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
  240.       OUT_BATCH(i915->current.dst_buf_vars);
  241.    }
  242. }
  243.  
  244. static void
  245. validate_map(struct i915_context *i915, unsigned *batch_space)
  246. {
  247.    const uint enabled = i915->current.sampler_enable_flags;
  248.    uint unit;
  249.    struct i915_texture *tex;
  250.  
  251.    *batch_space = i915->current.sampler_enable_nr ?
  252.      2 + 3*i915->current.sampler_enable_nr : 0;
  253.  
  254.    for (unit = 0; unit < I915_TEX_UNITS; unit++) {
  255.       if (enabled & (1 << unit)) {
  256.          tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
  257.          i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
  258.       }
  259.    }
  260. }
  261.  
  262. static void
  263. emit_map(struct i915_context *i915)
  264. {
  265.    const uint nr = i915->current.sampler_enable_nr;
  266.    if (nr) {
  267.       const uint enabled = i915->current.sampler_enable_flags;
  268.       uint unit;
  269.       uint count = 0;
  270.       OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
  271.       OUT_BATCH(enabled);
  272.       for (unit = 0; unit < I915_TEX_UNITS; unit++) {
  273.          if (enabled & (1 << unit)) {
  274.             struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
  275.             struct i915_winsys_buffer *buf = texture->buffer;
  276.             assert(buf);
  277.  
  278.             count++;
  279.  
  280.             OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
  281.             OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
  282.             OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
  283.          }
  284.       }
  285.       assert(count == nr);
  286.    }
  287. }
  288.  
  289. static void
  290. validate_sampler(struct i915_context *i915, unsigned *batch_space)
  291. {
  292.    *batch_space = i915->current.sampler_enable_nr ?
  293.      2 + 3*i915->current.sampler_enable_nr : 0;
  294. }
  295.  
  296. static void
  297. emit_sampler(struct i915_context *i915)
  298. {
  299.    if (i915->current.sampler_enable_nr) {
  300.       int i;
  301.  
  302.       OUT_BATCH( _3DSTATE_SAMPLER_STATE |
  303.                  (3 * i915->current.sampler_enable_nr) );
  304.  
  305.       OUT_BATCH( i915->current.sampler_enable_flags );
  306.  
  307.       for (i = 0; i < I915_TEX_UNITS; i++) {
  308.          if (i915->current.sampler_enable_flags & (1<<i)) {
  309.             OUT_BATCH( i915->current.sampler[i][0] );
  310.             OUT_BATCH( i915->current.sampler[i][1] );
  311.             OUT_BATCH( i915->current.sampler[i][2] );
  312.          }
  313.       }
  314.    }
  315. }
  316.  
  317. static void
  318. validate_constants(struct i915_context *i915, unsigned *batch_space)
  319. {
  320.    int nr = i915->fs->num_constants ?
  321.       2 + 4*i915->fs->num_constants : 0;
  322.  
  323.    *batch_space = nr;
  324. }
  325.  
  326. static void
  327. emit_constants(struct i915_context *i915)
  328. {
  329.    /* Collate the user-defined constants with the fragment shader's
  330.     * immediates according to the constant_flags[] array.
  331.     */
  332.    const uint nr = i915->fs->num_constants;
  333.  
  334.    assert(nr < I915_MAX_CONSTANT);
  335.    if (nr) {
  336.       uint i;
  337.  
  338.       OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
  339.       OUT_BATCH((1 << nr) - 1);
  340.  
  341.       for (i = 0; i < nr; i++) {
  342.          const uint *c;
  343.          if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
  344.             /* grab user-defined constant */
  345.             c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
  346.             c += 4 * i;
  347.          }
  348.          else {
  349.             /* emit program constant */
  350.             c = (uint *) i915->fs->constants[i];
  351.          }
  352. #if 0 /* debug */
  353.          {
  354.             float *f = (float *) c;
  355.             printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
  356.                    (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
  357.                     ? "user" : "immediate"));
  358.          }
  359. #endif
  360.          OUT_BATCH(*c++);
  361.          OUT_BATCH(*c++);
  362.          OUT_BATCH(*c++);
  363.          OUT_BATCH(*c++);
  364.       }
  365.    }
  366. }
  367.  
  368. static void
  369. validate_program(struct i915_context *i915, unsigned *batch_space)
  370. {
  371.    uint additional_size = 0;
  372.  
  373.    additional_size += i915->current.target_fixup_format ? 3 : 0;
  374.  
  375.    /* we need more batch space if we want to emulate rgba framebuffers */
  376.    *batch_space = i915->fs->decl_len + i915->fs->program_len + additional_size;
  377. }
  378.  
  379. static void
  380. emit_program(struct i915_context *i915)
  381. {
  382.    uint additional_size = 0;
  383.    uint i;
  384.  
  385.    /* count how much additional space we'll need */
  386.    validate_program(i915, &additional_size);
  387.    additional_size -= i915->fs->decl_len + i915->fs->program_len;
  388.  
  389.    /* we should always have, at least, a pass-through program */
  390.    assert(i915->fs->program_len > 0);
  391.  
  392.    /* output the declarations */
  393.    {
  394.       /* first word has the size, we have to adjust that */
  395.       uint size = (i915->fs->decl[0]);
  396.       size += additional_size;
  397.       OUT_BATCH(size);
  398.    }
  399.  
  400.    for (i = 1 ; i < i915->fs->decl_len; i++)
  401.       OUT_BATCH(i915->fs->decl[i]);
  402.  
  403.    /* output the program */
  404.    assert(i915->fs->program_len % 3 == 0);
  405.    for (i = 0 ; i < i915->fs->program_len; i+=3) {
  406.       OUT_BATCH(i915->fs->program[i]);
  407.       OUT_BATCH(i915->fs->program[i+1]);
  408.       OUT_BATCH(i915->fs->program[i+2]);
  409.    }
  410.  
  411.    /* we emit an additional mov with swizzle to fake RGBA framebuffers */
  412.    if (i915->current.target_fixup_format) {
  413.       /* mov out_color, out_color.zyxw */
  414.       OUT_BATCH(A0_MOV |
  415.                 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
  416.                 A0_DEST_CHANNEL_ALL |
  417.                 (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
  418.                 (T_DIFFUSE << A0_SRC0_NR_SHIFT));
  419.       OUT_BATCH(i915->current.fixup_swizzle);
  420.       OUT_BATCH(0);
  421.    }
  422. }
  423.  
  424. static void
  425. emit_draw_rect(struct i915_context *i915)
  426. {
  427.    if (i915->static_dirty & I915_DST_RECT) {
  428.       OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
  429.       OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
  430.       OUT_BATCH(i915->current.draw_offset);
  431.       OUT_BATCH(i915->current.draw_size);
  432.       OUT_BATCH(i915->current.draw_offset);
  433.    }
  434. }
  435.  
  436. static boolean
  437. i915_validate_state(struct i915_context *i915, unsigned *batch_space)
  438. {
  439.    unsigned tmp;
  440.  
  441.    i915->num_validation_buffers = 0;
  442.    if (i915->hardware_dirty & I915_HW_INVARIANT)
  443.       *batch_space = Elements(invariant_state);
  444.    else
  445.       *batch_space = 0;
  446.  
  447. #if 0
  448. static int counter_total = 0;
  449. #define VALIDATE_ATOM(atom, hw_dirty) \
  450.    if (i915->hardware_dirty & hw_dirty) { \
  451.       static int counter_##atom = 0;\
  452.       validate_##atom(i915, &tmp); \
  453.       *batch_space += tmp;\
  454.       counter_##atom += tmp;\
  455.       counter_total += tmp;\
  456.       printf("%s: \t%d/%d \t%2.2f\n",#atom, counter_##atom, counter_total, counter_##atom*100.f/counter_total);}
  457. #else
  458. #define VALIDATE_ATOM(atom, hw_dirty) \
  459.    if (i915->hardware_dirty & hw_dirty) { \
  460.       validate_##atom(i915, &tmp); \
  461.       *batch_space += tmp; }
  462. #endif
  463.    VALIDATE_ATOM(flush, I915_HW_FLUSH);
  464.    VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
  465.    VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
  466.    VALIDATE_ATOM(static, I915_HW_STATIC);
  467.    VALIDATE_ATOM(map, I915_HW_MAP);
  468.    VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
  469.    VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
  470.    VALIDATE_ATOM(program, I915_HW_PROGRAM);
  471. #undef VALIDATE_ATOM
  472.  
  473.    if (i915->num_validation_buffers == 0)
  474.       return TRUE;
  475.  
  476.    if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
  477.                                      i915->num_validation_buffers))
  478.       return FALSE;
  479.  
  480.    return TRUE;
  481. }
  482.  
  483. /* Push the state into the sarea and/or texture memory.
  484.  */
  485. void
  486. i915_emit_hardware_state(struct i915_context *i915 )
  487. {
  488.    unsigned batch_space;
  489.    uintptr_t save_ptr;
  490.  
  491.    assert(i915->dirty == 0);
  492.  
  493.    if (I915_DBG_ON(DBG_ATOMS))
  494.       i915_dump_hardware_dirty(i915, __FUNCTION__);
  495.  
  496.    if (!i915_validate_state(i915, &batch_space)) {
  497.       FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
  498.       assert(i915_validate_state(i915, &batch_space));
  499.    }
  500.  
  501.    if(!BEGIN_BATCH(batch_space)) {
  502.       FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
  503.       assert(i915_validate_state(i915, &batch_space));
  504.       assert(BEGIN_BATCH(batch_space));
  505.    }
  506.  
  507.    save_ptr = (uintptr_t)i915->batch->ptr;
  508.  
  509. #define EMIT_ATOM(atom, hw_dirty) \
  510.    if (i915->hardware_dirty & hw_dirty) \
  511.       emit_##atom(i915);
  512.    EMIT_ATOM(flush, I915_HW_FLUSH);
  513.    EMIT_ATOM(invariant, I915_HW_INVARIANT);
  514.    EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
  515.    EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
  516.    EMIT_ATOM(static, I915_HW_STATIC);
  517.    EMIT_ATOM(map, I915_HW_MAP);
  518.    EMIT_ATOM(sampler, I915_HW_SAMPLER);
  519.    EMIT_ATOM(constants, I915_HW_CONSTANTS);
  520.    EMIT_ATOM(program, I915_HW_PROGRAM);
  521.    EMIT_ATOM(draw_rect, I915_HW_STATIC);
  522. #undef EMIT_ATOM
  523.  
  524.    I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
  525.             ((uintptr_t)i915->batch->ptr - save_ptr) / 4,
  526.             batch_space);
  527.    assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
  528.  
  529.    i915->hardware_dirty = 0;
  530.    i915->immediate_dirty = 0;
  531.    i915->dynamic_dirty = 0;
  532.    i915->static_dirty = 0;
  533.    i915->flush_dirty = 0;
  534. }
  535.