Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #ifndef BRW_VEC4_H
  25. #define BRW_VEC4_H
  26.  
  27. #include <stdint.h>
  28. #include "brw_shader.h"
  29. #include "main/compiler.h"
  30. #include "program/hash_table.h"
  31. #include "brw_program.h"
  32.  
  33. #ifdef __cplusplus
  34. #include "brw_ir_vec4.h"
  35.  
  36. extern "C" {
  37. #endif
  38.  
  39. #include "brw_context.h"
  40. #include "brw_eu.h"
  41. #include "intel_asm_annotation.h"
  42.  
  43. #ifdef __cplusplus
  44. }; /* extern "C" */
  45. #endif
  46.  
  47. #include "glsl/ir.h"
  48.  
  49.  
  50. struct brw_vec4_compile { /* passed by pointer to brw::vec4_visitor's constructor and kept in its `c` member */
  51.    GLuint last_scratch; /**< measured in 32-byte (register size) units */
  52. };
  53.  
  54. #ifdef __cplusplus
  55. extern "C" {
  56. #endif
  57.  
  58. void
  59. brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
  60.                                       struct brw_vue_prog_key *key,
  61.                                       GLuint id, struct gl_program *prog); /* C linkage under C++ via the surrounding extern "C" guard; the definition is not in this header */
  62.  
  63. #ifdef __cplusplus
  64. } /* extern "C" */
  65.  
  66. namespace brw {
  67.  
  68. class vec4_live_variables; /* forward declaration: this header only holds a pointer to it (vec4_visitor::live_intervals) */
  69.  
  70. /**
  71.  * The vertex shader front-end.
  72.  *
  73.  * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
  74.  * fixed-function) into VS IR.
  75.  */
  76. class vec4_visitor : public backend_visitor /* abstract base: derived classes must implement the pure-virtual (= 0) hooks in the protected section */
  77. {
  78. public:
  79.    vec4_visitor(struct brw_context *brw,
  80.                 struct brw_vec4_compile *c,
  81.                 struct gl_program *prog,
  82.                 const struct brw_vue_prog_key *key,
  83.                 struct brw_vue_prog_data *prog_data,
  84.                 struct gl_shader_program *shader_prog,
  85.                 gl_shader_stage stage,
  86.                 void *mem_ctx,
  87.                 bool no_spills,
  88.                 shader_time_shader_type st_base,
  89.                 shader_time_shader_type st_written,
  90.                 shader_time_shader_type st_reset);
  91.    ~vec4_visitor();
  92.  
  93.    dst_reg dst_null_f() /* wraps brw_null_reg() as a dst_reg without retyping it */
  94.    {
  95.       return dst_reg(brw_null_reg());
  96.    }
  97.  
  98.    dst_reg dst_null_d() /* brw_null_reg() retyped to BRW_REGISTER_TYPE_D */
  99.    {
  100.       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  101.    }
  102.  
  103.    dst_reg dst_null_ud() /* brw_null_reg() retyped to BRW_REGISTER_TYPE_UD */
  104.    {
  105.       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
  106.    }
  107.  
  108.    struct brw_vec4_compile * const c; /* const pointer: cannot be reseated after construction; pointee is writable */
  109.    const struct brw_vue_prog_key * const key; /* const pointer to const: the key is read-only through this member */
  110.    struct brw_vue_prog_data * const prog_data; /* const pointer: cannot be reseated after construction; pointee is writable */
  111.    unsigned int sanity_param_count;
  112.  
  113.    char *fail_msg;
  114.    bool failed; /* NOTE(review): fail_msg/failed are presumably set by fail() below -- confirm in the implementation file */
  115.  
  116.    /**
  117.     * GLSL IR currently being processed, which is associated with our
  118.     * driver IR instructions for debugging purposes.
  119.     */
  120.    const void *base_ir;
  121.    const char *current_annotation;
  122.  
  123.    int first_non_payload_grf;
  124.    unsigned int max_grf;
  125.    int *virtual_grf_start;
  126.    int *virtual_grf_end;
  127.    brw::vec4_live_variables *live_intervals;
  128.    dst_reg userplane[MAX_CLIP_PLANES];
  129.  
  130.    dst_reg *variable_storage(ir_variable *var);
  131.  
  132.    void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
  133.  
  134.    bool need_all_constants_in_pull_buffer;
  135.  
  136.    /**
  137.     * \name Visit methods
  138.     *
  139.     * As typical for the visitor pattern, there must be one \c visit method for
  140.     * each concrete subclass of \c ir_instruction.  Virtual base classes within
  141.     * the hierarchy should not have \c visit methods.
  142.     */
  143.    /*@{*/
  144.    virtual void visit(ir_variable *);
  145.    virtual void visit(ir_loop *);
  146.    virtual void visit(ir_loop_jump *);
  147.    virtual void visit(ir_function_signature *);
  148.    virtual void visit(ir_function *);
  149.    virtual void visit(ir_expression *);
  150.    virtual void visit(ir_swizzle *);
  151.    virtual void visit(ir_dereference_variable  *);
  152.    virtual void visit(ir_dereference_array *);
  153.    virtual void visit(ir_dereference_record *);
  154.    virtual void visit(ir_assignment *);
  155.    virtual void visit(ir_constant *);
  156.    virtual void visit(ir_call *);
  157.    virtual void visit(ir_return *);
  158.    virtual void visit(ir_discard *);
  159.    virtual void visit(ir_texture *);
  160.    virtual void visit(ir_if *);
  161.    virtual void visit(ir_emit_vertex *);
  162.    virtual void visit(ir_end_primitive *);
  163.    /*@}*/
  164.  
  165.    src_reg result; /* NOTE(review): presumably where the visit() methods leave their value -- confirm in the implementation file */
  166.  
  167.    /* Regs for vertex results.  Generated at ir_variable visiting time
  168.     * for the ir->location's used.
  169.     */
  170.    dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
  171.    const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
  172.    int *uniform_size;
  173.    int *uniform_vector_size;
  174.    int uniform_array_size; /**< Size of uniform_[vector_]size arrays */
  175.    int uniforms;
  176.  
  177.    src_reg shader_start_time;
  178.  
  179.    struct hash_table *variable_ht;
  180.  
  181.    bool run(void);
  182.    void fail(const char *msg, ...); /* variadic; NOTE(review): msg looks like a printf-style format string -- confirm */
  183.  
  184.    void setup_uniform_clipplane_values();
  185.    void setup_uniform_values(ir_variable *ir);
  186.    void setup_builtin_uniform_values(ir_variable *ir);
  187.    int setup_uniforms(int payload_reg);
  188.    bool reg_allocate_trivial();
  189.    bool reg_allocate();
  190.    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
  191.    int choose_spill_reg(struct ra_graph *g);
  192.    void spill_reg(int spill_reg);
  193.    void move_grf_array_access_to_scratch();
  194.    void move_uniform_array_access_to_pull_constants();
  195.    void move_push_constants_to_pull_constants();
  196.    void split_uniform_registers();
  197.    void pack_uniform_registers();
  198.    void calculate_live_intervals();
  199.    void invalidate_live_intervals();
  200.    void split_virtual_grfs();
  201.    bool opt_vector_float();
  202.    bool opt_reduce_swizzle();
  203.    bool dead_code_eliminate();
  204.    int var_range_start(unsigned v, unsigned n) const;
  205.    int var_range_end(unsigned v, unsigned n) const;
  206.    bool virtual_grf_interferes(int a, int b);
  207.    bool opt_copy_propagation(bool do_constant_prop = true); /* do_constant_prop defaults to true when omitted */
  208.    bool opt_cse_local(bblock_t *block);
  209.    bool opt_cse();
  210.    bool opt_algebraic();
  211.    bool opt_register_coalesce();
  212.    bool eliminate_find_live_channel();
  213.    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
  214.    void opt_set_dependency_control();
  215.    void opt_schedule_instructions();
  216.  
  217.    vec4_instruction *emit(vec4_instruction *inst);
  218.  
  219.    vec4_instruction *emit(enum opcode opcode);
  220.    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
  221.    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
  222.                           const src_reg &src0);
  223.    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
  224.                           const src_reg &src0, const src_reg &src1);
  225.    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
  226.                           const src_reg &src0, const src_reg &src1,
  227.                           const src_reg &src2);
  228.  
  229.    vec4_instruction *emit_before(bblock_t *block,
  230.                                  vec4_instruction *inst,
  231.                                  vec4_instruction *new_inst);
  232.  
  233. #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
  234. #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
  235. #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
  236.    EMIT1(MOV) /* each EMITn(op) expands to a member declaration `vec4_instruction *op(dst, n sources);`; the macro supplies the trailing semicolon */
  237.    EMIT1(NOT)
  238.    EMIT1(RNDD)
  239.    EMIT1(RNDE)
  240.    EMIT1(RNDZ)
  241.    EMIT1(FRC)
  242.    EMIT1(F32TO16)
  243.    EMIT1(F16TO32)
  244.    EMIT2(ADD)
  245.    EMIT2(MUL)
  246.    EMIT2(MACH)
  247.    EMIT2(MAC)
  248.    EMIT2(AND)
  249.    EMIT2(OR)
  250.    EMIT2(XOR)
  251.    EMIT2(DP3)
  252.    EMIT2(DP4)
  253.    EMIT2(DPH)
  254.    EMIT2(SHL)
  255.    EMIT2(SHR)
  256.    EMIT2(ASR)
  257.    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
  258.                          enum brw_conditional_mod condition);
  259.    vec4_instruction *IF(src_reg src0, src_reg src1,
  260.                         enum brw_conditional_mod condition);
  261.    vec4_instruction *IF(enum brw_predicate predicate);
  262.    EMIT1(SCRATCH_READ)
  263.    EMIT2(SCRATCH_WRITE)
  264.    EMIT3(LRP)
  265.    EMIT1(BFREV)
  266.    EMIT3(BFE)
  267.    EMIT2(BFI1)
  268.    EMIT3(BFI2)
  269.    EMIT1(FBH)
  270.    EMIT1(FBL)
  271.    EMIT1(CBIT)
  272.    EMIT3(MAD)
  273.    EMIT2(ADDC)
  274.    EMIT2(SUBB)
  275. #undef EMIT1
  276. #undef EMIT2
  277. #undef EMIT3
  278.  
  279.    int implied_mrf_writes(vec4_instruction *inst);
  280.  
  281.    bool try_rewrite_rhs_to_dst(ir_assignment *ir,
  282.                                dst_reg dst,
  283.                                src_reg src,
  284.                                vec4_instruction *pre_rhs_inst,
  285.                                vec4_instruction *last_rhs_inst);
  286.  
  287.    /** Walks an exec_list of ir_instruction and sends it through this visitor. */
  288.    void visit_instructions(const exec_list *list);
  289.  
  290.    void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
  291.                     src_reg src0, src_reg src1, src_reg one);
  292.  
  293.    void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
  294.    void emit_if_gen6(ir_if *ir);
  295.  
  296.    void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
  297.                     src_reg src0, src_reg src1);
  298.  
  299.    void emit_lrp(const dst_reg &dst,
  300.                  const src_reg &x, const src_reg &y, const src_reg &a);
  301.  
  302.    /** Copy any live channel from \p src to the first channel of \p dst. */
  303.    void emit_uniformize(const dst_reg &dst, const src_reg &src);
  304.  
  305.    void emit_block_move(dst_reg *dst, src_reg *src,
  306.                         const struct glsl_type *type, brw_predicate predicate);
  307.  
  308.    void emit_constant_values(dst_reg *dst, ir_constant *value);
  309.  
  310.    /**
  311.     * Emit the correct dot-product instruction for the type of arguments
  312.     */
  313.    void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
  314.  
  315.    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
  316.                     dst_reg dst, src_reg src0);
  317.  
  318.    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
  319.                     dst_reg dst, src_reg src0, src_reg src1);
  320.  
  321.    src_reg fix_3src_operand(src_reg src);
  322.  
  323.    void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
  324.                   const src_reg &src1 = src_reg()); /* src1 may be omitted; it then binds to a default-constructed src_reg */
  325.    src_reg fix_math_operand(src_reg src);
  326.  
  327.    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
  328.    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
  329.    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
  330.    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
  331.    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
  332.    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
  333.  
  334.    uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
  335.    src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
  336.    void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
  337.    void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
  338.  
  339.    void emit_ndc_computation();
  340.    void emit_psiz_and_flags(dst_reg reg);
  341.    void emit_clip_distances(dst_reg reg, int offset);
  342.    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
  343.    void emit_urb_slot(dst_reg reg, int varying);
  344.  
  345.    void emit_shader_time_begin();
  346.    void emit_shader_time_end();
  347.    void emit_shader_time_write(enum shader_time_shader_type type,
  348.                                src_reg value);
  349.  
  350.    void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
  351.                             dst_reg dst, src_reg offset, src_reg src0,
  352.                             src_reg src1);
  353.  
  354.    void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
  355.                                   src_reg offset);
  356.  
  357.    src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
  358.                               src_reg *reladdr, int reg_offset);
  359.    src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
  360.                                     src_reg *reladdr, int reg_offset);
  361.    void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
  362.                           dst_reg dst,
  363.                           src_reg orig_src,
  364.                           int base_offset);
  365.    void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
  366.                            int base_offset);
  367.    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
  368.                                 dst_reg dst,
  369.                                 src_reg orig_src,
  370.                                 int base_offset);
  371.    void emit_pull_constant_load_reg(dst_reg dst,
  372.                                     src_reg surf_index,
  373.                                     src_reg offset,
  374.                                     bblock_t *before_block,
  375.                                     vec4_instruction *before_inst);
  376.    src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
  377.                                 vec4_instruction *inst, src_reg src);
  378.  
  379.    bool try_emit_mad(ir_expression *ir);
  380.    bool try_emit_b2f_of_compare(ir_expression *ir);
  381.    void resolve_ud_negate(src_reg *reg);
  382.    void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
  383.  
  384.    src_reg get_timestamp();
  385.  
  386.    bool process_move_condition(ir_rvalue *ir);
  387.  
  388.    void dump_instruction(backend_instruction *inst);
  389.    void dump_instruction(backend_instruction *inst, FILE *file);
  390.  
  391.    void visit_atomic_counter_intrinsic(ir_call *ir);
  392.  
  393. protected: /* members marked `= 0` below have no implementation in this class and must be overridden by any concrete subclass */
  394.    void emit_vertex();
  395.    void lower_attributes_to_hw_regs(const int *attribute_map,
  396.                                     bool interleaved);
  397.    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
  398.                                    int reg_node_count);
  399.    virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
  400.    virtual void assign_binding_table_offsets();
  401.    virtual void setup_payload() = 0;
  402.    virtual void emit_prolog() = 0;
  403.    virtual void emit_program_code() = 0;
  404.    virtual void emit_thread_end() = 0;
  405.    virtual void emit_urb_write_header(int mrf) = 0;
  406.    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
  407.    virtual int compute_array_stride(ir_dereference_array *ir);
  408.  
  409. private:
  410.    /**
  411.     * If true, then register allocation should fail instead of spilling.
  412.     */
  413.    const bool no_spills;
  414.  
  415.    const shader_time_shader_type st_base; /* the three st_* members share names with the constructor's shader_time_shader_type parameters; presumably initialized from them -- confirm */
  416.    const shader_time_shader_type st_written;
  417.    const shader_time_shader_type st_reset;
  418. };
  419.  
  420.  
  421. /**
  422.  * The vertex shader code generator.
  423.  *
  424.  * Translates VS IR to actual i965 assembly code.
  425.  */
  426. class vec4_generator
  427. {
  428. public:
  429.    vec4_generator(struct brw_context *brw,
  430.                   struct gl_shader_program *shader_prog,
  431.                   struct gl_program *prog,
  432.                   struct brw_vue_prog_data *prog_data,
  433.                   void *mem_ctx,
  434.                   bool debug_flag,
  435.                   const char *stage_name,
  436.                   const char *stage_abbrev);
  437.    ~vec4_generator();
  438.  
  439.    const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size); /* the only public operation besides construction/destruction; NOTE(review): asm_size looks like an out-parameter for the size of the returned buffer -- confirm units */
  440.  
  441. private: /* every generate_* helper below is private; outside callers can only go through generate_assembly() */
  442.    void generate_code(const cfg_t *cfg);
  443.  
  444.    void generate_math1_gen4(vec4_instruction *inst,
  445.                             struct brw_reg dst,
  446.                             struct brw_reg src);
  447.    void generate_math2_gen4(vec4_instruction *inst,
  448.                             struct brw_reg dst,
  449.                             struct brw_reg src0,
  450.                             struct brw_reg src1);
  451.    void generate_math_gen6(vec4_instruction *inst,
  452.                            struct brw_reg dst,
  453.                            struct brw_reg src0,
  454.                            struct brw_reg src1);
  455.  
  456.    void generate_tex(vec4_instruction *inst,
  457.                      struct brw_reg dst,
  458.                      struct brw_reg src,
  459.                      struct brw_reg sampler_index);
  460.  
  461.    void generate_vs_urb_write(vec4_instruction *inst);
  462.    void generate_gs_urb_write(vec4_instruction *inst);
  463.    void generate_gs_urb_write_allocate(vec4_instruction *inst);
  464.    void generate_gs_thread_end(vec4_instruction *inst);
  465.    void generate_gs_set_write_offset(struct brw_reg dst,
  466.                                      struct brw_reg src0,
  467.                                      struct brw_reg src1);
  468.    void generate_gs_set_vertex_count(struct brw_reg dst,
  469.                                      struct brw_reg src);
  470.    void generate_gs_svb_write(vec4_instruction *inst,
  471.                               struct brw_reg dst,
  472.                               struct brw_reg src0,
  473.                               struct brw_reg src1);
  474.    void generate_gs_svb_set_destination_index(vec4_instruction *inst,
  475.                                               struct brw_reg dst,
  476.                                               struct brw_reg src);
  477.    void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
  478.    void generate_gs_prepare_channel_masks(struct brw_reg dst);
  479.    void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
  480.    void generate_gs_get_instance_id(struct brw_reg dst);
  481.    void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
  482.                                            struct brw_reg src0,
  483.                                            struct brw_reg src1,
  484.                                            struct brw_reg src2);
  485.    void generate_gs_ff_sync(vec4_instruction *inst,
  486.                             struct brw_reg dst,
  487.                             struct brw_reg src0,
  488.                             struct brw_reg src1);
  489.    void generate_gs_set_primitive_id(struct brw_reg dst);
  490.    void generate_oword_dual_block_offsets(struct brw_reg m1,
  491.                                           struct brw_reg index);
  492.    void generate_scratch_write(vec4_instruction *inst,
  493.                                struct brw_reg dst,
  494.                                struct brw_reg src,
  495.                                struct brw_reg index);
  496.    void generate_scratch_read(vec4_instruction *inst,
  497.                               struct brw_reg dst,
  498.                               struct brw_reg index);
  499.    void generate_pull_constant_load(vec4_instruction *inst,
  500.                                     struct brw_reg dst,
  501.                                     struct brw_reg index,
  502.                                     struct brw_reg offset);
  503.    void generate_pull_constant_load_gen7(vec4_instruction *inst,
  504.                                          struct brw_reg dst,
  505.                                          struct brw_reg surf_index,
  506.                                          struct brw_reg offset);
  507.    void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
  508.                                          struct brw_reg dst);
  509.    void generate_unpack_flags(struct brw_reg dst);
  510.  
  511.    struct brw_context *brw;
  512.    const struct brw_device_info *devinfo;
  513.  
  514.    struct brw_codegen *p;
  515.  
  516.    struct gl_shader_program *shader_prog;
  517.    const struct gl_program *prog; /* held as pointer-to-const even though the constructor accepts a non-const gl_program * */
  518.  
  519.    struct brw_vue_prog_data *prog_data;
  520.  
  521.    void *mem_ctx;
  522.    const char *stage_name;
  523.    const char *stage_abbrev;
  524.    const bool debug_flag; /* const member: its value is fixed at construction */
  525. };
  526.  
  527. } /* namespace brw */
  528. #endif /* __cplusplus */
  529.  
  530. #endif /* BRW_VEC4_H */
  531.