Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  8.  * license, and/or sell copies of the Software, and to permit persons to whom
  9.  * the Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18.  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *      Vadim Girlin
  25.  */
  26.  
  27. #ifndef SB_SHADER_H_
  28. #define SB_SHADER_H_
  29.  
  #include <algorithm>
  #include <list>
  #include <map>
  #include <set>
  #include <string>
  #include <utility>
  #include <vector>

  #include "sb_ir.h"
  #include "sb_expr.h"
  36.  
  37. namespace r600_sb {
  38.  
  // Per-input record stored in inputs_vec.
  // NOTE(review): the fields mirror the 'comp_mask' and 'preloaded'
  // arguments of shader::add_input; presumably filled from them - confirm
  // against the definition, which is not visible in this header.
  struct shader_input {
          // Component mask (shader::add_input defaults its argument to 0xF).
          unsigned comp_mask;
          // Stored as unsigned although shader::add_input takes a bool.
          unsigned preloaded;
  };
  43.  
  44. struct error_info {
  45.         node *n;
  46.         unsigned arg_index;
  47.         std::string message;
  48. };
  49.  
  // Maps a node to its error_info records; being a multimap, one node may
  // have several entries.
  typedef std::multimap<node*, error_info> error_map;

  // Forward declaration; shader keeps a reference to its sb_context.
  class sb_context;

  // Container types for shader inputs and GPR array descriptors.
  typedef std::vector<shader_input> inputs_vec;
  typedef std::vector<gpr_array*> gpr_array_vec;
  56.  
  57. struct ra_edge {
  58.         value *a, *b;
  59.         unsigned cost;
  60.  
  61.         ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {}
  62. };
  63.  
  // Bit flags stored in ra_chunk::flags. Each enumerator is a distinct
  // single bit, so values can be combined with the operators defined for
  // this enum.
  enum chunk_flags {
          RCF_GLOBAL   = 0x01,
          RCF_PIN_CHAN = 0x02,
          RCF_PIN_REG  = 0x04,

          RCF_FIXED    = 0x08,

          RCF_PREALLOC = 0x10
  };
  73.  
  // Single-bit flags that can be OR-ed together with the operator below.
  enum dce_flags {
          DF_REMOVE_DEAD   = 0x1,
          DF_REMOVE_UNUSED = 0x2,
          DF_EXPAND        = 0x4
  };

  // Bitwise OR of two dce_flags values, keeping the enum type.
  inline dce_flags operator |(dce_flags l, dce_flags r) {
          const unsigned bits = static_cast<unsigned>(l) | static_cast<unsigned>(r);
          return static_cast<dce_flags>(bits);
  }
  83.  
  84. inline chunk_flags operator |(chunk_flags l, chunk_flags r) {
  85.         return (chunk_flags)((unsigned)l|(unsigned)r);
  86. }
  87. inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) {
  88.         l = l | r;
  89.         return l;
  90. }
  91.  
  92. inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) {
  93.         l = (chunk_flags)((unsigned)l & (unsigned)r);
  94.         return l;
  95. }
  96.  
  97. inline chunk_flags operator ~(chunk_flags r) {
  98.         return (chunk_flags)~(unsigned)r;
  99. }
  100.  
  101. struct ra_chunk {
  102.         vvec values;
  103.         chunk_flags flags;
  104.         unsigned cost;
  105.         sel_chan pin;
  106.  
  107.         ra_chunk() : values(), flags(), cost(), pin() {}
  108.  
  109.         bool is_fixed() { return flags & RCF_FIXED; }
  110.         void fix() { flags |= RCF_FIXED; }
  111.  
  112.         bool is_global() { return flags & RCF_GLOBAL; }
  113.         void set_global() {     flags |= RCF_GLOBAL; }
  114.  
  115.         bool is_reg_pinned() { return flags & RCF_PIN_REG; }
  116.         bool is_chan_pinned() { return flags & RCF_PIN_CHAN; }
  117.  
  118.         bool is_prealloc() { return flags & RCF_PREALLOC; }
  119.         void set_prealloc() { flags |= RCF_PREALLOC; }
  120. };
  121.  
  // Vector of ra_chunk pointers. NOTE(review): chunk_vec, declared further
  // down in this header, is a typedef for the same type.
  typedef std::vector<ra_chunk*> chunk_vector;
  123.  
  124. class ra_constraint {
  125. public:
  126.         ra_constraint(constraint_kind kind) : kind(kind), cost(0) {}
  127.  
  128.         constraint_kind kind;
  129.         vvec values;
  130.         unsigned cost;
  131.  
  132.         void update_values();
  133.         bool check();
  134. };
  135.  
  // Pointer vectors used by coalescer for all_constraints and all_chunks.
  typedef std::vector<ra_constraint*> constraint_vec;
  typedef std::vector<ra_chunk*> chunk_vec;
  138.  
  139. // priority queue
  140. // FIXME use something more suitable or a custom class?
  141.  
  // Strict ordering by descending 'cost' for pointer-like T: returns true
  // when t1's cost is greater than t2's, so a range sorted with this
  // comparator has its highest-cost element first.
  //
  // The call operator is const-qualified so the comparator can also be
  // invoked through a const object or const reference.
  template <class T>
  struct cost_compare {
          bool operator ()(const T& t1, const T& t2) const {
                  return t1->cost > t2->cost;
          }
  };
  148.  
  // Sorted sequence of T kept in a std::vector and ordered by Comp.
  // Elements that compare equivalent keep their insertion order, because
  // insert() places a new element after all equivalent ones.
  template <class T, class Comp>
  class queue {
          typedef std::vector<T> container;
          container cont;

  public:
          queue() : cont() {}

          typedef typename container::iterator iterator;

          iterator begin() { return cont.begin(); }
          iterator end() { return cont.end(); }

          // Inserts t at its sorted position and returns an iterator to the
          // newly inserted element.
          //
          // The iterator returned is the one produced by vector::insert: the
          // position found by upper_bound is invalidated by the insertion
          // (and dangles if the vector reallocates), so it must not be
          // returned. Inserting at end() is equivalent to push_back.
          iterator insert(const T& t) {
                  iterator pos = std::upper_bound(begin(), end(), t, Comp());
                  return cont.insert(pos, t);
          }

          // Removes the first element equal to t (operator==) among the
          // elements that compare equivalent to t under Comp. Does nothing
          // when there is no such element.
          void erase(const T& t) {
                  std::pair<iterator, iterator> R =
                                  std::equal_range(begin(), end(), t, Comp());
                  iterator F = std::find(R.first, R.second, t);
                  if (F != R.second)
                          cont.erase(F);
          }
  };
  180.  
  // Cost-ordered queues of pointers; cost_compare puts higher 'cost' first.
  typedef queue<ra_chunk*, cost_compare<ra_chunk*> > chunk_queue;
  typedef queue<ra_edge*, cost_compare<ra_edge*> > edge_queue;
  typedef queue<ra_constraint*, cost_compare<ra_constraint*> > constraint_queue;

  // Set of chunk pointers (ordered by pointer value, the std::set default).
  typedef std::set<ra_chunk*> chunk_set;

  // Forward declaration: coalescer holds a reference to its shader.
  class shader;
  188.  
  // Holds the cost-ordered queues of edges, chunks and constraints, plus
  // vectors of all constraints and chunks, for one shader. Only the
  // constructor is defined in this header; every other member function is
  // defined elsewhere.
  class coalescer {

          // Shader this coalescer belongs to (reference, bound at construction).
          shader &sh;

          // Cost-ordered work queues (see cost_compare).
          edge_queue edges;
          chunk_queue chunks;
          constraint_queue constraints;

          // NOTE(review): presumably every constraint/chunk ever created, kept
          // so the destructor can release them - confirm in the definition.
          constraint_vec all_constraints;
          chunk_vec all_chunks;

  public:

          // Binds the shader reference and starts with empty queues.
          coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {}
          ~coalescer();

          // NOTE(review): the int results of run() and color_constraints()
          // look like status codes - confirm their meaning in the definitions.
          int run();

          void add_edge(value *a, value *b, unsigned cost);
          void build_chunks();
          void build_constraint_queue();
          void build_chunk_queue();
          int color_constraints();
          void color_chunks();

          ra_constraint* create_constraint(constraint_kind kind);

          // Named cost constants: phi_cost is 10000, copy_cost is 1.
          enum ac_cost {
                  phi_cost = 10000,
                  copy_cost = 1,
          };

          // Dump helpers.
          void dump_edges();
          void dump_chunks();
          void dump_constraint_queue();

          static void dump_chunk(ra_chunk *c);
          static void dump_constraint(ra_constraint* c);

          // Takes the output set 's' by non-const reference.
          void get_chunk_interferences(ra_chunk *c, val_set &s);

  private:

          void create_chunk(value *v);
          void unify_chunks(ra_edge *e);
          bool chunks_interference(ra_chunk *c1, ra_chunk *c2);

          int color_reg_constraint(ra_constraint *c);
          void color_phi_constraint(ra_constraint *c);


          void init_reg_bitset(sb_bitset &bs, val_set &vs);

          void color_chunk(ra_chunk *c, sel_chan color);

          ra_chunk* detach_value(value *v);
  };
  246.  
  247.  
  248.  
  // State for one shader: value maps, node and value pools, regions, basic
  // blocks, inputs, statistics, and an embedded coalescer. Apart from a few
  // inline accessors, every member function is only declared here and
  // defined elsewhere.
  class shader {

          // Context this shader was created with (see get_ctx()).
          sb_context &ctx;

          // Map from a 32-bit key to a value pointer.
          typedef sb_map<uint32_t, value*> value_map;
          value_map reg_values;

          // read-only values
          value_map const_values; // immediate constants: key is the constant value (uint32_t)
          value_map special_ro_values; // key: hw alu_sel & chan
          value_map kcache_values;

          gpr_array_vec gpr_arrays;

          unsigned next_temp_value_index;

          unsigned prep_regs_count;

          // Two entries; NOTE(review): presumably indexed by the 'sel'
          // argument of get_pred_sel() - confirm in the definition.
          value* pred_sels[2];

          regions_vec regions;
          inputs_vec inputs;

          value *undef;

          // val_pool is indexed by (uid - 1) in get_value_by_uid().
          sb_value_pool val_pool;
          sb_pool pool;

          std::vector<node*> all_nodes;

  public:
          // NOTE(review): presumably statistics for the source and the
          // optimized shader (see collect_stats(bool opt)) - confirm.
          shader_stats src_stats, opt_stats;

          error_map errors;

          bool optimized;

          unsigned id;

          coalescer coal;

          static const unsigned temp_regid_offset = 512;

          bbs_vec bbs;

          // Fixed at construction (const member).
          const shader_target target;

          value_table vt;
          expr_handler ex;

          container_node *root;

          bool compute_interferences;

          bool has_alu_predication;
          bool uses_gradients;

          bool safe_math;

          unsigned ngpr, nstack;

          // Note: this data member has the same name as the dce_flags enum
          // declared at namespace scope, but its type is plain unsigned.
          unsigned dce_flags;

          shader(sb_context &sctx, shader_target t, unsigned id);

          ~shader();

          // Returns the context reference held by this shader.
          sb_context &get_ctx() const { return ctx; }

          // Value lookup/creation helpers.
          value* get_const_value(const literal & v);
          value* get_special_value(unsigned sv_id, unsigned version = 0);
          value* create_temp_value();
          value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                           unsigned version = 0);


          value* get_special_ro_value(unsigned sel);
          value* get_kcache_value(unsigned bank, unsigned index, unsigned chan);

          value* get_value_version(value* v, unsigned ver);

          void init();
          void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);

          void dump_ir();

          void add_gpr_array(unsigned gpr_start, unsigned gpr_count,
                             unsigned comp_mask);

          value* get_pred_sel(int sel);
          // 'slots' is an array parameter declared with five elements.
          bool assign_slot(alu_node *n, alu_node *slots[5]);

          gpr_array* get_gpr_array(unsigned reg, unsigned chan);

          // Defaults: not preloaded, component mask 0xF.
          void add_input(unsigned gpr, bool preloaded = false,
                         unsigned comp_mask = 0xF);

          // Read-only access to the inputs vector.
          const inputs_vec & get_inputs() {return inputs; }

          // Mutable access to the regions vector.
          regions_vec & get_regions() { return regions; }

          void init_call_fs(cf_node *cf);

          value *get_undef_value();
          void set_undef(val_set &s);

          // Node factory declarations.
          node* create_node(node_type nt, node_subtype nst,
                            node_flags flags = NF_EMPTY);
          alu_node* create_alu();
          alu_group_node* create_alu_group();
          alu_packed_node* create_alu_packed();
          cf_node* create_cf();
          cf_node* create_cf(unsigned op);
          fetch_node* create_fetch();
          region_node* create_region();
          depart_node* create_depart(region_node *target);
          repeat_node* create_repeat(region_node *target);
          container_node* create_container(node_type nt = NT_LIST,
                                           node_subtype nst = NST_LIST,
                                           node_flags flags = NF_EMPTY);
          if_node* create_if();
          bb_node* create_bb(unsigned id, unsigned loop_level);

          // Looks up a value by uid: uids are 1-based, so the pool is indexed
          // with id - 1. No range check is performed here.
          value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; }

          cf_node* create_clause(node_subtype nst);

          void create_bbs();
          void expand_bbs();

          alu_node* create_mov(value* dst, value* src);
          alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1);

          const char * get_shader_target_name();

          std::string get_full_target_name();

          // Overloads of create_bbs()/expand_bbs() that take the container
          // and/or the basic-block vector explicitly.
          void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0);
          void expand_bbs(bbs_vec &bbs);

          sched_queue_id get_queue_id(node* n);

          void simplify_dep_rep(node *dr);

          unsigned first_temp_gpr();
          unsigned num_nontemp_gpr();

          // Mutable access to the GPR array descriptors.
          gpr_array_vec& arrays() { return gpr_arrays; }

          void set_uses_kill();

          void fill_array_values(gpr_array *a, vvec &vv);

          alu_node* clone(alu_node *n);

          // Mutable access to the value pool.
          sb_value_pool& get_value_pool() { return val_pool; }

          void collect_stats(bool opt);

  private:
          // Internal value helpers.
          value* create_value(value_kind k, sel_chan regid, unsigned ver);
          value* get_value(value_kind kind, sel_chan id,
                                   unsigned version = 0);
          value* get_ro_value(value_map &vm, value_kind vk, unsigned key);
  };
  414.  
  415. }
  416.  
  417. #endif /* SB_SHADER_H_ */
  418.