Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  8.  * license, and/or sell copies of the Software, and to permit persons to whom
  9.  * the Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18.  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *      Vadim Girlin
  25.  */
  26.  
  27. #ifndef SB_SCHED_H_
  28. #define SB_SCHED_H_
  29.  
  30. namespace r600_sb {
  31.  
  32. typedef sb_map<node*, unsigned> uc_map;
  33.  
  34. // resource trackers for scheduler
  35. // rp = read port
  36. // uc = use count
  37.  
  38. typedef sb_set<unsigned> kc_lines;
  39.  
  40. class rp_kcache_tracker {
  41.         unsigned rp[4];
  42.         unsigned uc[4];
  43.         const unsigned sel_count;
  44.  
  45.         unsigned kc_sel(sel_chan r) {
  46.                 return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1;
  47.         }
  48.  
  49. public:
  50.         rp_kcache_tracker(shader &sh);
  51.  
  52.         bool try_reserve(node *n);
  53.         void unreserve(node *n);
  54.  
  55.  
  56.         bool try_reserve(sel_chan r);
  57.         void unreserve(sel_chan r);
  58.  
  59.         void reset();
  60.  
  61.         unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; }
  62.  
  63.         unsigned get_lines(kc_lines &lines);
  64. };
  65.  
  66. class literal_tracker {
  67.         literal lt[4];
  68.         unsigned uc[4];
  69. public:
  70.         literal_tracker() : lt(), uc() {}
  71.  
  72.         bool try_reserve(alu_node *n);
  73.         void unreserve(alu_node *n);
  74.  
  75.         bool try_reserve(literal l);
  76.         void unreserve(literal l);
  77.  
  78.         void reset();
  79.  
  80.         unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; }
  81.  
  82.         void init_group_literals(alu_group_node *g);
  83.  
  84. };
  85.  
  86. class rp_gpr_tracker {
  87.         // rp[cycle][elem]
  88.         unsigned rp[3][4];
  89.         unsigned uc[3][4];
  90.  
  91. public:
  92.         rp_gpr_tracker() : rp(), uc() {}
  93.  
  94.         bool try_reserve(alu_node *n);
  95.         void unreserve(alu_node *n);
  96.  
  97.         bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
  98.         void unreserve(unsigned cycle, unsigned sel, unsigned chan);
  99.  
  100.         void reset();
  101.  
  102.         void dump();
  103. };
  104.  
  105. class alu_group_tracker {
  106.  
  107.         shader &sh;
  108.  
  109.         rp_kcache_tracker kc;
  110.         rp_gpr_tracker gpr;
  111.         literal_tracker lt;
  112.  
  113.         alu_node * slots[5];
  114.  
  115.         unsigned available_slots;
  116.  
  117.         unsigned max_slots;
  118.  
  119.         typedef std::map<value*, unsigned> value_index_map;
  120.  
  121.         value_index_map vmap;
  122.  
  123.         bool has_mova;
  124.         bool uses_ar;
  125.         bool has_predset;
  126.         bool has_kill;
  127.         bool updates_exec_mask;
  128.  
  129.         unsigned chan_count[4];
  130.  
  131.         // param index + 1 (0 means that group doesn't refer to Params)
  132.         // we can't use more than one param index in a group
  133.         unsigned interp_param;
  134.  
  135.         unsigned next_id;
  136.  
  137.         node_vec packed_ops;
  138.  
  139.         void assign_slot(unsigned slot, alu_node *n);
  140.  
  141. public:
  142.         alu_group_tracker(shader &sh);
  143.  
  144.         // FIXME use fast bs correctness check (values for same chan <= 3) ??
  145.         bool try_reserve(alu_node *n);
  146.         bool try_reserve(alu_packed_node *p);
  147.  
  148.         void reinit();
  149.         void reset(bool keep_packed = false);
  150.  
  151.         sel_chan get_value_id(value *v);
  152.         void update_flags(alu_node *n);
  153.  
  154.         alu_node* slot(unsigned i) { return slots[i]; }
  155.  
  156.         unsigned used_slots() {
  157.                 return (~available_slots) & ((1 << max_slots) - 1);
  158.         }
  159.  
  160.         unsigned inst_count() {
  161.                 return __builtin_popcount(used_slots());
  162.         }
  163.  
  164.         unsigned literal_count() { return lt.count(); }
  165.         unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
  166.         unsigned slot_count() { return inst_count() + literal_slot_count(); }
  167.  
  168.         alu_group_node* emit();
  169.  
  170.         rp_kcache_tracker& kcache() { return kc; }
  171.  
  172.         bool has_update_exec_mask() { return updates_exec_mask; }
  173.         unsigned avail_slots() { return available_slots; }
  174.  
  175.         void discard_all_slots(container_node &removed_nodes);
  176.         void discard_slots(unsigned slot_mask, container_node &removed_nodes);
  177.  
  178.         bool has_ar_load() { return has_mova; }
  179. };
  180.  
  181. class alu_kcache_tracker {
  182.         bc_kcache kc[4];
  183.         sb_set<unsigned> lines;
  184.         unsigned max_kcs;
  185.  
  186. public:
  187.  
  188.         alu_kcache_tracker(sb_hw_class hc)
  189.                 : kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}
  190.  
  191.         void reset();
  192.         bool try_reserve(alu_group_tracker &gt);
  193.         bool update_kc();
  194.         void init_clause(bc_cf &bc) {
  195.                 memcpy(bc.kc, kc, sizeof(kc));
  196.         }
  197. };
  198.  
  199. class alu_clause_tracker {
  200.         shader &sh;
  201.  
  202.         alu_kcache_tracker kt;
  203.         unsigned slot_count;
  204.  
  205.         alu_group_tracker grp0;
  206.         alu_group_tracker grp1;
  207.  
  208.         unsigned group;
  209.  
  210.         cf_node *clause;
  211.  
  212.         bool push_exec_mask;
  213.  
  214. public:
  215.         container_node conflict_nodes;
  216.  
  217.         // current values of AR and PR registers that we have to preload
  218.         // till the end of clause (in fact, beginning, because we're scheduling
  219.         // bottom-up)
  220.         value *current_ar;
  221.         value *current_pr;
  222.  
  223.         alu_clause_tracker(shader &sh);
  224.  
  225.         void reset();
  226.  
  227.         // current group
  228.         alu_group_tracker& grp() { return group ? grp1 : grp0; }
  229.         // previous group
  230.         alu_group_tracker& prev_grp() { return group ? grp0 : grp1; }
  231.  
  232.         void emit_group();
  233.         void emit_clause(container_node *c);
  234.         bool check_clause_limits();
  235.         void new_group();
  236.         bool is_empty();
  237.  
  238.         alu_node* create_ar_load();
  239.  
  240.         void discard_current_group();
  241.  
  242.         unsigned total_slots() { return slot_count; }
  243. };
  244.  
  245. class post_scheduler : public pass {
  246.  
  247.         container_node ready, ready_copies; // alu only
  248.         container_node pending, bb_pending;
  249.         bb_node *cur_bb;
  250.         val_set live; // values live at the end of the alu clause
  251.         uc_map ucm;
  252.         alu_clause_tracker alu;
  253.  
  254.         typedef std::map<sel_chan, value*> rv_map;
  255.         rv_map regmap, prev_regmap;
  256.  
  257.         val_set cleared_interf;
  258.  
  259. public:
  260.  
  261.         post_scheduler(shader &sh) : pass(sh),
  262.                 ready(), ready_copies(), pending(), cur_bb(),
  263.                 live(), ucm(), alu(sh), regmap(), cleared_interf() {}
  264.  
  265.         virtual int run();
  266.         void run_on(container_node *n);
  267.         void schedule_bb(bb_node *bb);
  268.  
  269.         void process_alu(container_node *c);
  270.         void schedule_alu(container_node *c);
  271.         bool prepare_alu_group();
  272.  
  273.         void release_op(node *n);
  274.  
  275.         void release_src_values(node *n);
  276.         void release_src_vec(vvec &vv, bool src);
  277.         void release_src_val(value *v);
  278.  
  279.         void init_uc_val(container_node *c, value *v);
  280.         void init_uc_vec(container_node *c, vvec &vv, bool src);
  281.         unsigned init_ucm(container_node *c, node *n);
  282.  
  283.         void init_regmap();
  284.  
  285.         bool check_interferences();
  286.  
  287.         unsigned try_add_instruction(node *n);
  288.  
  289.         bool check_copy(node *n);
  290.         void dump_group(alu_group_tracker &rt);
  291.  
  292.         bool unmap_dst(alu_node *n);
  293.         bool unmap_dst_val(value *d);
  294.  
  295.         bool map_src(alu_node *n);
  296.         bool map_src_vec(vvec &vv, bool src);
  297.         bool map_src_val(value *v);
  298.  
  299.         bool recolor_local(value *v);
  300.  
  301.         void update_local_interferences();
  302.         void update_live_src_vec(vvec &vv, val_set *born, bool src);
  303.         void update_live_dst_vec(vvec &vv);
  304.         void update_live(node *n, val_set *born);
  305.         void process_group();
  306.  
  307.         void set_color_local_val(value *v, sel_chan color);
  308.         void set_color_local(value *v, sel_chan color);
  309.  
  310.         void add_interferences(value *v, sb_bitset &rb, val_set &vs);
  311.  
  312.         void init_globals(val_set &s, bool prealloc);
  313.  
  314.         void recolor_locals();
  315.  
  316.         void dump_regmap();
  317.  
  318.         void emit_load_ar();
  319.         void emit_clause();
  320.  
  321.         void process_ready_copies();
  322. };
  323.  
  324. } // namespace r600_sb
  325.  
  326. #endif /* SB_SCHED_H_ */
  327.