Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  8.  * license, and/or sell copies of the Software, and to permit persons to whom
  9.  * the Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18.  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *      Vadim Girlin
  25.  */
  26.  
  27. #define IFC_DEBUG 0
  28.  
  29. #if IFC_DEBUG
  30. #define IFC_DUMP(q) do { q } while (0)
  31. #else
  32. #define IFC_DUMP(q)
  33. #endif
  34.  
  35. #include "sb_shader.h"
  36. #include "sb_pass.h"
  37.  
  38. namespace r600_sb {
  39.  
  40. int if_conversion::run() {
  41.  
  42.         regions_vec &rv = sh.get_regions();
  43.  
  44.         unsigned converted = 0;
  45.  
  46.         for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend();
  47.                         I != E; I = N) {
  48.                 N = I; ++N;
  49.  
  50.                 region_node *r = *I;
  51.                 if (run_on(r)) {
  52.                         rv.erase(I.base() - 1);
  53.                         ++converted;
  54.                 }
  55.         }
  56.         return 0;
  57. }
  58.  
  59. void if_conversion::convert_kill_instructions(region_node *r,
  60.                                               value *em, bool branch,
  61.                                               container_node *c) {
  62.         value *cnd = NULL;
  63.  
  64.         for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
  65.                 N = I + 1;
  66.  
  67.                 if (!I->is_alu_inst())
  68.                         continue;
  69.  
  70.                 alu_node *a = static_cast<alu_node*>(*I);
  71.                 unsigned flags = a->bc.op_ptr->flags;
  72.  
  73.                 if (!(flags & AF_KILL))
  74.                         continue;
  75.  
  76.                 // ignore predicated or non-const kill instructions
  77.                 if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
  78.                         continue;
  79.  
  80.                 literal l0 = a->src[0]->literal_value;
  81.                 literal l1 = a->src[1]->literal_value;
  82.  
  83.                 expr_handler::apply_alu_src_mod(a->bc, 0, l0);
  84.                 expr_handler::apply_alu_src_mod(a->bc, 1, l1);
  85.  
  86.                 if (expr_handler::evaluate_condition(flags, l0, l1)) {
  87.                         // kill with constant 'true' condition, we'll convert it to the
  88.                         // conditional kill outside of the if-then-else block
  89.  
  90.                         a->remove();
  91.  
  92.                         if (!cnd) {
  93.                                 cnd = get_select_value_for_em(sh, em);
  94.                         } else {
  95.                                 // more than one kill with the same condition, just remove it
  96.                                 continue;
  97.                         }
  98.  
  99.                         r->insert_before(a);
  100.                         a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
  101.  
  102.                         a->src[0] = cnd;
  103.                         a->src[1] = sh.get_const_value(0);
  104.                         // clear modifiers
  105.                         memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
  106.                         memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
  107.                 } else {
  108.                         // kill with constant 'false' condition, this shouldn't happen
  109.                         // but remove it anyway
  110.                         a->remove();
  111.                 }
  112.         }
  113. }
  114.  
  115. bool if_conversion::check_and_convert(region_node *r) {
  116.  
  117.         depart_node *nd1 = static_cast<depart_node*>(r->first);
  118.         if (!nd1->is_depart())
  119.                 return false;
  120.         if_node *nif = static_cast<if_node*>(nd1->first);
  121.         if (!nif->is_if())
  122.                 return false;
  123.         depart_node *nd2 = static_cast<depart_node*>(nif->first);
  124.         if (!nd2->is_depart())
  125.                 return false;
  126.  
  127.         value* &em = nif->cond;
  128.  
  129.         node_stats s;
  130.  
  131.         r->collect_stats(s);
  132.  
  133.         IFC_DUMP(
  134.                 sblog << "ifcvt: region " << r->region_id << " :\n";
  135.                 s.dump();
  136.         );
  137.  
  138.         if (s.region_count || s.fetch_count || s.alu_kill_count ||
  139.                         s.if_count != 1 || s.repeat_count)
  140.                 return false;
  141.  
  142.         unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
  143.  
  144.         // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
  145.         // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
  146.         // are eliminated. According to the docs, cost of CF instruction is
  147.         // equal to ~40 ALU VLIW instructions (instruction groups),
  148.         // so we have eliminated cost equal to ~120 groups in total.
  149.         // Let's also assume that we have avg 3 ALU instructions per group,
  150.         // This means that potential eliminated cost is about 360 single alu inst.
  151.         // On the other hand, we are speculatively executing conditional code now,
  152.         // so we are increasing the cost in some cases. In the worst case, we'll
  153.         // have to execute real_alu_count additional alu instructions instead of
  154.         // jumping over them. Let's assume for now that average added cost is
  155.         //
  156.         // (0.9 * real_alu_count)
  157.         //
  158.         // So we should perform if_conversion if
  159.         //
  160.         // (0.9 * real_alu_count) < 360, or
  161.         //
  162.         // real_alu_count < 400
  163.         //
  164.         // So if real_alu_count is more than 400, than we think that if_conversion
  165.         // doesn't make sense.
  166.  
  167.         // FIXME: We can use more precise heuristic, taking into account sizes of
  168.         // the branches and their probability instead of total size.
  169.         // Another way to improve this is to consider the number of the groups
  170.         // instead of the number of instructions (taking into account actual VLIW
  171.         // packing).
  172.         // (Currently we don't know anything about packing at this stage, but
  173.         // probably we can make some more precise estimations anyway)
  174.  
  175.         if (real_alu_count > 400)
  176.                 return false;
  177.  
  178.         IFC_DUMP( sblog << "if_cvt: processing...\n"; );
  179.  
  180.         value *select = get_select_value_for_em(sh, em);
  181.  
  182.         if (!select)
  183.                 return false;
  184.  
  185.         for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) {
  186.                 node *n = *I;
  187.  
  188.                 alu_node *ns = convert_phi(select, n);
  189.  
  190.                 if (ns)
  191.                         r->insert_after(ns);
  192.         }
  193.  
  194.         nd2->expand();
  195.         nif->expand();
  196.         nd1->expand();
  197.         r->expand();
  198.  
  199.         return true;
  200. }
  201.  
  202. bool if_conversion::run_on(region_node* r) {
  203.  
  204.         if (r->dep_count() != 2 || r->rep_count() != 1)
  205.                 return false;
  206.  
  207.         depart_node *nd1 = static_cast<depart_node*>(r->first);
  208.         if (!nd1->is_depart())
  209.                 return false;
  210.         if_node *nif = static_cast<if_node*>(nd1->first);
  211.         if (!nif->is_if())
  212.                 return false;
  213.         depart_node *nd2 = static_cast<depart_node*>(nif->first);
  214.         if (!nd2->is_depart())
  215.                 return false;
  216.  
  217.         value* &em = nif->cond;
  218.  
  219.         convert_kill_instructions(r, em, true, nd2);
  220.         convert_kill_instructions(r, em, false, nd1);
  221.  
  222.         if (check_and_convert(r))
  223.                 return true;
  224.  
  225.         if (nd2->empty() && nif->next) {
  226.                 // empty true branch, non-empty false branch
  227.                 // we'll invert it to get rid of 'else'
  228.  
  229.                 assert(em && em->def);
  230.  
  231.                 alu_node *predset = static_cast<alu_node*>(em->def);
  232.  
  233.                 // create clone of PREDSET instruction with inverted condition.
  234.                 // PREDSET has 3 dst operands in our IR (value written to gpr,
  235.                 // predicate value and exec mask value), we'll split it such that
  236.                 // new PREDSET will define exec mask value only, and two others will
  237.                 // be defined in the old PREDSET (if they are not used then DCE will
  238.                 // simply remove old PREDSET).
  239.  
  240.                 alu_node *newpredset = sh.clone(predset);
  241.                 predset->insert_after(newpredset);
  242.  
  243.                 predset->dst[2] = NULL;
  244.  
  245.                 newpredset->dst[0] = NULL;
  246.                 newpredset->dst[1] = NULL;
  247.  
  248.                 em->def = newpredset;
  249.  
  250.                 unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
  251.                 unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
  252.                 bool swapargs = false;
  253.  
  254.                 cc = invert_setcc_condition(cc, swapargs);
  255.  
  256.                 if (swapargs) {
  257.                         std::swap(newpredset->src[0], newpredset->src[1]);
  258.                         std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
  259.                 }
  260.  
  261.                 unsigned newopcode = get_predsetcc_op(cc, cmptype);
  262.                 newpredset->bc.set_op(newopcode);
  263.  
  264.                 // move the code from the 'false' branch ('else') to the 'true' branch
  265.                 nd2->move(nif->next, NULL);
  266.  
  267.                 // swap phi operands
  268.                 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
  269.                                 ++I) {
  270.                         node *p = *I;
  271.                         assert(p->src.size() == 2);
  272.                         std::swap(p->src[0], p->src[1]);
  273.                 }
  274.         }
  275.  
  276.         return false;
  277. }
  278.  
  279. alu_node* if_conversion::convert_phi(value* select, node* phi) {
  280.         assert(phi->dst.size() == 1 || phi->src.size() == 2);
  281.  
  282.         value *d = phi->dst[0];
  283.         value *v1 = phi->src[0];
  284.         value *v2 = phi->src[1];
  285.  
  286.         assert(d);
  287.  
  288.         if (!d->is_any_gpr())
  289.                 return NULL;
  290.  
  291.         if (v1->is_undef()) {
  292.                 if (v2->is_undef()) {
  293.                         return NULL;
  294.                 } else {
  295.                         return sh.create_mov(d, v2);
  296.                 }
  297.         } else if (v2->is_undef())
  298.                 return sh.create_mov(d, v1);
  299.  
  300.         alu_node* n = sh.create_alu();
  301.  
  302.         n->bc.set_op(ALU_OP3_CNDE_INT);
  303.         n->dst.push_back(d);
  304.         n->src.push_back(select);
  305.         n->src.push_back(v1);
  306.         n->src.push_back(v2);
  307.  
  308.         return n;
  309. }
  310.  
  311. } // namespace r600_sb
  312.