Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2012 Advanced Micro Devices, Inc.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  8.  * license, and/or sell copies of the Software, and to permit persons to whom
  9.  * the Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18.  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  *
  23.  * Author: Tom Stellard <thomas.stellard@amd.com>
  24.  */
  25.  
  26. #include "radeon_compiler.h"
  27. #include "radeon_compiler_util.h"
  28. #include "radeon_dataflow.h"
  29. #include "radeon_program.h"
  30. #include "radeon_program_constants.h"
  31.  
  32. struct vert_fc_state {
  33.         struct radeon_compiler *C;
  34.         unsigned BranchDepth;
  35.         unsigned LoopDepth;
  36.         unsigned LoopsReserved;
  37.         int PredStack[R500_PVS_MAX_LOOP_DEPTH];
  38.         int PredicateReg;
  39.         unsigned InCFBreak;
  40. };
  41.  
  42. static void build_pred_src(
  43.         struct rc_src_register * src,
  44.         struct vert_fc_state * fc_state)
  45. {
  46.         src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
  47.                                         RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
  48.         src->File = RC_FILE_TEMPORARY;
  49.         src->Index = fc_state->PredicateReg;
  50. }
  51.  
  52. static void build_pred_dst(
  53.         struct rc_dst_register * dst,
  54.         struct vert_fc_state * fc_state)
  55. {
  56.         dst->WriteMask = RC_MASK_W;
  57.         dst->File = RC_FILE_TEMPORARY;
  58.         dst->Index = fc_state->PredicateReg;
  59. }
  60.  
  61. static void mark_write(void * userdata, struct rc_instruction * inst,
  62.                 rc_register_file file,  unsigned int index, unsigned int mask)
  63. {
  64.         unsigned int * writemasks = userdata;
  65.  
  66.         if (file != RC_FILE_TEMPORARY)
  67.                 return;
  68.  
  69.         if (index >= R300_VS_MAX_TEMPS)
  70.                 return;
  71.  
  72.         writemasks[index] |= mask;
  73. }
  74.  
  75. static int reserve_predicate_reg(struct vert_fc_state * fc_state)
  76. {
  77.         int i;
  78.         unsigned int writemasks[RC_REGISTER_MAX_INDEX];
  79.         struct rc_instruction * inst;
  80.         memset(writemasks, 0, sizeof(writemasks));
  81.         for(inst = fc_state->C->Program.Instructions.Next;
  82.                                 inst != &fc_state->C->Program.Instructions;
  83.                                 inst = inst->Next) {
  84.                 rc_for_all_writes_mask(inst, mark_write, writemasks);
  85.         }
  86.  
  87.         for(i = 0; i < fc_state->C->max_temp_regs; i++) {
  88.                 /* Most of the control flow instructions only write the
  89.                  * W component of the Predicate Register, but
  90.                  * the docs say that ME_PRED_SET_CLR and
  91.                  * ME_PRED_SET_RESTORE write all components of the
  92.                  * register, so we must reserve a register that has
  93.                  * all its components free. */
  94.                 if (!writemasks[i]) {
  95.                         fc_state->PredicateReg = i;
  96.                         break;
  97.                 }
  98.         }
  99.         if (i == fc_state->C->max_temp_regs) {
  100.                 rc_error(fc_state->C, "No free temporary to use for"
  101.                                 " predicate stack counter.\n");
  102.                 return -1;
  103.         }
  104.         return 1;
  105. }
  106.  
  107. static void lower_bgnloop(
  108.         struct rc_instruction * inst,
  109.         struct vert_fc_state * fc_state)
  110. {
  111.         struct rc_instruction * new_inst =
  112.                         rc_insert_new_instruction(fc_state->C, inst->Prev);
  113.  
  114.         if ((!fc_state->C->is_r500
  115.                 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
  116.              || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
  117.                 rc_error(fc_state->C, "Loops are nested too deep.");
  118.                 return;
  119.         }
  120.  
  121.         if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
  122.                 if (fc_state->PredicateReg == -1) {
  123.                         if (reserve_predicate_reg(fc_state) == -1) {
  124.                                 return;
  125.                         }
  126.                 }
  127.  
  128.                 /* Initialize the predicate bit to true. */
  129.                 new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
  130.                 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
  131.                 new_inst->U.I.SrcReg[0].Index = 0;
  132.                 new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
  133.                 new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
  134.         } else {
  135.                 fc_state->PredStack[fc_state->LoopDepth] =
  136.                                                 fc_state->PredicateReg;
  137.                 /* Copy the the current predicate value to this loop's
  138.                  * predicate register */
  139.  
  140.                 /* Use the old predicate value for src0 */
  141.                 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
  142.  
  143.                 /* Reserve this loop's predicate register */
  144.                 if (reserve_predicate_reg(fc_state) == -1) {
  145.                         return;
  146.                 }
  147.  
  148.                 /* Copy the old predicate value to the new register */
  149.                 new_inst->U.I.Opcode = RC_OPCODE_ADD;
  150.                 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
  151.                 new_inst->U.I.SrcReg[1].Index = 0;
  152.                 new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
  153.                 new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
  154.         }
  155.  
  156. }
  157.  
  158. static void lower_brk(
  159.         struct rc_instruction * inst,
  160.         struct vert_fc_state * fc_state)
  161. {
  162.         if (fc_state->LoopDepth == 1) {
  163.                 inst->U.I.Opcode = RC_OPCODE_RCP;
  164.                 inst->U.I.DstReg.Pred = RC_PRED_INV;
  165.                 inst->U.I.SrcReg[0].Index = 0;
  166.                 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
  167.                 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
  168.         } else {
  169.                 inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
  170.                 inst->U.I.DstReg.Pred = RC_PRED_SET;
  171.         }
  172.  
  173.         build_pred_dst(&inst->U.I.DstReg, fc_state);
  174. }
  175.  
  176. static void lower_endloop(
  177.         struct rc_instruction * inst,
  178.         struct vert_fc_state * fc_state)
  179. {
  180.         struct rc_instruction * new_inst =
  181.                         rc_insert_new_instruction(fc_state->C, inst);
  182.  
  183.         new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
  184.         build_pred_dst(&new_inst->U.I.DstReg, fc_state);
  185.         /* Restore the previous predicate register. */
  186.         fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
  187.         build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
  188. }
  189.  
  190. static void lower_if(
  191.         struct rc_instruction * inst,
  192.         struct vert_fc_state * fc_state)
  193. {
  194.         /* Reserve a temporary to use as our predicate stack counter, if we
  195.          * don't already have one. */
  196.         if (fc_state->PredicateReg == -1) {
  197.                 /* If we are inside a loop, the Predicate Register should
  198.                  * have already been defined. */
  199.                 assert(fc_state->LoopDepth == 0);
  200.  
  201.                 if (reserve_predicate_reg(fc_state) == -1) {
  202.                         return;
  203.                 }
  204.         }
  205.  
  206.         if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
  207.                 fc_state->InCFBreak = 1;
  208.         }
  209.         if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
  210.                         || (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
  211.                 if (fc_state->InCFBreak) {
  212.                         inst->U.I.Opcode = RC_ME_PRED_SEQ;
  213.                         inst->U.I.DstReg.Pred = RC_PRED_SET;
  214.                 } else {
  215.                         inst->U.I.Opcode = RC_ME_PRED_SNEQ;
  216.                 }
  217.         } else {
  218.                 unsigned swz;
  219.                 inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
  220.                 memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
  221.                                                 sizeof(inst->U.I.SrcReg[1]));
  222.                 swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
  223.                 /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
  224.                  * w component */
  225.                 inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
  226.                                 RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
  227.                 build_pred_src(&inst->U.I.SrcReg[0], fc_state);
  228.         }
  229.         build_pred_dst(&inst->U.I.DstReg, fc_state);
  230. }
  231.  
  232. void rc_vert_fc(struct radeon_compiler *c, void *user)
  233. {
  234.         struct rc_instruction * inst;
  235.         struct vert_fc_state fc_state;
  236.  
  237.         memset(&fc_state, 0, sizeof(fc_state));
  238.         fc_state.PredicateReg = -1;
  239.         fc_state.C = c;
  240.  
  241.         for(inst = c->Program.Instructions.Next;
  242.                                         inst != &c->Program.Instructions;
  243.                                         inst = inst->Next) {
  244.  
  245.                 switch (inst->U.I.Opcode) {
  246.  
  247.                 case RC_OPCODE_BGNLOOP:
  248.                         lower_bgnloop(inst, &fc_state);
  249.                         fc_state.LoopDepth++;
  250.                         break;
  251.  
  252.                 case RC_OPCODE_BRK:
  253.                         lower_brk(inst, &fc_state);
  254.                         break;
  255.  
  256.                 case RC_OPCODE_ENDLOOP:
  257.                         if (fc_state.BranchDepth != 0
  258.                                         || fc_state.LoopDepth != 1) {
  259.                                 lower_endloop(inst, &fc_state);
  260.                         }
  261.                         fc_state.LoopDepth--;
  262.                         /* Skip PRED_RESTORE */
  263.                         inst = inst->Next;
  264.                         break;
  265.                 case RC_OPCODE_IF:
  266.                         lower_if(inst, &fc_state);
  267.                         fc_state.BranchDepth++;
  268.                         break;
  269.  
  270.                 case RC_OPCODE_ELSE:
  271.                         inst->U.I.Opcode = RC_ME_PRED_SET_INV;
  272.                         build_pred_dst(&inst->U.I.DstReg, &fc_state);
  273.                         build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
  274.                         break;
  275.  
  276.                 case RC_OPCODE_ENDIF:
  277.                         if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
  278.                                 struct rc_instruction * to_delete = inst;
  279.                                 inst = inst->Prev;
  280.                                 rc_remove_instruction(to_delete);
  281.                                 /* XXX: Delete the endif instruction */
  282.                         } else {
  283.                                 inst->U.I.Opcode = RC_ME_PRED_SET_POP;
  284.                                 build_pred_dst(&inst->U.I.DstReg, &fc_state);
  285.                                 build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
  286.                         }
  287.                         fc_state.InCFBreak = 0;
  288.                         fc_state.BranchDepth--;
  289.                         break;
  290.  
  291.                 default:
  292.                         if (fc_state.BranchDepth || fc_state.LoopDepth) {
  293.                                 inst->U.I.DstReg.Pred = RC_PRED_SET;
  294.                         }
  295.                         break;
  296.                 }
  297.  
  298.                 if (c->Error) {
  299.                         return;
  300.                 }
  301.         }
  302. }
  303.