Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (C) 2005 Ben Skeggs.
  3.  *
  4.  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
  5.  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
  6.  *
  7.  * All Rights Reserved.
  8.  *
  9.  * Permission is hereby granted, free of charge, to any person obtaining
  10.  * a copy of this software and associated documentation files (the
  11.  * "Software"), to deal in the Software without restriction, including
  12.  * without limitation the rights to use, copy, modify, merge, publish,
  13.  * distribute, sublicense, and/or sell copies of the Software, and to
  14.  * permit persons to whom the Software is furnished to do so, subject to
  15.  * the following conditions:
  16.  *
  17.  * The above copyright notice and this permission notice (including the
  18.  * next paragraph) shall be included in all copies or substantial
  19.  * portions of the Software.
  20.  *
  21.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22.  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24.  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  25.  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26.  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27.  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28.  *
  29.  */
  30.  
  31. /**
  32.  * \file
  33.  *
  34.  * \author Ben Skeggs <darktama@iinet.net.au>
  35.  *
  36.  * \author Jerome Glisse <j.glisse@gmail.com>
  37.  *
  38.  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
  39.  *
  40.  */
  41.  
  42. #include "r500_fragprog.h"
  43.  
  44. #include "../r300_reg.h"
  45.  
  46. #include "radeon_program_pair.h"
  47.  
  /* Declares a local `code` that points at the R500 code of the compiler `c` in scope. */
  #define PROG_CODE \
          struct r500_fragment_program_code *code = &c->code->code.r500

  /*
   * Reports an error through rc_error() on the compiler `c` in scope. The
   * message is prefixed with the file and function name and gets a trailing
   * newline appended.
   */
  #define error(fmt, ...) \
          do { \
                  rc_error(&c->Base, "%s::%s(): " fmt "\n", \
                           __FILE__, __FUNCTION__, ##__VA_ARGS__); \
          } while (0)
  55.  
  56.  
  /* Instruction indices of the flow-control slots that belong to one IF block. */
  struct branch_info {
          int If;    /* slot taken by the IF */
          int Else;  /* slot taken by the ELSE; -1 while no ELSE has been seen */
          int Endif; /* slot taken by the ENDIF; -1 until the ENDIF is reached */
  };
  62.  
  /* Bookkeeping for one loop that is currently open while emitting. */
  struct r500_loop_info {
          /* Instruction index of the BGNLOOP. */
          int BgnLoop;

          /* Branch depth that was current when the loop was opened. */
          int BranchDepth;

          /* Instruction indices of BRK instructions seen inside the loop. */
          int *Brks;
          int BrkCount;
          int BrkReserved;

          /* Instruction indices of CONT instructions seen inside the loop. */
          int *Conts;
          int ContCount;
          int ContReserved;
  };
  75.  
  /* State carried across flow-control instructions while emitting one program. */
  struct emit_state {
          struct radeon_compiler *C;
          struct r500_fragment_program_code *Code;

          /* Stack of open IF blocks. */
          struct branch_info *Branches;
          unsigned int CurrentBranchDepth;
          unsigned int BranchesReserved;

          /* Stack of open loops. */
          struct r500_loop_info *Loops;
          unsigned int CurrentLoopDepth;
          unsigned int LoopsReserved;

          /* Largest value CurrentBranchDepth has reached. */
          unsigned int MaxBranchDepth;
  };
  91.  
  92. static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
  93. {
  94.         switch(opcode) {
  95.         case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
  96.         case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
  97.         case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
  98.         case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
  99.         case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
  100.         case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
  101.         case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
  102.         default:
  103.                 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
  104.                 /* fall through */
  105.         case RC_OPCODE_NOP:
  106.                 /* fall through */
  107.         case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
  108.         case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
  109.         case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
  110.         case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
  111.         }
  112. }
  113.  
  114. static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
  115. {
  116.         switch(opcode) {
  117.         case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
  118.         case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
  119.         case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
  120.         case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
  121.         case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
  122.         case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
  123.         case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
  124.         case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
  125.         case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
  126.         case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
  127.         default:
  128.                 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
  129.                 /* fall through */
  130.         case RC_OPCODE_NOP:
  131.                 /* fall through */
  132.         case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
  133.         case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
  134.         case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
  135.         case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
  136.         case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
  137.         case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
  138.         }
  139. }
  140.  
  141. static unsigned int fix_hw_swizzle(unsigned int swz)
  142. {
  143.     switch (swz) {
  144.         case RC_SWIZZLE_ZERO:
  145.         case RC_SWIZZLE_UNUSED:
  146.             swz = 4;
  147.             break;
  148.         case RC_SWIZZLE_HALF:
  149.             swz = 5;
  150.             break;
  151.         case RC_SWIZZLE_ONE:
  152.             swz = 6;
  153.             break;
  154.     }
  155.  
  156.         return swz;
  157. }
  158.  
  159. static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
  160. {
  161.         unsigned int t = inst->RGB.Arg[arg].Source;
  162.         int comp;
  163.         t |= inst->RGB.Arg[arg].Negate << 11;
  164.         t |= inst->RGB.Arg[arg].Abs << 12;
  165.  
  166.         for(comp = 0; comp < 3; ++comp)
  167.                 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
  168.  
  169.         return t;
  170. }
  171.  
  172. static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
  173. {
  174.         unsigned int t = inst->Alpha.Arg[i].Source;
  175.         t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
  176.         t |= inst->Alpha.Arg[i].Negate << 5;
  177.         t |= inst->Alpha.Arg[i].Abs << 6;
  178.         return t;
  179. }
  180.  
  181. static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
  182. {
  183.         switch(func) {
  184.         case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
  185.         case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
  186.         case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
  187.         case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
  188.         default:
  189.                 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
  190.                 return 0;
  191.         }
  192. }
  193.  
  194. static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
  195. {
  196.         if (index > code->max_temp_idx)
  197.                 code->max_temp_idx = index;
  198. }
  199.  
  200. static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
  201. {
  202.         /* From docs:
  203.          *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
  204.          * MSB = 1 << 7 */
  205.         if (!src.Used)
  206.                 return 1 << 7;
  207.  
  208.         if (src.File == RC_FILE_CONSTANT) {
  209.                 return src.Index | R500_RGB_ADDR0_CONST;
  210.         } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
  211.                 use_temporary(code, src.Index);
  212.                 return src.Index;
  213.         } else if (src.File == RC_FILE_INLINE) {
  214.                 return src.Index | (1 << 7);
  215.         }
  216.  
  217.         return 0;
  218. }
  219.  
  220. /**
  221.  * NOP the specified instruction if it is not a texture lookup.
  222.  */
  223. static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
  224. {
  225.         PROG_CODE;
  226.  
  227.         if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
  228.                 code->inst[ip].inst0 |= R500_INST_NOP;
  229.         }
  230. }
  231.  
  232. /**
  233.  * Emit a paired ALU instruction.
  234.  */
  235. static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
  236. {
  237.         int ip;
  238.         PROG_CODE;
  239.  
  240.         if (code->inst_end >= c->Base.max_alu_insts-1) {
  241.                 error("emit_alu: Too many instructions");
  242.                 return;
  243.         }
  244.  
  245.         ip = ++code->inst_end;
  246.  
  247.         /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
  248.         if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
  249.                 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
  250.                 if (ip > 0) {
  251.                         alu_nop(c, ip - 1);
  252.                 }
  253.         }
  254.  
  255.         code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
  256.         code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
  257.  
  258.         if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
  259.                 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
  260.                 if (inst->WriteALUResult) {
  261.                         error("Cannot write output and ALU result at the same time");
  262.                         return;
  263.                 }
  264.         } else {
  265.                 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
  266.         }
  267.         code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
  268.  
  269.         code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
  270.         code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
  271.         code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
  272.         if (inst->Nop) {
  273.                 code->inst[ip].inst0 |= R500_INST_NOP;
  274.         }
  275.         if (inst->Alpha.DepthWriteMask) {
  276.                 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
  277.                 c->code->writes_depth = 1;
  278.         }
  279.  
  280.         code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
  281.         code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
  282.         use_temporary(code, inst->Alpha.DestIndex);
  283.         use_temporary(code, inst->RGB.DestIndex);
  284.  
  285.         if (inst->RGB.Saturate)
  286.                 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
  287.         if (inst->Alpha.Saturate)
  288.                 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
  289.  
  290.         /* Set the presubtract operation. */
  291.         switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
  292.                 case RC_PRESUB_BIAS:
  293.                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
  294.                         break;
  295.                 case RC_PRESUB_SUB:
  296.                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
  297.                         break;
  298.                 case RC_PRESUB_ADD:
  299.                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
  300.                         break;
  301.                 case RC_PRESUB_INV:
  302.                         code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
  303.                         break;
  304.                 default:
  305.                         break;
  306.         }
  307.         switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
  308.                 case RC_PRESUB_BIAS:
  309.                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
  310.                         break;
  311.                 case RC_PRESUB_SUB:
  312.                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
  313.                         break;
  314.                 case RC_PRESUB_ADD:
  315.                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
  316.                         break;
  317.                 case RC_PRESUB_INV:
  318.                         code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
  319.                         break;
  320.                 default:
  321.                         break;
  322.         }
  323.  
  324.         /* Set the output modifier */
  325.         code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
  326.         code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
  327.  
  328.         code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
  329.         code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
  330.         code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
  331.  
  332.         code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
  333.         code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
  334.         code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
  335.  
  336.         code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
  337.         code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
  338.         code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
  339.  
  340.         code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
  341.         code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
  342.         code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
  343.  
  344.         code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
  345.         code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
  346.  
  347.         if (inst->WriteALUResult) {
  348.                 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
  349.  
  350.                 if (inst->WriteALUResult == RC_ALURESULT_X)
  351.                         code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
  352.                 else
  353.                         code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
  354.  
  355.                 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
  356.         }
  357. }
  358.  
  /**
   * Repack a four-component swizzle so that each component takes two bits:
   * the low two bits of component i end up at bit position 2*i.
   */
  static unsigned int translate_strq_swizzle(unsigned int swizzle)
  {
          unsigned int packed = 0;
          int chan = 0;

          while (chan < 4) {
                  packed |= (GET_SWZ(swizzle, chan) & 0x3) << chan*2;
                  chan++;
          }

          return packed;
  }
  367.  
  368. /**
  369.  * Emit a single TEX instruction
  370.  */
  371. static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
  372. {
  373.         int ip;
  374.         PROG_CODE;
  375.  
  376.         if (code->inst_end >= c->Base.max_alu_insts-1) {
  377.                 error("emit_tex: Too many instructions");
  378.                 return 0;
  379.         }
  380.  
  381.         ip = ++code->inst_end;
  382.  
  383.         code->inst[ip].inst0 = R500_INST_TYPE_TEX
  384.                 | (inst->DstReg.WriteMask << 11)
  385.                 | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
  386.         code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
  387.                 | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
  388.  
  389.         if (inst->TexSrcTarget == RC_TEXTURE_RECT)
  390.                 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
  391.  
  392.         switch (inst->Opcode) {
  393.         case RC_OPCODE_KIL:
  394.                 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
  395.                 break;
  396.         case RC_OPCODE_TEX:
  397.                 code->inst[ip].inst1 |= R500_TEX_INST_LD;
  398.                 break;
  399.         case RC_OPCODE_TXB:
  400.                 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
  401.                 break;
  402.         case RC_OPCODE_TXP:
  403.                 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
  404.                 break;
  405.         case RC_OPCODE_TXD:
  406.                 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
  407.                 break;
  408.         case RC_OPCODE_TXL:
  409.                 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
  410.                 break;
  411.         default:
  412.                 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
  413.         }
  414.  
  415.         use_temporary(code, inst->SrcReg[0].Index);
  416.         if (inst->Opcode != RC_OPCODE_KIL)
  417.                 use_temporary(code, inst->DstReg.Index);
  418.  
  419.         code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
  420.                 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
  421.                 | R500_TEX_DST_ADDR(inst->DstReg.Index)
  422.                 | (GET_SWZ(inst->TexSwizzle, 0) << 24)
  423.                 | (GET_SWZ(inst->TexSwizzle, 1) << 26)
  424.                 | (GET_SWZ(inst->TexSwizzle, 2) << 28)
  425.                 | (GET_SWZ(inst->TexSwizzle, 3) << 30)
  426.                 ;
  427.  
  428.         if (inst->Opcode == RC_OPCODE_TXD) {
  429.                 use_temporary(code, inst->SrcReg[1].Index);
  430.                 use_temporary(code, inst->SrcReg[2].Index);
  431.  
  432.                 /* DX and DY parameters are specified in a separate register. */
  433.                 code->inst[ip].inst3 =
  434.                         R500_DX_ADDR(inst->SrcReg[1].Index) |
  435.                         (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
  436.                         R500_DY_ADDR(inst->SrcReg[2].Index) |
  437.                         (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
  438.         }
  439.  
  440.         return 1;
  441. }
  442.  
  443. static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
  444. {
  445.         unsigned int newip;
  446.  
  447.         if (s->Code->inst_end >= s->C->max_alu_insts-1) {
  448.                 rc_error(s->C, "emit_tex: Too many instructions");
  449.                 return;
  450.         }
  451.  
  452.         newip = ++s->Code->inst_end;
  453.  
  454.         /* Currently all loops use the same integer constant to intialize
  455.          * the loop variables. */
  456.         if(!s->Code->int_constants[0]) {
  457.                 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
  458.                 s->Code->int_constant_count = 1;
  459.         }
  460.         s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
  461.  
  462.         switch(inst->U.I.Opcode){
  463.         struct branch_info * branch;
  464.         struct r500_loop_info * loop;
  465.         case RC_OPCODE_BGNLOOP:
  466.                 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
  467.                         s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
  468.  
  469.                 loop = &s->Loops[s->CurrentLoopDepth++];
  470.                 memset(loop, 0, sizeof(struct r500_loop_info));
  471.                 loop->BranchDepth = s->CurrentBranchDepth;
  472.                 loop->BgnLoop = newip;
  473.  
  474.                 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
  475.                         | R500_FC_JUMP_FUNC(0x00)
  476.                         | R500_FC_IGNORE_UNCOVERED
  477.                         ;
  478.                 break;
  479.         case RC_OPCODE_BRK:
  480.                 loop = &s->Loops[s->CurrentLoopDepth - 1];
  481.                 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
  482.                                         loop->BrkCount, loop->BrkReserved, 1);
  483.  
  484.                 loop->Brks[loop->BrkCount++] = newip;
  485.                 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
  486.                         | R500_FC_JUMP_FUNC(0xff)
  487.                         | R500_FC_B_OP1_DECR
  488.                         | R500_FC_B_POP_CNT(
  489.                                 s->CurrentBranchDepth - loop->BranchDepth)
  490.                         | R500_FC_IGNORE_UNCOVERED
  491.                         ;
  492.                 break;
  493.  
  494.         case RC_OPCODE_CONT:
  495.                 loop = &s->Loops[s->CurrentLoopDepth - 1];
  496.                 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
  497.                                         loop->ContCount, loop->ContReserved, 1);
  498.                 loop->Conts[loop->ContCount++] = newip;
  499.                 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
  500.                         | R500_FC_JUMP_FUNC(0xff)
  501.                         | R500_FC_B_OP1_DECR
  502.                         | R500_FC_B_POP_CNT(
  503.                                 s->CurrentBranchDepth - loop->BranchDepth)
  504.                         | R500_FC_IGNORE_UNCOVERED
  505.                         ;
  506.                 break;
  507.  
  508.         case RC_OPCODE_ENDLOOP:
  509.         {
  510.                 loop = &s->Loops[s->CurrentLoopDepth - 1];
  511.                 /* Emit ENDLOOP */
  512.                 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
  513.                         | R500_FC_JUMP_FUNC(0xff)
  514.                         | R500_FC_JUMP_ANY
  515.                         | R500_FC_IGNORE_UNCOVERED
  516.                         ;
  517.                 /* The constant integer at index 0 is used by all loops. */
  518.                 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
  519.                         | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
  520.                         ;
  521.  
  522.                 /* Set jump address and int constant for BGNLOOP */
  523.                 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
  524.                         | R500_FC_JUMP_ADDR(newip)
  525.                         ;
  526.  
  527.                 /* Set jump address for the BRK instructions. */
  528.                 while(loop->BrkCount--) {
  529.                         s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
  530.                                                 R500_FC_JUMP_ADDR(newip + 1);
  531.                 }
  532.  
  533.                 /* Set jump address for CONT instructions. */
  534.                 while(loop->ContCount--) {
  535.                         s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
  536.                                                 R500_FC_JUMP_ADDR(newip);
  537.                 }
  538.                 s->CurrentLoopDepth--;
  539.                 break;
  540.         }
  541.         case RC_OPCODE_IF:
  542.                 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
  543.                         rc_error(s->C, "Branch depth exceeds hardware limit");
  544.                         return;
  545.                 }
  546.                 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
  547.                                 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
  548.  
  549.                 branch = &s->Branches[s->CurrentBranchDepth++];
  550.                 branch->If = newip;
  551.                 branch->Else = -1;
  552.                 branch->Endif = -1;
  553.  
  554.                 if (s->CurrentBranchDepth > s->MaxBranchDepth)
  555.                         s->MaxBranchDepth = s->CurrentBranchDepth;
  556.  
  557.                 /* actual instruction is filled in at ENDIF time */
  558.                 break;
  559.        
  560.         case RC_OPCODE_ELSE:
  561.                 if (!s->CurrentBranchDepth) {
  562.                         rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
  563.                         return;
  564.                 }
  565.  
  566.                 branch = &s->Branches[s->CurrentBranchDepth - 1];
  567.                 branch->Else = newip;
  568.  
  569.                 /* actual instruction is filled in at ENDIF time */
  570.                 break;
  571.  
  572.         case RC_OPCODE_ENDIF:
  573.                 if (!s->CurrentBranchDepth) {
  574.                         rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
  575.                         return;
  576.                 }
  577.  
  578.                 branch = &s->Branches[s->CurrentBranchDepth - 1];
  579.                 branch->Endif = newip;
  580.  
  581.                 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
  582.                         | R500_FC_A_OP_NONE /* no address stack */
  583.                         | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
  584.                         | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
  585.                         | R500_FC_B_OP1_NONE /* no branch counter if stay */
  586.                         | R500_FC_B_POP_CNT(1)
  587.                         ;
  588.                 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
  589.                 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
  590.                         | R500_FC_A_OP_NONE /* no address stack */
  591.                         | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
  592.                         | R500_FC_B_OP0_INCR /* increment branch counter if stay */
  593.                         | R500_FC_IGNORE_UNCOVERED
  594.                 ;
  595.  
  596.                 if (branch->Else >= 0) {
  597.                         /* increment branch counter also if jump */
  598.                         s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
  599.                         s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
  600.  
  601.                         s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
  602.                                 | R500_FC_A_OP_NONE /* no address stack */
  603.                                 | R500_FC_B_ELSE /* all active pixels want to jump */
  604.                                 | R500_FC_B_OP0_NONE /* no counter op if stay */
  605.                                 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
  606.                                 | R500_FC_B_POP_CNT(1)
  607.                         ;
  608.                         s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
  609.                 } else {
  610.                         /* don't touch branch counter on jump */
  611.                         s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
  612.                         s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
  613.                 }
  614.  
  615.  
  616.                 s->CurrentBranchDepth--;
  617.                 break;
  618.         default:
  619.                 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
  620.         }
  621. }
  622.  
  623. void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
  624. {
  625.         struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
  626.         struct emit_state s;
  627.         struct r500_fragment_program_code *code = &compiler->code->code.r500;
  628.  
  629.         memset(&s, 0, sizeof(s));
  630.         s.C = &compiler->Base;
  631.         s.Code = code;
  632.  
  633.         memset(code, 0, sizeof(*code));
  634.         code->max_temp_idx = 1;
  635.         code->inst_end = -1;
  636.  
  637.         for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
  638.             inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
  639.             inst = inst->Next) {
  640.                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
  641.                         const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
  642.  
  643.                         if (opcode->IsFlowControl) {
  644.                                 emit_flowcontrol(&s, inst);
  645.                         } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
  646.                                 continue;
  647.                         } else {
  648.                                 emit_tex(compiler, &inst->U.I);
  649.                         }
  650.                 } else {
  651.                         emit_paired(compiler, &inst->U.P);
  652.                 }
  653.         }
  654.  
  655.         if (code->max_temp_idx >= compiler->Base.max_temp_regs)
  656.                 rc_error(&compiler->Base, "Too many hardware temporaries used");
  657.  
  658.         if (compiler->Base.Error)
  659.                 return;
  660.  
  661.         if (code->inst_end == -1 ||
  662.             (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
  663.                 int ip;
  664.  
  665.                 /* This may happen when dead-code elimination is disabled or
  666.                  * when most of the fragment program logic is leading to a KIL */
  667.                 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
  668.                         rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
  669.                         return;
  670.                 }
  671.  
  672.                 ip = ++code->inst_end;
  673.                 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
  674.         }
  675.  
  676.         /* Make sure TEX_SEM_WAIT is set on the last instruction */
  677.         code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
  678.  
  679.         /* Enable full flow control mode if we are using loops or have if
  680.          * statements nested at least four deep. */
  681.         if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
  682.                 if (code->max_temp_idx < 1)
  683.                         code->max_temp_idx = 1;
  684.  
  685.                 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
  686.         }
  687. }
  688.