Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (C) 2009 Nicolai Haehnle.
  3.  *
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining
  7.  * a copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sublicense, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial
  16.  * portions of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19.  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21.  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22.  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23.  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24.  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  */
  27.  
  28. #include "radeon_program_pair.h"
  29.  
  30. #include <stdio.h>
  31.  
  32. #include "radeon_compiler.h"
  33. #include "radeon_compiler_util.h"
  34. #include "radeon_dataflow.h"
  35. #include "radeon_list.h"
  36. #include "radeon_variable.h"
  37.  
  38. #include "util/u_debug.h"
  39.  
  40. #define VERBOSE 0
  41.  
  42. #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
  43.  
  44. struct schedule_instruction {
  45.         struct rc_instruction * Instruction;
  46.  
  47.         /** Next instruction in the linked list of ready instructions. */
  48.         struct schedule_instruction *NextReady;
  49.  
  50.         /** Values that this instruction reads and writes */
  51.         struct reg_value * WriteValues[4];
  52.         struct reg_value * ReadValues[12];
  53.         unsigned int NumWriteValues:3;
  54.         unsigned int NumReadValues:4;
  55.  
  56.         /**
  57.          * Number of (read and write) dependencies that must be resolved before
  58.          * this instruction can be scheduled.
  59.          */
  60.         unsigned int NumDependencies:5;
  61.  
  62.         /** List of all readers (see rc_get_readers() for the definition of
  63.          * "all readers"), even those outside the basic block this instruction
  64.          * lives in. */
  65.         struct rc_reader_data GlobalReaders;
  66.  
  67.         /** If the scheduler has paired an RGB and an Alpha instruction together,
  68.          * PairedInst references the alpha insturction's dependency information.
  69.          */
  70.         struct schedule_instruction * PairedInst;
  71.  
  72.         /** This scheduler uses the value of Score to determine which
  73.          * instruction to schedule.  Instructions with a higher value of Score
  74.          * will be scheduled first. */
  75.         int Score;
  76.  
  77.         /** The number of components that read from a TEX instruction. */
  78.         unsigned TexReadCount;
  79.  
  80.         /** For TEX instructions a list of readers */
  81.         struct rc_list * TexReaders;
  82. };
  83.  
  84.  
  85. /**
  86.  * Used to keep track of which instructions read a value.
  87.  */
  88. struct reg_value_reader {
  89.         struct schedule_instruction *Reader;
  90.         struct reg_value_reader *Next;
  91. };
  92.  
  93. /**
  94.  * Used to keep track which values are stored in each component of a
  95.  * RC_FILE_TEMPORARY.
  96.  */
  97. struct reg_value {
  98.         struct schedule_instruction * Writer;
  99.  
  100.         /**
  101.          * Unordered linked list of instructions that read from this value.
  102.          * When this value becomes available, we increase all readers'
  103.          * dependency count.
  104.          */
  105.         struct reg_value_reader *Readers;
  106.  
  107.         /**
  108.          * Number of readers of this value. This is decremented each time
  109.          * a reader of the value is committed.
  110.          * When the reader cound reaches zero, the dependency count
  111.          * of the instruction writing \ref Next is decremented.
  112.          */
  113.         unsigned int NumReaders;
  114.  
  115.         struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
  116. };
  117.  
  118. struct register_state {
  119.         struct reg_value * Values[4];
  120. };
  121.  
  122. struct remap_reg {
  123.         struct rc_instruciont * Inst;
  124.         unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
  125.         unsigned int OldSwizzle:3;
  126.         unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
  127.         unsigned int NewSwizzle:3;
  128.         unsigned int OnlyTexReads:1;
  129.         struct remap_reg * Next;
  130. };
  131.  
  132. struct schedule_state {
  133.         struct radeon_compiler * C;
  134.         struct schedule_instruction * Current;
  135.         /** Array of the previous writers of Current's destination register
  136.          * indexed by channel. */
  137.         struct schedule_instruction * PrevWriter[4];
  138.  
  139.         struct register_state Temporary[RC_REGISTER_MAX_INDEX];
  140.  
  141.         /**
  142.          * Linked lists of instructions that can be scheduled right now,
  143.          * based on which ALU/TEX resources they require.
  144.          */
  145.         /*@{*/
  146.         struct schedule_instruction *ReadyFullALU;
  147.         struct schedule_instruction *ReadyRGB;
  148.         struct schedule_instruction *ReadyAlpha;
  149.         struct schedule_instruction *ReadyTEX;
  150.         /*@}*/
  151.         struct rc_list *PendingTEX;
  152.  
  153.         void (*CalcScore)(struct schedule_instruction *);
  154.         long max_tex_group;
  155.         unsigned PrevBlockHasTex:1;
  156.         unsigned TEXCount;
  157.         unsigned Opt:1;
  158. };
  159.  
  160. static struct reg_value ** get_reg_valuep(struct schedule_state * s,
  161.                 rc_register_file file, unsigned int index, unsigned int chan)
  162. {
  163.         if (file != RC_FILE_TEMPORARY)
  164.                 return 0;
  165.  
  166.         if (index >= RC_REGISTER_MAX_INDEX) {
  167.                 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
  168.                 return 0;
  169.         }
  170.  
  171.         return &s->Temporary[index].Values[chan];
  172. }
  173.  
  174. static unsigned get_tex_read_count(struct schedule_instruction * sinst)
  175. {
  176.         unsigned tex_read_count = sinst->TexReadCount;
  177.         if (sinst->PairedInst) {
  178.                 tex_read_count += sinst->PairedInst->TexReadCount;
  179.         }
  180.         return tex_read_count;
  181. }
  182.  
  183. #if VERBOSE
  184. static void print_list(struct schedule_instruction * sinst)
  185. {
  186.         struct schedule_instruction * ptr;
  187.         for (ptr = sinst; ptr; ptr=ptr->NextReady) {
  188.                 unsigned tex_read_count = get_tex_read_count(ptr);
  189.                 unsigned score = sinst->Score;
  190.                 fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
  191.                                                 tex_read_count);
  192.         }
  193.         fprintf(stderr, "\n");
  194. }
  195. #endif
  196.  
  197. static void remove_inst_from_list(struct schedule_instruction ** list,
  198.                                         struct schedule_instruction * inst)
  199. {
  200.         struct schedule_instruction * prev = NULL;
  201.         struct schedule_instruction * list_ptr;
  202.         for (list_ptr = *list; list_ptr; prev = list_ptr,
  203.                                         list_ptr = list_ptr->NextReady) {
  204.                 if (list_ptr == inst) {
  205.                         if (prev) {
  206.                                 prev->NextReady = inst->NextReady;
  207.                         } else {
  208.                                 *list = inst->NextReady;
  209.                         }
  210.                         inst->NextReady = NULL;
  211.                         break;
  212.                 }
  213.         }
  214. }
  215.  
  216. static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
  217. {
  218.         inst->NextReady = *list;
  219.         *list = inst;
  220. }
  221.  
  222. static void add_inst_to_list_score(struct schedule_instruction ** list,
  223.                                         struct schedule_instruction * inst)
  224. {
  225.         struct schedule_instruction * temp;
  226.         struct schedule_instruction * prev;
  227.         if (!*list) {
  228.                 *list = inst;
  229.                 return;
  230.         }
  231.         temp = *list;
  232.         prev = NULL;
  233.         while(temp && inst->Score <= temp->Score) {
  234.                 prev = temp;
  235.                 temp = temp->NextReady;
  236.         }
  237.  
  238.         if (!prev) {
  239.                 inst->NextReady = temp;
  240.                 *list = inst;
  241.         } else {
  242.                 prev->NextReady = inst;
  243.                 inst->NextReady = temp;
  244.         }
  245. }
  246.  
  247. static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
  248. {
  249.         DBG("%i is now ready\n", sinst->Instruction->IP);
  250.  
  251.         /* Adding Ready TEX instructions to the end of the "Ready List" helps
  252.          * us emit TEX instructions in blocks without losing our place. */
  253.         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
  254.                 add_inst_to_list_score(&s->ReadyTEX, sinst);
  255.         else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
  256.                 add_inst_to_list_score(&s->ReadyRGB, sinst);
  257.         else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
  258.                 add_inst_to_list_score(&s->ReadyAlpha, sinst);
  259.         else
  260.                 add_inst_to_list_score(&s->ReadyFullALU, sinst);
  261. }
  262.  
  263. static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
  264. {
  265.         assert(sinst->NumDependencies > 0);
  266.         sinst->NumDependencies--;
  267.         if (!sinst->NumDependencies)
  268.                 instruction_ready(s, sinst);
  269. }
  270.  
  271. /* These functions provide different heuristics for scheduling instructions.
  272.  * The default is calc_score_readers. */
  273.  
  274. #if 0
  275.  
  276. static void calc_score_zero(struct schedule_instruction * sinst)
  277. {
  278.         sinst->Score = 0;
  279. }
  280.  
  281. static void calc_score_deps(struct schedule_instruction * sinst)
  282. {
  283.         int i;
  284.         sinst->Score = 0;
  285.         for (i = 0; i < sinst->NumWriteValues; i++) {
  286.                 struct reg_value * v = sinst->WriteValues[i];
  287.                 if (v->NumReaders) {
  288.                         struct reg_value_reader * r;
  289.                         for (r = v->Readers; r; r = r->Next) {
  290.                                 if (r->Reader->NumDependencies == 1) {
  291.                                         sinst->Score += 100;
  292.                                 }
  293.                                 sinst->Score += r->Reader->NumDependencies;
  294.                         }
  295.                 }
  296.         }
  297. }
  298.  
  299. #endif
  300.  
  301. #define NO_OUTPUT_SCORE (1 << 24)
  302.  
  303. static void score_no_output(struct schedule_instruction * sinst)
  304. {
  305.         assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
  306.         if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
  307.                         !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
  308.                 if (sinst->PairedInst) {
  309.                         if (!sinst->PairedInst->Instruction->U.P.
  310.                                                         RGB.OutputWriteMask
  311.                                         && !sinst->PairedInst->Instruction->U.P.
  312.                                                         Alpha.OutputWriteMask) {
  313.                                 sinst->Score |= NO_OUTPUT_SCORE;
  314.                         }
  315.  
  316.                 } else {
  317.                         sinst->Score |= NO_OUTPUT_SCORE;
  318.                 }
  319.         }
  320. }
  321.  
  322. #define PAIRED_SCORE (1 << 16)
  323.  
  324. static void calc_score_r300(struct schedule_instruction * sinst)
  325. {
  326.         unsigned src_idx;
  327.  
  328.         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
  329.                 sinst->Score = 0;
  330.                 return;
  331.         }
  332.  
  333.         score_no_output(sinst);
  334.  
  335.         if (sinst->PairedInst) {
  336.                 sinst->Score |= PAIRED_SCORE;
  337.                 return;
  338.         }
  339.  
  340.         for (src_idx = 0; src_idx < 4; src_idx++) {
  341.                 sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
  342.                                 sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
  343.         }
  344. }
  345.  
  346. #define NO_READ_TEX_SCORE (1 << 16)
  347.  
  348. static void calc_score_readers(struct schedule_instruction * sinst)
  349. {
  350.         if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
  351.                 sinst->Score = 0;
  352.         } else {
  353.                 sinst->Score = sinst->NumReadValues;
  354.                 if (sinst->PairedInst) {
  355.                         sinst->Score += sinst->PairedInst->NumReadValues;
  356.                 }
  357.                 if (get_tex_read_count(sinst) == 0) {
  358.                         sinst->Score |= NO_READ_TEX_SCORE;
  359.                 }
  360.                 score_no_output(sinst);
  361.         }
  362. }
  363.  
  364. /**
  365.  * This function decreases the dependencies of the next instruction that
  366.  * wants to write to each of sinst's read values.
  367.  */
  368. static void commit_update_reads(struct schedule_state * s,
  369.                                         struct schedule_instruction * sinst){
  370.         unsigned int i;
  371.         for(i = 0; i < sinst->NumReadValues; ++i) {
  372.                 struct reg_value * v = sinst->ReadValues[i];
  373.                 assert(v->NumReaders > 0);
  374.                 v->NumReaders--;
  375.                 if (!v->NumReaders) {
  376.                         if (v->Next) {
  377.                                 decrease_dependencies(s, v->Next->Writer);
  378.                         }
  379.                 }
  380.         }
  381.         if (sinst->PairedInst) {
  382.                 commit_update_reads(s, sinst->PairedInst);
  383.         }
  384. }
  385.  
  386. static void commit_update_writes(struct schedule_state * s,
  387.                                         struct schedule_instruction * sinst){
  388.         unsigned int i;
  389.         for(i = 0; i < sinst->NumWriteValues; ++i) {
  390.                 struct reg_value * v = sinst->WriteValues[i];
  391.                 if (v->NumReaders) {
  392.                         for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
  393.                                 decrease_dependencies(s, r->Reader);
  394.                         }
  395.                 } else {
  396.                         /* This happens in instruction sequences of the type
  397.                          *  OP r.x, ...;
  398.                          *  OP r.x, r.x, ...;
  399.                          * See also the subtlety in how instructions that both
  400.                          * read and write the same register are scanned.
  401.                          */
  402.                         if (v->Next)
  403.                                 decrease_dependencies(s, v->Next->Writer);
  404.                 }
  405.         }
  406.         if (sinst->PairedInst) {
  407.                 commit_update_writes(s, sinst->PairedInst);
  408.         }
  409. }
  410.  
  411. static void notify_sem_wait(struct schedule_state *s)
  412. {
  413.         struct rc_list * pend_ptr;
  414.         for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
  415.                 struct rc_list * read_ptr;
  416.                 struct schedule_instruction * pending = pend_ptr->Item;
  417.                 for (read_ptr = pending->TexReaders; read_ptr;
  418.                                                 read_ptr = read_ptr->Next) {
  419.                         struct schedule_instruction * reader = read_ptr->Item;
  420.                         reader->TexReadCount--;
  421.                 }
  422.         }
  423.         s->PendingTEX = NULL;
  424. }
  425.  
  426. static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
  427. {
  428.         DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
  429.  
  430.         commit_update_reads(s, sinst);
  431.  
  432.         commit_update_writes(s, sinst);
  433.  
  434.         if (get_tex_read_count(sinst) > 0) {
  435.                 sinst->Instruction->U.P.SemWait = 1;
  436.                 notify_sem_wait(s);
  437.         }
  438. }
  439.  
  440. /**
  441.  * Emit all ready texture instructions in a single block.
  442.  *
  443.  * Emit as a single block to (hopefully) sample many textures in parallel,
  444.  * and to avoid hardware indirections on R300.
  445.  */
  446. static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
  447. {
  448.         struct schedule_instruction *readytex;
  449.         struct rc_instruction * inst_begin;
  450.  
  451.         assert(s->ReadyTEX);
  452.         notify_sem_wait(s);
  453.  
  454.         /* Node marker for R300 */
  455.         inst_begin = rc_insert_new_instruction(s->C, before->Prev);
  456.         inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
  457.  
  458.         /* Link texture instructions back in */
  459.         readytex = s->ReadyTEX;
  460.         while(readytex) {
  461.                 rc_insert_instruction(before->Prev, readytex->Instruction);
  462.                 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
  463.  
  464.                 /* All of the TEX instructions in the same TEX block have
  465.                  * their source registers read from before any of the
  466.                  * instructions in that block write to their destination
  467.                  * registers.  This means that when we commit a TEX
  468.                  * instruction, any other TEX instruction that wants to write
  469.                  * to one of the committed instruction's source register can be
  470.                  * marked as ready and should be emitted in the same TEX
  471.                  * block. This prevents the following sequence from being
  472.                  * emitted in two different TEX blocks:
  473.                  * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
  474.                  * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
  475.                  */
  476.                 commit_update_reads(s, readytex);
  477.                 readytex = readytex->NextReady;
  478.         }
  479.         readytex = s->ReadyTEX;
  480.         s->ReadyTEX = 0;
  481.         while(readytex){
  482.                 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
  483.                 commit_update_writes(s, readytex);
  484.                 /* Set semaphore bits for last TEX instruction in the block */
  485.                 if (!readytex->NextReady) {
  486.                         readytex->Instruction->U.I.TexSemAcquire = 1;
  487.                         readytex->Instruction->U.I.TexSemWait = 1;
  488.                 }
  489.                 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
  490.                 readytex = readytex->NextReady;
  491.         }
  492. }
  493.  
  494. /* This is a helper function for destructive_merge_instructions().  It helps
  495.  * merge presubtract sources from two instructions and makes sure the
  496.  * presubtract sources end up in the correct spot.  This function assumes that
  497.  * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
  498.  * but no scalar instruction (alpha).
  499.  * @return 0 if merging the presubtract sources fails.
  500.  * @retrun 1 if merging the presubtract sources succeeds.
  501.  */
  502. static int merge_presub_sources(
  503.         struct rc_pair_instruction * dst_full,
  504.         struct rc_pair_sub_instruction src,
  505.         unsigned int type)
  506. {
  507.         unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
  508.         struct rc_pair_sub_instruction * dst_sub;
  509.         const struct rc_opcode_info * info;
  510.  
  511.         assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
  512.  
  513.         switch(type) {
  514.         case RC_SOURCE_RGB:
  515.                 is_rgb = 1;
  516.                 is_alpha = 0;
  517.                 dst_sub = &dst_full->RGB;
  518.                 break;
  519.         case RC_SOURCE_ALPHA:
  520.                 is_rgb = 0;
  521.                 is_alpha = 1;
  522.                 dst_sub = &dst_full->Alpha;
  523.                 break;
  524.         default:
  525.                 assert(0);
  526.                 return 0;
  527.         }
  528.  
  529.         info = rc_get_opcode_info(dst_full->RGB.Opcode);
  530.  
  531.         if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
  532.                 return 0;
  533.  
  534.         srcp_regs = rc_presubtract_src_reg_count(
  535.                                         src.Src[RC_PAIR_PRESUB_SRC].Index);
  536.         for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
  537.                 unsigned int arg;
  538.                 int free_source;
  539.                 unsigned int one_way = 0;
  540.                 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
  541.                 struct rc_pair_instruction_source temp;
  542.  
  543.                 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
  544.                                                         srcp.File, srcp.Index);
  545.  
  546.                 /* If free_source < 0 then there are no free source
  547.                  * slots. */
  548.                 if (free_source < 0)
  549.                         return 0;
  550.  
  551.                 temp = dst_sub->Src[srcp_src];
  552.                 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
  553.  
  554.                 /* srcp needs src0 and src1 to be the same */
  555.                 if (free_source < srcp_src) {
  556.                         if (!temp.Used)
  557.                                 continue;
  558.                         free_source = rc_pair_alloc_source(dst_full, is_rgb,
  559.                                         is_alpha, temp.File, temp.Index);
  560.                         if (free_source < 0)
  561.                                 return 0;
  562.                         one_way = 1;
  563.                 } else {
  564.                         dst_sub->Src[free_source] = temp;
  565.                 }
  566.  
  567.                 /* If free_source == srcp_src, then the presubtract
  568.                  * source is already in the correct place. */
  569.                 if (free_source == srcp_src)
  570.                         continue;
  571.  
  572.                 /* Shuffle the sources, so we can put the
  573.                  * presubtract source in the correct place. */
  574.                 for(arg = 0; arg < info->NumSrcRegs; arg++) {
  575.                         /*If this arg does not read from an rgb source,
  576.                          * do nothing. */
  577.                         if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
  578.                                                                 & type)) {
  579.                                 continue;
  580.                         }
  581.  
  582.                         if (dst_full->RGB.Arg[arg].Source == srcp_src)
  583.                                 dst_full->RGB.Arg[arg].Source = free_source;
  584.                         /* We need to do this just in case register
  585.                          * is one of the sources already, but in the
  586.                          * wrong spot. */
  587.                         else if(dst_full->RGB.Arg[arg].Source == free_source
  588.                                                         && !one_way) {
  589.                                 dst_full->RGB.Arg[arg].Source = srcp_src;
  590.                         }
  591.                 }
  592.         }
  593.         return 1;
  594. }
  595.  
  596.  
  597. /* This function assumes that rgb.Alpha and alpha.RGB are unused */
  598. static int destructive_merge_instructions(
  599.                 struct rc_pair_instruction * rgb,
  600.                 struct rc_pair_instruction * alpha)
  601. {
  602.         const struct rc_opcode_info * opcode;
  603.  
  604.         assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
  605.         assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
  606.  
  607.         /* Presubtract registers need to be merged first so that registers
  608.          * needed by the presubtract operation can be placed in src0 and/or
  609.          * src1. */
  610.  
  611.         /* Merge the rgb presubtract registers. */
  612.         if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
  613.                 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
  614.                         return 0;
  615.                 }
  616.         }
  617.         /* Merge the alpha presubtract registers */
  618.         if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
  619.                 if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
  620.                         return 0;
  621.                 }
  622.         }
  623.  
  624.         /* Copy alpha args into rgb */
  625.         opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
  626.  
  627.         for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
  628.                 unsigned int srcrgb = 0;
  629.                 unsigned int srcalpha = 0;
  630.                 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
  631.                 rc_register_file file = 0;
  632.                 unsigned int index = 0;
  633.                 int source;
  634.  
  635.                 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
  636.                         srcrgb = 1;
  637.                         file = alpha->RGB.Src[oldsrc].File;
  638.                         index = alpha->RGB.Src[oldsrc].Index;
  639.                 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
  640.                         srcalpha = 1;
  641.                         file = alpha->Alpha.Src[oldsrc].File;
  642.                         index = alpha->Alpha.Src[oldsrc].Index;
  643.                 }
  644.  
  645.                 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
  646.                 if (source < 0)
  647.                         return 0;
  648.  
  649.                 rgb->Alpha.Arg[arg].Source = source;
  650.                 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
  651.                 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
  652.                 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
  653.         }
  654.  
  655.         /* Copy alpha opcode into rgb */
  656.         rgb->Alpha.Opcode = alpha->Alpha.Opcode;
  657.         rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
  658.         rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
  659.         rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
  660.         rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
  661.         rgb->Alpha.Saturate = alpha->Alpha.Saturate;
  662.         rgb->Alpha.Omod = alpha->Alpha.Omod;
  663.  
  664.         /* Merge ALU result writing */
  665.         if (alpha->WriteALUResult) {
  666.                 if (rgb->WriteALUResult)
  667.                         return 0;
  668.  
  669.                 rgb->WriteALUResult = alpha->WriteALUResult;
  670.                 rgb->ALUResultCompare = alpha->ALUResultCompare;
  671.         }
  672.  
  673.         /* Copy SemWait */
  674.         rgb->SemWait |= alpha->SemWait;
  675.  
  676.         return 1;
  677. }
  678.  
  679. /**
  680.  * Try to merge the given instructions into the rgb instructions.
  681.  *
  682.  * Return true on success; on failure, return false, and keep
  683.  * the instructions untouched.
  684.  */
  685. static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
  686. {
  687.         struct rc_pair_instruction backup;
  688.  
  689.         /*Instructions can't write output registers and ALU result at the
  690.          * same time. */
  691.         if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
  692.                 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
  693.                 return 0;
  694.         }
  695.  
  696.         /* Writing output registers in the middle of shaders is slow, so
  697.          * we don't want to pair output writes with temp writes. */
  698.         if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
  699.                 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
  700.                 return 0;
  701.         }
  702.  
  703.         memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
  704.  
  705.         if (destructive_merge_instructions(rgb, alpha))
  706.                 return 1;
  707.  
  708.         memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
  709.         return 0;
  710. }
  711.  
  712. static void presub_nop(struct rc_instruction * emitted) {
  713.         int prev_rgb_index, prev_alpha_index, i, num_src;
  714.  
  715.         /* We don't need a nop if the previous instruction is a TEX. */
  716.         if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
  717.                 return;
  718.         }
  719.         if (emitted->Prev->U.P.RGB.WriteMask)
  720.                 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
  721.         else
  722.                 prev_rgb_index = -1;
  723.         if (emitted->Prev->U.P.Alpha.WriteMask)
  724.                 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
  725.         else
  726.                 prev_alpha_index = 1;
  727.  
  728.         /* Check the previous rgb instruction */
  729.         if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
  730.                 num_src = rc_presubtract_src_reg_count(
  731.                                 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
  732.                 for (i = 0; i < num_src; i++) {
  733.                         unsigned int index = emitted->U.P.RGB.Src[i].Index;
  734.                         if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
  735.                             && (index  == prev_rgb_index
  736.                                 || index == prev_alpha_index)) {
  737.                                 emitted->Prev->U.P.Nop = 1;
  738.                                 return;
  739.                         }
  740.                 }
  741.         }
  742.  
  743.         /* Check the previous alpha instruction. */
  744.         if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
  745.                 return;
  746.  
  747.         num_src = rc_presubtract_src_reg_count(
  748.                                 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
  749.         for (i = 0; i < num_src; i++) {
  750.                 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
  751.                 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
  752.                    && (index == prev_rgb_index || index == prev_alpha_index)) {
  753.                         emitted->Prev->U.P.Nop = 1;
  754.                         return;
  755.                 }
  756.         }
  757. }
  758.  
  759. static void rgb_to_alpha_remap (
  760.         struct rc_instruction * inst,
  761.         struct rc_pair_instruction_arg * arg,
  762.         rc_register_file old_file,
  763.         rc_swizzle old_swz,
  764.         unsigned int new_index)
  765. {
  766.         int new_src_index;
  767.         unsigned int i;
  768.  
  769.         for (i = 0; i < 3; i++) {
  770.                 if (get_swz(arg->Swizzle, i) == old_swz) {
  771.                         SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
  772.                 }
  773.         }
  774.         new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
  775.                                                         old_file, new_index);
  776.         /* This conversion is not possible, we must have made a mistake in
  777.          * is_rgb_to_alpha_possible. */
  778.         if (new_src_index < 0) {
  779.                 assert(0);
  780.                 return;
  781.         }
  782.  
  783.         arg->Source = new_src_index;
  784. }
  785.  
  786. static int can_remap(unsigned int opcode)
  787. {
  788.         switch(opcode) {
  789.         case RC_OPCODE_DDX:
  790.         case RC_OPCODE_DDY:
  791.                 return 0;
  792.         default:
  793.                 return 1;
  794.         }
  795. }
  796.  
  797. static int can_convert_opcode_to_alpha(unsigned int opcode)
  798. {
  799.         switch(opcode) {
  800.         case RC_OPCODE_DDX:
  801.         case RC_OPCODE_DDY:
  802.         case RC_OPCODE_DP2:
  803.         case RC_OPCODE_DP3:
  804.         case RC_OPCODE_DP4:
  805.         case RC_OPCODE_DPH:
  806.                 return 0;
  807.         default:
  808.                 return 1;
  809.         }
  810. }
  811.  
  812. static void is_rgb_to_alpha_possible(
  813.         void * userdata,
  814.         struct rc_instruction * inst,
  815.         struct rc_pair_instruction_arg * arg,
  816.         struct rc_pair_instruction_source * src)
  817. {
  818.         unsigned int read_chan = RC_SWIZZLE_UNUSED;
  819.         unsigned int alpha_sources = 0;
  820.         unsigned int i;
  821.         struct rc_reader_data * reader_data = userdata;
  822.  
  823.         if (!can_remap(inst->U.P.RGB.Opcode)
  824.             || !can_remap(inst->U.P.Alpha.Opcode)) {
  825.                 reader_data->Abort = 1;
  826.                 return;
  827.         }
  828.  
  829.         if (!src)
  830.                 return;
  831.  
  832.         /* XXX There are some cases where we can still do the conversion if
  833.          * a reader reads from a presubtract source, but for now we'll prevent
  834.          * it. */
  835.         if (arg->Source == RC_PAIR_PRESUB_SRC) {
  836.                 reader_data->Abort = 1;
  837.                 return;
  838.         }
  839.  
  840.         /* Make sure the source only reads the register component that we
  841.          * are going to be convering from.  It is OK if the instruction uses
  842.          * this component more than once.
  843.          * XXX If the index we will be converting to is the same as the
  844.          * current index, then it is OK to read from more than one component.
  845.          */
  846.         for (i = 0; i < 3; i++) {
  847.                 rc_swizzle swz = get_swz(arg->Swizzle, i);
  848.                 switch(swz) {
  849.                 case RC_SWIZZLE_X:
  850.                 case RC_SWIZZLE_Y:
  851.                 case RC_SWIZZLE_Z:
  852.                 case RC_SWIZZLE_W:
  853.                         if (read_chan == RC_SWIZZLE_UNUSED) {
  854.                                 read_chan = swz;
  855.                         } else if (read_chan != swz) {
  856.                                 reader_data->Abort = 1;
  857.                                 return;
  858.                         }
  859.                         break;
  860.                 default:
  861.                         break;
  862.                 }
  863.         }
  864.  
  865.         /* Make sure there are enough alpha sources.
  866.          * XXX If we know what register all the readers are going
  867.          * to be remapped to, then in some situations we can still do
  868.          * the subsitution, even if all 3 alpha sources are being used.*/
  869.         for (i = 0; i < 3; i++) {
  870.                 if (inst->U.P.Alpha.Src[i].Used) {
  871.                         alpha_sources++;
  872.                 }
  873.         }
  874.         if (alpha_sources > 2) {
  875.                 reader_data->Abort = 1;
  876.                 return;
  877.         }
  878. }
  879.  
  880. static int convert_rgb_to_alpha(
  881.         struct schedule_state * s,
  882.         struct schedule_instruction * sched_inst)
  883. {
  884.         struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
  885.         unsigned int old_mask = pair_inst->RGB.WriteMask;
  886.         unsigned int old_swz = rc_mask_to_swizzle(old_mask);
  887.         const struct rc_opcode_info * info =
  888.                                 rc_get_opcode_info(pair_inst->RGB.Opcode);
  889.         int new_index = -1;
  890.         unsigned int i;
  891.  
  892.         if (sched_inst->GlobalReaders.Abort)
  893.                 return 0;
  894.  
  895.         if (!pair_inst->RGB.WriteMask)
  896.                 return 0;
  897.  
  898.         if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
  899.             || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
  900.                 return 0;
  901.         }
  902.  
  903.         assert(sched_inst->NumWriteValues == 1);
  904.  
  905.         if (!sched_inst->WriteValues[0]) {
  906.                 assert(0);
  907.                 return 0;
  908.         }
  909.  
  910.         /* We start at the old index, because if we can reuse the same
  911.          * register and just change the swizzle then it is more likely we
  912.          * will be able to convert all the readers. */
  913.         for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
  914.                 struct reg_value ** new_regvalp = get_reg_valuep(
  915.                                                 s, RC_FILE_TEMPORARY, i, 3);
  916.                 if (!*new_regvalp) {
  917.                         struct reg_value ** old_regvalp =
  918.                                 get_reg_valuep(s,
  919.                                         RC_FILE_TEMPORARY,
  920.                                         pair_inst->RGB.DestIndex,
  921.                                         rc_mask_to_swizzle(old_mask));
  922.                         new_index = i;
  923.                         *new_regvalp = *old_regvalp;
  924.                         *old_regvalp = NULL;
  925.                         new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
  926.                         break;
  927.                 }
  928.         }
  929.         if (new_index < 0) {
  930.                 return 0;
  931.         }
  932.  
  933.         /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
  934.          * as the RGB opcode, then the Alpha instruction will already contain
  935.          * the correct opcode and instruction args, so we do not want to
  936.          * overwrite them.
  937.          */
  938.         if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
  939.                 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
  940.                 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
  941.                                                 sizeof(pair_inst->Alpha.Arg));
  942.         }
  943.         pair_inst->Alpha.DestIndex = new_index;
  944.         pair_inst->Alpha.WriteMask = RC_MASK_W;
  945.         pair_inst->Alpha.Target = pair_inst->RGB.Target;
  946.         pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
  947.         pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
  948.         pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
  949.         pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
  950.         /* Move the swizzles into the first chan */
  951.         for (i = 0; i < info->NumSrcRegs; i++) {
  952.                 unsigned int j;
  953.                 for (j = 0; j < 3; j++) {
  954.                         unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
  955.                         if (swz != RC_SWIZZLE_UNUSED) {
  956.                                 pair_inst->Alpha.Arg[i].Swizzle =
  957.                                                         rc_init_swizzle(swz, 1);
  958.                                 break;
  959.                         }
  960.                 }
  961.         }
  962.         pair_inst->RGB.Opcode = RC_OPCODE_NOP;
  963.         pair_inst->RGB.DestIndex = 0;
  964.         pair_inst->RGB.WriteMask = 0;
  965.         pair_inst->RGB.Target = 0;
  966.         pair_inst->RGB.OutputWriteMask = 0;
  967.         pair_inst->RGB.DepthWriteMask = 0;
  968.         pair_inst->RGB.Saturate = 0;
  969.         memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
  970.  
  971.         for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
  972.                 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
  973.                 rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
  974.                                         RC_FILE_TEMPORARY, old_swz, new_index);
  975.         }
  976.         return 1;
  977. }
  978.  
  979. static void try_convert_and_pair(
  980.         struct schedule_state *s,
  981.         struct schedule_instruction ** inst_list)
  982. {
  983.         struct schedule_instruction * list_ptr = *inst_list;
  984.         while (list_ptr && *inst_list && (*inst_list)->NextReady) {
  985.                 int paired = 0;
  986.                 if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
  987.                         && list_ptr->Instruction->U.P.RGB.Opcode
  988.                                                 != RC_OPCODE_REPL_ALPHA) {
  989.                                 goto next;
  990.                 }
  991.                 if (list_ptr->NumWriteValues == 1
  992.                                         && convert_rgb_to_alpha(s, list_ptr)) {
  993.  
  994.                         struct schedule_instruction * pair_ptr;
  995.                         remove_inst_from_list(inst_list, list_ptr);
  996.                         add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
  997.  
  998.                         for (pair_ptr = s->ReadyRGB; pair_ptr;
  999.                                         pair_ptr = pair_ptr->NextReady) {
  1000.                                 if (merge_instructions(&pair_ptr->Instruction->U.P,
  1001.                                                 &list_ptr->Instruction->U.P)) {
  1002.                                         remove_inst_from_list(&s->ReadyAlpha, list_ptr);
  1003.                                         remove_inst_from_list(&s->ReadyRGB, pair_ptr);
  1004.                                         pair_ptr->PairedInst = list_ptr;
  1005.  
  1006.                                         add_inst_to_list(&s->ReadyFullALU, pair_ptr);
  1007.                                         list_ptr = *inst_list;
  1008.                                         paired = 1;
  1009.                                         break;
  1010.                                 }
  1011.  
  1012.                         }
  1013.                 }
  1014.                 if (!paired) {
  1015. next:
  1016.                         list_ptr = list_ptr->NextReady;
  1017.                 }
  1018.         }
  1019. }
  1020.  
  1021. /**
  1022.  * This function attempts to merge RGB and Alpha instructions together.
  1023.  */
  1024. static void pair_instructions(struct schedule_state * s)
  1025. {
  1026.         struct schedule_instruction *rgb_ptr;
  1027.         struct schedule_instruction *alpha_ptr;
  1028.  
  1029.         /* Some pairings might fail because they require too
  1030.          * many source slots; try all possible pairings if necessary */
  1031.         rgb_ptr = s->ReadyRGB;
  1032.         while(rgb_ptr) {
  1033.                 struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
  1034.                 alpha_ptr = s->ReadyAlpha;
  1035.                 while(alpha_ptr) {
  1036.                         struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
  1037.                         if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
  1038.                                 /* Remove RGB and Alpha from their ready lists.
  1039.                                  */
  1040.                                 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
  1041.                                 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
  1042.                                 rgb_ptr->PairedInst = alpha_ptr;
  1043.                                 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
  1044.                                 break;
  1045.                         }
  1046.                         alpha_ptr = alpha_next;
  1047.                 }
  1048.                 rgb_ptr = rgb_next;
  1049.         }
  1050.  
  1051.         if (!s->Opt) {
  1052.                 return;
  1053.         }
  1054.  
  1055.         /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
  1056.          * slot can be converted into Alpha instructions. */
  1057.         try_convert_and_pair(s, &s->ReadyFullALU);
  1058.  
  1059.         /* Try to convert some of the RGB instructions to Alpha and
  1060.          * try to pair it with another RGB. */
  1061.         try_convert_and_pair(s, &s->ReadyRGB);
  1062. }
  1063.  
  1064. static void update_max_score(
  1065.         struct schedule_state * s,
  1066.         struct schedule_instruction ** list,
  1067.         int * max_score,
  1068.         struct schedule_instruction ** max_inst_out,
  1069.         struct schedule_instruction *** list_out)
  1070. {
  1071.         struct schedule_instruction * list_ptr;
  1072.         for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
  1073.                 int score;
  1074.                 s->CalcScore(list_ptr);
  1075.                 score = list_ptr->Score;
  1076.                 if (!*max_inst_out || score > *max_score) {
  1077.                         *max_score = score;
  1078.                         *max_inst_out = list_ptr;
  1079.                         *list_out = list;
  1080.                 }
  1081.         }
  1082. }
  1083.  
  1084. static void emit_instruction(
  1085.         struct schedule_state * s,
  1086.         struct rc_instruction * before)
  1087. {
  1088.         int max_score = -1;
  1089.         struct schedule_instruction * max_inst = NULL;
  1090.         struct schedule_instruction ** max_list = NULL;
  1091.         unsigned tex_count = 0;
  1092.         struct schedule_instruction * tex_ptr;
  1093.  
  1094.         pair_instructions(s);
  1095. #if VERBOSE
  1096.         fprintf(stderr, "Full:\n");
  1097.         print_list(s->ReadyFullALU);
  1098.         fprintf(stderr, "RGB:\n");
  1099.         print_list(s->ReadyRGB);
  1100.         fprintf(stderr, "Alpha:\n");
  1101.         print_list(s->ReadyAlpha);
  1102.         fprintf(stderr, "TEX:\n");
  1103.         print_list(s->ReadyTEX);
  1104. #endif
  1105.  
  1106.         for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
  1107.                 if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
  1108.                         emit_all_tex(s, before);
  1109.                         return;
  1110.                 }
  1111.                 tex_count++;
  1112.         }
  1113.         update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
  1114.         update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
  1115.         update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
  1116.  
  1117.         if (tex_count >= s->max_tex_group || max_score == -1
  1118.                 || (s->TEXCount > 0 && tex_count == s->TEXCount)
  1119.                 || (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
  1120.                 emit_all_tex(s, before);
  1121.         } else {
  1122.  
  1123.  
  1124.                 remove_inst_from_list(max_list, max_inst);
  1125.                 rc_insert_instruction(before->Prev, max_inst->Instruction);
  1126.                 commit_alu_instruction(s, max_inst);
  1127.  
  1128.                 presub_nop(before->Prev);
  1129.         }
  1130. }
  1131.  
  1132. static void add_tex_reader(
  1133.         struct schedule_state * s,
  1134.         struct schedule_instruction * writer,
  1135.         struct schedule_instruction * reader)
  1136. {
  1137.         if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
  1138.                 /*Not a TEX instructions */
  1139.                 return;
  1140.         }
  1141.         reader->TexReadCount++;
  1142.         rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
  1143. }
  1144.  
  1145. static void scan_read(void * data, struct rc_instruction * inst,
  1146.                 rc_register_file file, unsigned int index, unsigned int chan)
  1147. {
  1148.         struct schedule_state * s = data;
  1149.         struct reg_value ** v = get_reg_valuep(s, file, index, chan);
  1150.         struct reg_value_reader * reader;
  1151.  
  1152.         if (!v)
  1153.                 return;
  1154.  
  1155.         if (*v && (*v)->Writer == s->Current) {
  1156.                 /* The instruction reads and writes to a register component.
  1157.                  * In this case, we only want to increment dependencies by one.
  1158.                  * Why?
  1159.                  * Because each instruction depends on the writers of its source
  1160.                  * registers _and_ the most recent writer of its destination
  1161.                  * register.  In this case, the current instruction (s->Current)
  1162.                  * has a dependency that both writes to one of its source
  1163.                  * registers and was the most recent writer to its destination
  1164.                  * register.  We have already marked this dependency in
  1165.                  * scan_write(), so we don't need to do it again.
  1166.                  */
  1167.  
  1168.                 /* We need to make sure we are adding s->Current to the
  1169.                  * previous writer's list of TexReaders, if the previous writer
  1170.                  * was a TEX instruction.
  1171.                  */
  1172.                 add_tex_reader(s, s->PrevWriter[chan], s->Current);
  1173.  
  1174.                 return;
  1175.         }
  1176.  
  1177.         DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
  1178.  
  1179.         reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
  1180.         reader->Reader = s->Current;
  1181.         if (!*v) {
  1182.                 /* In this situation, the instruction reads from a register
  1183.                  * that hasn't been written to or read from in the current
  1184.                  * block. */
  1185.                 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
  1186.                 memset(*v, 0, sizeof(struct reg_value));
  1187.                 (*v)->Readers = reader;
  1188.         } else {
  1189.                 reader->Next = (*v)->Readers;
  1190.                 (*v)->Readers = reader;
  1191.                 /* Only update the current instruction's dependencies if the
  1192.                  * register it reads from has been written to in this block. */
  1193.                 if ((*v)->Writer) {
  1194.                         add_tex_reader(s, (*v)->Writer, s->Current);
  1195.                         s->Current->NumDependencies++;
  1196.                 }
  1197.         }
  1198.         (*v)->NumReaders++;
  1199.  
  1200.         if (s->Current->NumReadValues >= 12) {
  1201.                 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
  1202.         } else {
  1203.                 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
  1204.         }
  1205. }
  1206.  
  1207. static void scan_write(void * data, struct rc_instruction * inst,
  1208.                 rc_register_file file, unsigned int index, unsigned int chan)
  1209. {
  1210.         struct schedule_state * s = data;
  1211.         struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
  1212.         struct reg_value * newv;
  1213.  
  1214.         if (!pv)
  1215.                 return;
  1216.  
  1217.         DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
  1218.  
  1219.         newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
  1220.         memset(newv, 0, sizeof(*newv));
  1221.  
  1222.         newv->Writer = s->Current;
  1223.  
  1224.         if (*pv) {
  1225.                 (*pv)->Next = newv;
  1226.                 s->Current->NumDependencies++;
  1227.                 /* Keep track of the previous writer to s->Current's destination
  1228.                  * register */
  1229.                 s->PrevWriter[chan] = (*pv)->Writer;
  1230.         }
  1231.  
  1232.         *pv = newv;
  1233.  
  1234.         if (s->Current->NumWriteValues >= 4) {
  1235.                 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
  1236.         } else {
  1237.                 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
  1238.         }
  1239. }
  1240.  
  1241. static void is_rgb_to_alpha_possible_normal(
  1242.         void * userdata,
  1243.         struct rc_instruction * inst,
  1244.         struct rc_src_register * src)
  1245. {
  1246.         struct rc_reader_data * reader_data = userdata;
  1247.         reader_data->Abort = 1;
  1248.  
  1249. }
  1250.  
  1251. static void schedule_block(struct schedule_state * s,
  1252.                 struct rc_instruction * begin, struct rc_instruction * end)
  1253. {
  1254.         unsigned int ip;
  1255.  
  1256.         /* Scan instructions for data dependencies */
  1257.         ip = 0;
  1258.         for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
  1259.                 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
  1260.                 memset(s->Current, 0, sizeof(struct schedule_instruction));
  1261.  
  1262.                 if (inst->Type == RC_INSTRUCTION_NORMAL) {
  1263.                         const struct rc_opcode_info * info =
  1264.                                         rc_get_opcode_info(inst->U.I.Opcode);
  1265.                         if (info->HasTexture) {
  1266.                                 s->TEXCount++;
  1267.                         }
  1268.                 }
  1269.  
  1270.                 /* XXX: This causes SemWait to be set for all instructions in
  1271.                  * a block if the previous block contained a TEX instruction.
  1272.                  * We can do better here, but it will take a lot of work. */
  1273.                 if (s->PrevBlockHasTex) {
  1274.                         s->Current->TexReadCount = 1;
  1275.                 }
  1276.  
  1277.                 s->Current->Instruction = inst;
  1278.                 inst->IP = ip++;
  1279.  
  1280.                 DBG("%i: Scanning\n", inst->IP);
  1281.  
  1282.                 /* The order of things here is subtle and maybe slightly
  1283.                  * counter-intuitive, to account for the case where an
  1284.                  * instruction writes to the same register as it reads
  1285.                  * from. */
  1286.                 rc_for_all_writes_chan(inst, &scan_write, s);
  1287.                 rc_for_all_reads_chan(inst, &scan_read, s);
  1288.  
  1289.                 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
  1290.  
  1291.                 if (!s->Current->NumDependencies) {
  1292.                         instruction_ready(s, s->Current);
  1293.                 }
  1294.  
  1295.                 /* Get global readers for possible RGB->Alpha conversion. */
  1296.                 s->Current->GlobalReaders.ExitOnAbort = 1;
  1297.                 rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
  1298.                                 is_rgb_to_alpha_possible_normal,
  1299.                                 is_rgb_to_alpha_possible, NULL);
  1300.         }
  1301.  
  1302.         /* Temporarily unlink all instructions */
  1303.         begin->Prev->Next = end;
  1304.         end->Prev = begin->Prev;
  1305.  
  1306.         /* Schedule instructions back */
  1307.         while(!s->C->Error &&
  1308.               (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
  1309.                 emit_instruction(s, end);
  1310.         }
  1311. }
  1312.  
  1313. static int is_controlflow(struct rc_instruction * inst)
  1314. {
  1315.         if (inst->Type == RC_INSTRUCTION_NORMAL) {
  1316.                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
  1317.                 return opcode->IsFlowControl;
  1318.         }
  1319.         return 0;
  1320. }
  1321.  
  1322. void rc_pair_schedule(struct radeon_compiler *cc, void *user)
  1323. {
  1324.         struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
  1325.         struct schedule_state s;
  1326.         struct rc_instruction * inst = c->Base.Program.Instructions.Next;
  1327.         unsigned int * opt = user;
  1328.  
  1329.         memset(&s, 0, sizeof(s));
  1330.         s.Opt = *opt;
  1331.         s.C = &c->Base;
  1332.         if (s.C->is_r500) {
  1333.                 s.CalcScore = calc_score_readers;
  1334.         } else {
  1335.                 s.CalcScore = calc_score_r300;
  1336.         }
  1337.         s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
  1338.         while(inst != &c->Base.Program.Instructions) {
  1339.                 struct rc_instruction * first;
  1340.  
  1341.                 if (is_controlflow(inst)) {
  1342.                         inst = inst->Next;
  1343.                         continue;
  1344.                 }
  1345.  
  1346.                 first = inst;
  1347.  
  1348.                 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
  1349.                         inst = inst->Next;
  1350.  
  1351.                 DBG("Schedule one block\n");
  1352.                 memset(s.Temporary, 0, sizeof(s.Temporary));
  1353.                 s.TEXCount = 0;
  1354.                 schedule_block(&s, first, inst);
  1355.                 if (s.PendingTEX) {
  1356.                         s.PrevBlockHasTex = 1;
  1357.                 }
  1358.         }
  1359. }
  1360.