0,0 → 1,753 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
|
/** |
* \file |
*/ |
|
#include "radeon_compiler_util.h" |
|
#include "radeon_compiler.h" |
#include "radeon_dataflow.h" |
/** |
*/ |
unsigned int rc_swizzle_to_writemask(unsigned int swz) |
{ |
unsigned int mask = 0; |
unsigned int i; |
|
for(i = 0; i < 4; i++) { |
mask |= 1 << GET_SWZ(swz, i); |
} |
mask &= RC_MASK_XYZW; |
|
return mask; |
} |
|
/**
 * Resolve a selector against a swizzle.
 *
 * @param swz The swizzle to read from.
 * @param idx The selector to resolve.
 * @return idx unchanged when bit 0x4 of idx is set, otherwise the value of
 * channel idx in swz.
 */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
	return (idx & 0x4) ? idx : GET_SWZ(swz, idx);
}
|
/** |
* The purpose of this function is to standardize the number channels used by |
* swizzles. All swizzles regardless of what instruction they are a part of |
* should have 4 channels initialized with values. |
* @param channels The number of channels in initial_value that have a |
* meaningful value. |
* @return An initialized swizzle that has all of the unused channels set to |
* RC_SWIZZLE_UNUSED. |
*/ |
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) |
{ |
unsigned int i; |
for (i = channels; i < 4; i++) { |
SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); |
} |
return initial_value; |
} |
|
/**
 * Assemble a 4-channel swizzle by resolving four selectors against src.
 *
 * Each selector is resolved with get_swz() and the result is packed into
 * 3 bits per channel, X in the lowest bits.
 *
 * @param src The swizzle the selectors are resolved against.
 * @param swz_x Selector for the X channel of the result.
 * @param swz_y Selector for the Y channel of the result.
 * @param swz_z Selector for the Z channel of the result.
 * @param swz_w Selector for the W channel of the result.
 * @return The packed swizzle.
 */
unsigned int combine_swizzles4(unsigned int src,
		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
	const unsigned int x = get_swz(src, swz_x);
	const unsigned int y = get_swz(src, swz_y);
	const unsigned int z = get_swz(src, swz_z);
	const unsigned int w = get_swz(src, swz_w);

	return x | (y << 3) | (z << 6) | (w << 9);
}
|
unsigned int combine_swizzles(unsigned int src, unsigned int swz) |
{ |
unsigned int ret = 0; |
|
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; |
|
return ret; |
} |
|
/** |
* @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W |
*/ |
rc_swizzle rc_mask_to_swizzle(unsigned int mask) |
{ |
switch (mask) { |
case RC_MASK_X: return RC_SWIZZLE_X; |
case RC_MASK_Y: return RC_SWIZZLE_Y; |
case RC_MASK_Z: return RC_SWIZZLE_Z; |
case RC_MASK_W: return RC_SWIZZLE_W; |
} |
return RC_SWIZZLE_UNUSED; |
} |
|
/**
 * Reorder mask bits according to swizzle.
 *
 * Bit chan of the result is a copy of the mask bit selected by channel chan
 * of swizzle. Channels whose selector is 4 or greater leave their result
 * bit cleared.
 *
 * @param swizzle The swizzle describing the reordering.
 * @param mask The mask whose bits are picked.
 * @return The reordered mask.
 */
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
{
	unsigned reordered = 0;
	unsigned chan;

	for (chan = 0; chan < 4; chan++) {
		const unsigned sel = GET_SWZ(swizzle, chan);

		if (sel >= 4) {
			continue;
		}
		reordered |= GET_BIT(mask, sel) << chan;
	}
	return reordered;
}
|
static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) |
{ |
if (info->HasTexture) { |
return 0; |
} |
switch (info->Opcode) { |
case RC_OPCODE_DP2: |
case RC_OPCODE_DP3: |
case RC_OPCODE_DP4: |
case RC_OPCODE_DDX: |
case RC_OPCODE_DDY: |
return 0; |
default: |
return 1; |
} |
} |
|
/** |
* @return A swizzle the results from converting old_swizzle using |
* conversion_swizzle |
*/ |
unsigned int rc_adjust_channels( |
unsigned int old_swizzle, |
unsigned int conversion_swizzle) |
{ |
unsigned int i; |
unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); |
for (i = 0; i < 4; i++) { |
unsigned int new_chan = get_swz(conversion_swizzle, i); |
if (new_chan == RC_SWIZZLE_UNUSED) { |
continue; |
} |
SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); |
} |
return new_swizzle; |
} |
|
static unsigned int rewrite_writemask( |
unsigned int old_mask, |
unsigned int conversion_swizzle) |
{ |
unsigned int new_mask = 0; |
unsigned int i; |
|
for (i = 0; i < 4; i++) { |
if (!GET_BIT(old_mask, i) |
|| GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { |
continue; |
} |
new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); |
} |
|
return new_mask; |
} |
|
/** |
* This function rewrites the writemask of sub and adjusts the swizzles |
* of all its source registers based on the conversion_swizzle. |
* conversion_swizzle represents a mapping of the old writemask to the |
* new writemask. For a detailed description of how conversion swizzles |
* work see rc_rewrite_swizzle(). |
*/ |
void rc_pair_rewrite_writemask( |
struct rc_pair_sub_instruction * sub, |
unsigned int conversion_swizzle) |
{ |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
unsigned int i; |
|
sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); |
|
if (!srcs_need_rewrite(info)) { |
return ; |
} |
|
for (i = 0; i < info->NumSrcRegs; i++) { |
sub->Arg[i].Swizzle = |
rc_adjust_channels(sub->Arg[i].Swizzle, |
conversion_swizzle); |
} |
} |
|
static void normal_rewrite_writemask_cb( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
unsigned int * conversion_swizzle = (unsigned int *)userdata; |
src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle); |
} |
|
/** |
* This function is the same as rc_pair_rewrite_writemask() except it |
* operates on normal instructions. |
*/ |
void rc_normal_rewrite_writemask( |
struct rc_instruction * inst, |
unsigned int conversion_swizzle) |
{ |
struct rc_sub_instruction * sub = &inst->U.I; |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
sub->DstReg.WriteMask = |
rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); |
|
if (info->HasTexture) { |
unsigned int i; |
assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); |
for (i = 0; i < 4; i++) { |
unsigned int swz = GET_SWZ(conversion_swizzle, i); |
if (swz > 3) |
continue; |
SET_SWZ(sub->TexSwizzle, swz, i); |
} |
} |
|
if (!srcs_need_rewrite(info)) { |
return; |
} |
|
rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, |
&conversion_swizzle); |
} |
|
/** |
* This function replaces each value 'swz' in swizzle with the value of |
* GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's |
* in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want |
* to change all the Y's in swizzle to X, then conversion_swizzle should be |
* _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then |
* conversion swizzle should be YX__ (0xfc1). |
* @param swizzle The swizzle to change |
* @param conversion_swizzle Describes the conversion to perform on the swizzle |
* @return A converted swizzle |
*/ |
unsigned int rc_rewrite_swizzle( |
unsigned int swizzle, |
unsigned int conversion_swizzle) |
{ |
unsigned int chan; |
unsigned int out_swizzle = swizzle; |
|
for (chan = 0; chan < 4; chan++) { |
unsigned int swz = GET_SWZ(swizzle, chan); |
unsigned int new_swz; |
if (swz > 3) { |
SET_SWZ(out_swizzle, chan, swz); |
} else { |
new_swz = GET_SWZ(conversion_swizzle, swz); |
if (new_swz != RC_SWIZZLE_UNUSED) { |
SET_SWZ(out_swizzle, chan, new_swz); |
} else { |
SET_SWZ(out_swizzle, chan, swz); |
} |
} |
} |
return out_swizzle; |
} |
|
/** |
* Left multiplication of a register with a swizzle |
*/ |
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) |
{ |
struct rc_src_register tmp = srcreg; |
int i; |
tmp.Swizzle = 0; |
tmp.Negate = 0; |
for(i = 0; i < 4; ++i) { |
rc_swizzle swz = GET_SWZ(swizzle, i); |
if (swz < 4) { |
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); |
tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; |
} else { |
tmp.Swizzle |= swz << (i*3); |
} |
} |
return tmp; |
} |
|
void reset_srcreg(struct rc_src_register* reg) |
{ |
memset(reg, 0, sizeof(struct rc_src_register)); |
reg->Swizzle = RC_SWIZZLE_XYZW; |
} |
|
unsigned int rc_src_reads_dst_mask( |
rc_register_file src_file, |
unsigned int src_idx, |
unsigned int src_swz, |
rc_register_file dst_file, |
unsigned int dst_idx, |
unsigned int dst_mask) |
{ |
if (src_file != dst_file || src_idx != dst_idx) { |
return RC_MASK_NONE; |
} |
return dst_mask & rc_swizzle_to_writemask(src_swz); |
} |
|
/** |
* @return A bit mask specifying whether this swizzle will select from an RGB |
* source, an Alpha source, or both. |
*/ |
unsigned int rc_source_type_swz(unsigned int swizzle) |
{ |
unsigned int chan; |
unsigned int swz = RC_SWIZZLE_UNUSED; |
unsigned int ret = RC_SOURCE_NONE; |
|
for(chan = 0; chan < 4; chan++) { |
swz = GET_SWZ(swizzle, chan); |
if (swz == RC_SWIZZLE_W) { |
ret |= RC_SOURCE_ALPHA; |
} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y |
|| swz == RC_SWIZZLE_Z) { |
ret |= RC_SOURCE_RGB; |
} |
} |
return ret; |
} |
|
unsigned int rc_source_type_mask(unsigned int mask) |
{ |
unsigned int ret = RC_SOURCE_NONE; |
|
if (mask & RC_MASK_XYZ) |
ret |= RC_SOURCE_RGB; |
|
if (mask & RC_MASK_W) |
ret |= RC_SOURCE_ALPHA; |
|
return ret; |
} |
|
struct src_select { |
rc_register_file File; |
int Index; |
unsigned int SrcType; |
}; |
|
struct can_use_presub_data { |
struct src_select Selects[5]; |
unsigned int SelectCount; |
const struct rc_src_register * ReplaceReg; |
unsigned int ReplaceRemoved; |
}; |
|
static void can_use_presub_data_add_select( |
struct can_use_presub_data * data, |
rc_register_file file, |
unsigned int index, |
unsigned int src_type) |
{ |
struct src_select * select; |
|
select = &data->Selects[data->SelectCount++]; |
select->File = file; |
select->Index = index; |
select->SrcType = src_type; |
} |
|
/** |
* This callback function counts the number of sources in inst that are |
* different from the sources in can_use_presub_data->RemoveSrcs. |
*/ |
static void can_use_presub_read_cb( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct can_use_presub_data * d = userdata; |
|
if (!d->ReplaceRemoved && src == d->ReplaceReg) { |
d->ReplaceRemoved = 1; |
return; |
} |
|
if (src->File == RC_FILE_NONE) |
return; |
|
can_use_presub_data_add_select(d, src->File, src->Index, |
rc_source_type_swz(src->Swizzle)); |
} |
|
unsigned int rc_inst_can_use_presub( |
struct rc_instruction * inst, |
rc_presubtract_op presub_op, |
unsigned int presub_writemask, |
const struct rc_src_register * replace_reg, |
const struct rc_src_register * presub_src0, |
const struct rc_src_register * presub_src1) |
{ |
struct can_use_presub_data d; |
unsigned int num_presub_srcs; |
unsigned int i; |
const struct rc_opcode_info * info = |
rc_get_opcode_info(inst->U.I.Opcode); |
int rgb_count = 0, alpha_count = 0; |
unsigned int src_type0, src_type1; |
|
if (presub_op == RC_PRESUB_NONE) { |
return 1; |
} |
|
if (info->HasTexture) { |
return 0; |
} |
|
/* We can't use more than one presubtract value in an |
* instruction, unless the two prsubtract operations |
* are the same and read from the same registers. |
* XXX For now we will limit instructions to only one presubtract |
* value.*/ |
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { |
return 0; |
} |
|
memset(&d, 0, sizeof(d)); |
d.ReplaceReg = replace_reg; |
|
rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); |
|
num_presub_srcs = rc_presubtract_src_reg_count(presub_op); |
|
src_type0 = rc_source_type_swz(presub_src0->Swizzle); |
can_use_presub_data_add_select(&d, |
presub_src0->File, |
presub_src0->Index, |
src_type0); |
|
if (num_presub_srcs > 1) { |
src_type1 = rc_source_type_swz(presub_src1->Swizzle); |
can_use_presub_data_add_select(&d, |
presub_src1->File, |
presub_src1->Index, |
src_type1); |
|
/* Even if both of the presub sources read from the same |
* register, we still need to use 2 different source selects |
* for them, so we need to increment the count to compensate. |
*/ |
if (presub_src0->File == presub_src1->File |
&& presub_src0->Index == presub_src1->Index) { |
if (src_type0 & src_type1 & RC_SOURCE_RGB) { |
rgb_count++; |
} |
if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { |
alpha_count++; |
} |
} |
} |
|
/* Count the number of source selects for Alpha and RGB. If we |
* encounter two of the same source selects then we can ignore the |
* first one. */ |
for (i = 0; i < d.SelectCount; i++) { |
unsigned int j; |
unsigned int src_type = d.Selects[i].SrcType; |
for (j = i + 1; j < d.SelectCount; j++) { |
if (d.Selects[i].File == d.Selects[j].File |
&& d.Selects[i].Index == d.Selects[j].Index) { |
src_type &= ~d.Selects[j].SrcType; |
} |
} |
if (src_type & RC_SOURCE_RGB) { |
rgb_count++; |
} |
|
if (src_type & RC_SOURCE_ALPHA) { |
alpha_count++; |
} |
} |
|
if (rgb_count > 3 || alpha_count > 3) { |
return 0; |
} |
|
return 1; |
} |
|
struct max_data { |
unsigned int Max; |
unsigned int HasFileType; |
rc_register_file File; |
}; |
|
static void max_callback( |
void * userdata, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct max_data * d = (struct max_data*)userdata; |
if (file == d->File && (!d->HasFileType || index > d->Max)) { |
d->Max = index; |
d->HasFileType = 1; |
} |
} |
|
/** |
* @return The maximum index of the specified register file used by the |
* program. |
*/ |
int rc_get_max_index( |
struct radeon_compiler * c, |
rc_register_file file) |
{ |
struct max_data data; |
struct rc_instruction * inst; |
data.Max = 0; |
data.HasFileType = 0; |
data.File = file; |
for (inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
rc_for_all_reads_mask(inst, max_callback, &data); |
rc_for_all_writes_mask(inst, max_callback, &data); |
} |
if (!data.HasFileType) { |
return -1; |
} else { |
return data.Max; |
} |
} |
|
static unsigned int get_source_readmask( |
struct rc_pair_sub_instruction * sub, |
unsigned int source, |
unsigned int src_type) |
{ |
unsigned int i; |
unsigned int readmask = 0; |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
|
for (i = 0; i < info->NumSrcRegs; i++) { |
if (sub->Arg[i].Source != source |
|| src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { |
continue; |
} |
readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); |
} |
return readmask; |
} |
|
/** |
* This function attempts to remove a source from a pair instructions. |
* @param inst |
* @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd |
* @param source The index of the source to remove |
* @param new_readmask A mask representing the components that are read by |
* the source that is intended to replace the one you are removing. If you |
* want to remove a source only and not replace it, this parameter should be |
* zero. |
* @return 1 if the source was successfully removed, 0 if it was not |
*/ |
unsigned int rc_pair_remove_src( |
struct rc_instruction * inst, |
unsigned int src_type, |
unsigned int source, |
unsigned int new_readmask) |
{ |
unsigned int readmask = 0; |
|
readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); |
readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); |
|
if ((new_readmask & readmask) != readmask) |
return 0; |
|
if (src_type & RC_SOURCE_RGB) { |
memset(&inst->U.P.RGB.Src[source], 0, |
sizeof(struct rc_pair_instruction_source)); |
} |
|
if (src_type & RC_SOURCE_ALPHA) { |
memset(&inst->U.P.Alpha.Src[source], 0, |
sizeof(struct rc_pair_instruction_source)); |
} |
|
return 1; |
} |
|
/** |
* @return RC_OPCODE_NOOP if inst is not a flow control instruction. |
* @return The opcode of inst if it is a flow control instruction. |
*/ |
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) |
{ |
const struct rc_opcode_info * info; |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
info = rc_get_opcode_info(inst->U.I.Opcode); |
} else { |
info = rc_get_opcode_info(inst->U.P.RGB.Opcode); |
/*A flow control instruction shouldn't have an alpha |
* instruction.*/ |
assert(!info->IsFlowControl || |
inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); |
} |
|
if (info->IsFlowControl) |
return info->Opcode; |
else |
return RC_OPCODE_NOP; |
|
} |
|
/** |
* @return The BGNLOOP instruction that starts the loop ended by endloop. |
*/ |
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) |
{ |
unsigned int endloop_count = 0; |
struct rc_instruction * inst; |
for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { |
rc_opcode op = rc_get_flow_control_inst(inst); |
if (op == RC_OPCODE_ENDLOOP) { |
endloop_count++; |
} else if (op == RC_OPCODE_BGNLOOP) { |
if (endloop_count == 0) { |
return inst; |
} else { |
endloop_count--; |
} |
} |
} |
return NULL; |
} |
|
/** |
* @return The ENDLOOP instruction that ends the loop started by bgnloop. |
*/ |
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) |
{ |
unsigned int bgnloop_count = 0; |
struct rc_instruction * inst; |
for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { |
rc_opcode op = rc_get_flow_control_inst(inst); |
if (op == RC_OPCODE_BGNLOOP) { |
bgnloop_count++; |
} else if (op == RC_OPCODE_ENDLOOP) { |
if (bgnloop_count == 0) { |
return inst; |
} else { |
bgnloop_count--; |
} |
} |
} |
return NULL; |
} |
|
/** |
* @return A conversion swizzle for converting from old_mask->new_mask |
*/ |
unsigned int rc_make_conversion_swizzle( |
unsigned int old_mask, |
unsigned int new_mask) |
{ |
unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); |
unsigned int old_idx; |
unsigned int new_idx = 0; |
for (old_idx = 0; old_idx < 4; old_idx++) { |
if (!GET_BIT(old_mask, old_idx)) |
continue; |
for ( ; new_idx < 4; new_idx++) { |
if (GET_BIT(new_mask, new_idx)) { |
SET_SWZ(conversion_swizzle, old_idx, new_idx); |
new_idx++; |
break; |
} |
} |
} |
return conversion_swizzle; |
} |
|
/** |
* @return 1 if the register contains an immediate value, 0 otherwise. |
*/ |
unsigned int rc_src_reg_is_immediate( |
struct radeon_compiler * c, |
unsigned int file, |
unsigned int index) |
{ |
return file == RC_FILE_CONSTANT && |
c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE; |
} |
|
/** |
* @return The immediate value in the specified register. |
*/ |
float rc_get_constant_value( |
struct radeon_compiler * c, |
unsigned int index, |
unsigned int swizzle, |
unsigned int negate, |
unsigned int chan) |
{ |
float base = 1.0f; |
int swz = GET_SWZ(swizzle, chan); |
if(swz >= 4 || index >= c->Program.Constants.Count ){ |
rc_error(c, "get_constant_value: Can't find a value.\n"); |
return 0.0f; |
} |
if(GET_BIT(negate, chan)){ |
base = -1.0f; |
} |
return base * |
c->Program.Constants.Constants[index].u.Immediate[swz]; |
} |
|
/** |
* This function returns the component value (RC_SWIZZLE_*) of the first used |
* channel in the swizzle. This is only useful for scalar instructions that are |
* known to use only one channel of the swizzle. |
*/ |
unsigned int rc_get_scalar_src_swz(unsigned int swizzle) |
{ |
unsigned int swz, chan; |
for (chan = 0; chan < 4; chan++) { |
swz = GET_SWZ(swizzle, chan); |
if (swz != RC_SWIZZLE_UNUSED) { |
break; |
} |
} |
assert(swz != RC_SWIZZLE_UNUSED); |
return swz; |
} |