/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Android.mk |
---|
0,0 → 1,44 |
# Mesa 3-D graphics library |
# |
# Copyright (C) 2011 Chia-I Wu <olvaffe@gmail.com> |
# Copyright (C) 2011 LunarG Inc. |
# |
# Permission is hereby granted, free of charge, to any person obtaining a |
# copy of this software and associated documentation files (the "Software"), |
# to deal in the Software without restriction, including without limitation |
# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
# and/or sell copies of the Software, and to permit persons to whom the |
# Software is furnished to do so, subject to the following conditions: |
# |
# The above copyright notice and this permission notice shall be included |
# in all copies or substantial portions of the Software. |
# |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
# DEALINGS IN THE SOFTWARE. |
# Android build fragment: compiles the files listed in C_SOURCES into the
# static library libmesa_pipe_r300.
LOCAL_PATH := $(call my-dir) |
# Pull in C_SOURCES from the Makefile.sources that sits next to this file.
include $(LOCAL_PATH)/Makefile.sources |
# NOTE(review): CLEAR_VARS comes from the Android build system; presumably it
# resets the LOCAL_* variables left over from a previous module -- confirm.
include $(CLEAR_VARS) |
LOCAL_SRC_FILES := $(C_SOURCES) |
# Header search paths: the mapi, glsl and mesa source directories under
# MESA_TOP, plus DRM_TOP and its include/drm subdirectory. MESA_TOP and
# DRM_TOP are not defined in this file.
LOCAL_C_INCLUDES := \ |
$(MESA_TOP)/src/mapi \ |
$(MESA_TOP)/src/glsl \ |
$(MESA_TOP)/src/mesa \ |
$(DRM_TOP) \ |
$(DRM_TOP)/include/drm |
LOCAL_MODULE := libmesa_pipe_r300 |
# GALLIUM_COMMON_MK is defined outside this file.
# NOTE(review): presumably it adds the settings shared by all gallium
# modules -- confirm against the top-level makefiles.
include $(GALLIUM_COMMON_MK) |
include $(BUILD_STATIC_LIBRARY) |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Makefile.am |
---|
0,0 → 1,44 |
# Makefile.sources provides C_SOURCES, which is used for libr300_la_SOURCES
# further down.
include Makefile.sources |
# NOTE(review): Automake.inc is expected to provide GALLIUM_CFLAGS, which is
# referenced in AM_CFLAGS below -- confirm.
include $(top_srcdir)/src/gallium/Automake.inc |
# Two libtool libraries that are built but not installed: the driver itself
# and a small helper library.
noinst_LTLIBRARIES = libr300.la libr300-helper.la |
# Compiler unit-test program: built for the check target and listed in TESTS
# so that the test harness runs it.
check_PROGRAMS = r300_compiler_tests |
testdir = compiler/tests |
TESTS = r300_compiler_tests |
# Flags for every C compilation in this directory: include paths into the
# gallium drivers, public headers, mesa, glsl and mapi trees, followed by
# the visibility, gallium, LLVM and radeon flag sets.
AM_CFLAGS = \ |
-I$(top_srcdir)/src/gallium/drivers \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/mesa \ |
-I$(top_srcdir)/src/glsl \ |
-I$(top_srcdir)/src/mapi \ |
$(VISIBILITY_CFLAGS) \ |
$(GALLIUM_CFLAGS) \ |
$(LLVM_CFLAGS) \ |
$(RADEON_CFLAGS) |
# The test program links against both libraries built here, the gallium
# auxiliary library and GALLIUM_DRI_LIB_DEPS.
r300_compiler_tests_LDADD = libr300.la libr300-helper.la \ |
$(top_builddir)/src/gallium/auxiliary/libgallium.la \ |
$(GALLIUM_DRI_LIB_DEPS) |
# Extra include path so the tests can reach the headers in compiler/.
r300_compiler_tests_CPPFLAGS = \ |
-I$(top_srcdir)/src/gallium/drivers/r300/compiler |
r300_compiler_tests_SOURCES = \ |
$(testdir)/r300_compiler_tests.c \ |
$(testdir)/radeon_compiler_optimize_tests.c \ |
$(testdir)/radeon_compiler_regalloc_tests.c \ |
$(testdir)/radeon_compiler_util_tests.c \ |
$(testdir)/rc_test_helpers.c \ |
$(testdir)/unit_test.c |
libr300_la_SOURCES = $(C_SOURCES) |
# These two files are included in libmesagallium, which is included in the dri |
# targets. So, if they were added directly to r300g, the dri-r300 target would |
# have duplicated symbols, and if they weren't, the other *-r300 targets would |
# fail with undefined symbols. |
# |
# Solve this by building them into a separate helper library that can be linked |
# in place of libmesagallium. |
libr300_helper_la_SOURCES = \ |
$(top_srcdir)/src/glsl/ralloc.c \ |
$(top_srcdir)/src/mesa/program/register_allocate.c |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Makefile.in |
---|
0,0 → 1,1680 |
# Makefile.in generated by automake 1.14 from Makefile.am. |
# @configure_input@ |
# Copyright (C) 1994-2013 Free Software Foundation, Inc. |
# This Makefile.in is free software; the Free Software Foundation |
# gives unlimited permission to copy and/or distribute it, |
# with or without modifications, as long as this notice is preserved. |
# This program is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without |
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
# PARTICULAR PURPOSE. |
# Placeholder filled in by configure.
# NOTE(review): presumably expands to a MAKE= assignment when the make program
# does not predefine $(MAKE) -- confirm.
@SET_MAKE@ |
# Search for sources in the configured source directory.
VPATH = @srcdir@ |
# Shell test that succeeds when both MAKEFILE_LIST and MAKELEVEL expand to
# non-empty strings; the result is used below to decide whether GNU make is
# running.
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' |
# Shell fragment that succeeds when the single-character option held in the
# shell variable $target_option appears among the flags make was invoked with.
# It aborts with an internal error if $target_option is not exactly one
# character. The flags are taken from $MFLAGS when am__is_gnu_make succeeds,
# otherwise from $MAKEFLAGS with backslash-escaped whitespace removed.
# Variable assignments and long options are ignored, and the arguments of
# options that take one (-I, -O, -l, -d/-E/-D/-m, -J/-T) are dropped so they
# are not mistaken for option letters.
am__make_running_with_option = \ |
case $${target_option-} in \ |
?) ;; \ |
*) echo "am__make_running_with_option: internal error: invalid" \ |
"target option '$${target_option-}' specified" >&2; \ |
exit 1;; \ |
esac; \ |
has_opt=no; \ |
sane_makeflags=$$MAKEFLAGS; \ |
if $(am__is_gnu_make); then \ |
sane_makeflags=$$MFLAGS; \ |
else \ |
case $$MAKEFLAGS in \ |
*\\[\ \ ]*) \ |
bs=\\; \ |
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ |
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ |
esac; \ |
fi; \ |
skip_next=no; \ |
strip_trailopt () \ |
{ \ |
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ |
}; \ |
for flg in $$sane_makeflags; do \ |
test $$skip_next = yes && { skip_next=no; continue; }; \ |
case $$flg in \ |
*=*|--*) continue;; \ |
-*I) strip_trailopt 'I'; skip_next=yes;; \ |
-*I?*) strip_trailopt 'I';; \ |
-*O) strip_trailopt 'O'; skip_next=yes;; \ |
-*O?*) strip_trailopt 'O';; \ |
-*l) strip_trailopt 'l'; skip_next=yes;; \ |
-*l?*) strip_trailopt 'l';; \ |
-[dEDm]) skip_next=yes;; \ |
-[JT]) skip_next=yes;; \ |
esac; \ |
case $$flg in \ |
*$$target_option*) has_opt=yes; break;; \ |
esac; \ |
done; \ |
test $$has_opt = yes |
# Succeeds when make was invoked with the 'n' option (dry run).
am__make_dryrun = (target_option=n; $(am__make_running_with_option)) |
# Succeeds when make was invoked with the 'k' option (keep going).
am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) |
# Package-specific subdirectories of the data, include, lib and libexec
# installation directories.
pkgdatadir = $(datadir)/@PACKAGE@ |
pkgincludedir = $(includedir)/@PACKAGE@ |
pkglibdir = $(libdir)/@PACKAGE@ |
pkglibexecdir = $(libexecdir)/@PACKAGE@ |
# 'cd' prefixed with an explicit CDPATH assignment.
# NOTE(review): presumably this keeps an inherited CDPATH from changing where
# cd lands -- confirm.
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd |
# install-sh invocations: data files get mode 644, programs and scripts use
# the default mode.
install_sh_DATA = $(install_sh) -c -m 644 |
install_sh_PROGRAM = $(install_sh) -c |
install_sh_SCRIPT = $(install_sh) -c |
INSTALL_HEADER = $(INSTALL_DATA) |
transform = $(program_transform_name) |
# Install/uninstall phase markers; each one is the shell no-op ':'.
NORMAL_INSTALL = : |
PRE_INSTALL = : |
POST_INSTALL = : |
NORMAL_UNINSTALL = : |
PRE_UNINSTALL = : |
POST_UNINSTALL = : |
# Build, host and target system triplets filled in by configure.
build_triplet = @build@ |
host_triplet = @host@ |
target_triplet = @target@ |
# Build-system files of this directory that go into DISTFILES alongside the
# sources.
DIST_COMMON = $(srcdir)/Makefile.sources \ |
$(top_srcdir)/src/gallium/Automake.inc $(srcdir)/Makefile.in \ |
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp \ |
$(top_srcdir)/bin/test-driver |
# The compiler test program, with the platform's executable suffix appended.
check_PROGRAMS = r300_compiler_tests$(EXEEXT) |
TESTS = r300_compiler_tests$(EXEEXT) |
# Location of this directory relative to the top of the tree.
subdir = src/gallium/drivers/r300 |
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 |
# The m4 macro files and configure.ac that aclocal.m4 is regenerated from.
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \ |
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \ |
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \ |
$(top_srcdir)/m4/ax_prog_flex.m4 \ |
$(top_srcdir)/m4/ax_pthread.m4 \ |
$(top_srcdir)/m4/ax_python_module.m4 \ |
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ |
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ |
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac |
# Everything the configure script depends on.
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ |
$(ACLOCAL_M4) |
mkinstalldirs = $(install_sh) -d |
# No configure-generated files need cleaning in this directory.
CONFIG_CLEAN_FILES = |
CONFIG_CLEAN_VPATH_FILES = |
# All libtool libraries built in this directory (only non-installed ones).
LTLIBRARIES = $(noinst_LTLIBRARIES) |
# Helper library: no extra libraries to add, two objects.
libr300_helper_la_LIBADD = |
am_libr300_helper_la_OBJECTS = ralloc.lo register_allocate.lo |
libr300_helper_la_OBJECTS = $(am_libr300_helper_la_OBJECTS) |
# Libtool verbosity: --silent is passed at verbosity level 0, nothing at
# level 1.
AM_V_lt = $(am__v_lt_@AM_V@) |
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) |
am__v_lt_0 = --silent |
am__v_lt_1 = |
# Driver library: no extra libraries to add; the object list mirrors the file
# names in C_SOURCES with the directory part removed.
libr300_la_LIBADD = |
am__objects_1 = r300_blit.lo r300_chipset.lo r300_context.lo \ |
r300_debug.lo r300_emit.lo r300_flush.lo r300_fs.lo \ |
r300_hyperz.lo r300_query.lo r300_render.lo \ |
r300_render_stencilref.lo r300_render_translate.lo \ |
r300_resource.lo r300_screen.lo r300_screen_buffer.lo \ |
r300_state.lo r300_state_derived.lo r300_vs.lo r300_vs_draw.lo \ |
r300_texture.lo r300_texture_desc.lo r300_tgsi_to_rc.lo \ |
r300_transfer.lo radeon_code.lo radeon_compiler.lo \ |
radeon_compiler_util.lo radeon_emulate_branches.lo \ |
radeon_emulate_loops.lo radeon_inline_literals.lo \ |
radeon_program.lo radeon_program_print.lo radeon_opcodes.lo \ |
radeon_program_alu.lo radeon_program_pair.lo \ |
radeon_program_tex.lo radeon_pair_translate.lo \ |
radeon_pair_schedule.lo radeon_pair_regalloc.lo \ |
radeon_pair_dead_sources.lo radeon_dataflow.lo \ |
radeon_dataflow_deadcode.lo radeon_dataflow_swizzles.lo \ |
radeon_list.lo radeon_optimize.lo radeon_remove_constants.lo \ |
radeon_rename_regs.lo radeon_vert_fc.lo radeon_variable.lo \ |
r3xx_fragprog.lo r300_fragprog.lo r300_fragprog_swizzle.lo \ |
r300_fragprog_emit.lo r500_fragprog.lo r500_fragprog_emit.lo \ |
r3xx_vertprog.lo r3xx_vertprog_dump.lo memory_pool.lo |
am_libr300_la_OBJECTS = $(am__objects_1) |
libr300_la_OBJECTS = $(am_libr300_la_OBJECTS) |
# Test program objects; each name carries the program name as a prefix.
am_r300_compiler_tests_OBJECTS = \ |
r300_compiler_tests-r300_compiler_tests.$(OBJEXT) \ |
r300_compiler_tests-radeon_compiler_optimize_tests.$(OBJEXT) \ |
r300_compiler_tests-radeon_compiler_regalloc_tests.$(OBJEXT) \ |
r300_compiler_tests-radeon_compiler_util_tests.$(OBJEXT) \ |
r300_compiler_tests-rc_test_helpers.$(OBJEXT) \ |
r300_compiler_tests-unit_test.$(OBJEXT) |
r300_compiler_tests_OBJECTS = $(am_r300_compiler_tests_OBJECTS) |
am__DEPENDENCIES_1 = |
# Libraries that must be up to date before the test program is linked.
r300_compiler_tests_DEPENDENCIES = libr300.la libr300-helper.la \ |
$(top_builddir)/src/gallium/auxiliary/libgallium.la \ |
$(am__DEPENDENCIES_1) |
# Verbosity switches. Each AM_V_* variable picks a value according to the
# level substituted for @AM_V@, and the variant with an empty suffix falls
# back to @AM_DEFAULT_V@. At level 0 a short tag is echoed (or the command is
# hidden with '@'); at level 1 the value is empty so the full command shows.
# AM_V_P is a predicate: 'false' at level 0, ':' at level 1.
AM_V_P = $(am__v_P_@AM_V@) |
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) |
am__v_P_0 = false |
am__v_P_1 = : |
AM_V_GEN = $(am__v_GEN_@AM_V@) |
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) |
am__v_GEN_0 = @echo " GEN " $@; |
am__v_GEN_1 = |
AM_V_at = $(am__v_at_@AM_V@) |
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) |
am__v_at_0 = @ |
am__v_at_1 = |
DEFAULT_INCLUDES = -I.@am__isrc@ |
# Dependency-tracking helper script, run through $(SHELL).
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp |
am__depfiles_maybe = depfiles |
am__mv = mv -f |
# Plain and libtool-wrapped C compile commands; both use the same ordering of
# preprocessor and compiler flag variables.
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ |
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ |
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ |
$(AM_CFLAGS) $(CFLAGS) |
AM_V_CC = $(am__v_CC_@AM_V@) |
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) |
am__v_CC_0 = @echo " CC " $@; |
am__v_CC_1 = |
# Linking goes through libtool with the C compiler as the link driver and
# writes to the rule's target.
CCLD = $(CC) |
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ |
$(AM_LDFLAGS) $(LDFLAGS) -o $@ |
AM_V_CCLD = $(am__v_CCLD_@AM_V@) |
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) |
am__v_CCLD_0 = @echo " CCLD " $@; |
am__v_CCLD_1 = |
# Every source file of the two libraries and the test program; DIST_SOURCES
# is the same list.
SOURCES = $(libr300_helper_la_SOURCES) $(libr300_la_SOURCES) \ |
$(r300_compiler_tests_SOURCES) |
DIST_SOURCES = $(libr300_helper_la_SOURCES) $(libr300_la_SOURCES) \ |
$(r300_compiler_tests_SOURCES) |
# Shell test: fails when AM_UPDATE_INFO_DIR is n/no/NO, otherwise succeeds
# only if 'install-info --version' can be run.
am__can_run_installinfo = \ |
case $$AM_UPDATE_INFO_DIR in \ |
n|no|NO) false;; \ |
*) (install-info --version) >/dev/null 2>&1;; \ |
esac |
# Files that are candidates for tag generation.
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) |
# Read a list of newline-separated strings from the standard input, |
# and print each of them once, without duplicates. Input order is |
# *not* preserved. |
am__uniquify_input = $(AWK) '\ |
BEGIN { nonempty = 0; } \ |
{ items[$$0] = 1; nonempty = 1; } \ |
END { if (nonempty) { for (i in items) print i; }; } \ |
' |
# Make sure the list of sources is unique. This is necessary because, |
# e.g., the same source file might be shared among _SOURCES variables |
# for different programs/libraries. |
# Each entry that is not a file in the current directory is prefixed with
# $(srcdir)/; the result is left in the shell variable $unique.
am__define_uniq_tagged_files = \ |
list='$(am__tagged_files)'; \ |
unique=`for i in $$list; do \ |
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ |
done | $(am__uniquify_input)` |
# Programs used to generate tag files.
ETAGS = etags |
CTAGS = ctags |
# Shell fragment that empties every colour variable and turns coloured test
# output off.
am__tty_colors_dummy = \ |
mgn= red= grn= lgn= blu= brg= std=; \ |
am__color_tests=no |
# Shell fragment that decides whether test output is coloured. It starts from
# the "no colour" state above; AM_COLOR_TESTS=no keeps colour off,
# AM_COLOR_TESTS=always turns it on, and otherwise colour is enabled when
# $TERM is not "dumb" and standard output is a terminal. When colour is on,
# the colour variables are set.
# NOTE(review): the colour strings below begin with '[' and no visible escape
# byte; terminal colour sequences normally start with an ESC character, so
# verify that it has not been lost from this copy of the file.
am__tty_colors = { \ |
$(am__tty_colors_dummy); \ |
if test "X$(AM_COLOR_TESTS)" = Xno; then \ |
am__color_tests=no; \ |
elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ |
am__color_tests=yes; \ |
elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ |
am__color_tests=yes; \ |
fi; \ |
if test $$am__color_tests = yes; then \ |
red='[0;31m'; \ |
grn='[0;32m'; \ |
lgn='[1;32m'; \ |
blu='[1;34m'; \ |
mgn='[0;35m'; \ |
brg='[1m'; \ |
std='[m'; \ |
fi; \ |
} |
# Shell fragments that strip a leading "$(srcdir)/" from the file name in $p
# and leave the result in $f.
# NOTE(review): the setup expression 's|.|.|g' replaces every character of
# $(srcdir) with '.', so the pattern built from it matches any prefix of the
# same length -- confirm that this is intended.
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; |
am__vpath_adj = case $$p in \ |
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ |
*) f=$$p;; \ |
esac; |
# Sets $f to the last path component of $p.
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; |
# Maximum number of files am__nobase_list puts on one output line.
am__install_max = 40 |
# Helpers for file lists that keep their directory part ("nobase"). The setup
# step escapes $(srcdir) for use in a sed pattern; am__nobase_strip prints the
# entries of $list with the srcdir prefix removed; am__nobase_list prints
# lines of the form "directory file...", flushing a line whenever a directory
# has collected am__install_max files.
am__nobase_strip_setup = \ |
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` |
am__nobase_strip = \ |
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" |
am__nobase_list = $(am__nobase_strip_setup); \ |
for p in $$list; do echo "$$p $$p"; done | \ |
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ |
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ |
if (++n[$$2] == $(am__install_max)) \ |
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ |
END { for (dir in files) print dir, files[dir] }' |
# Joins input lines into longer space-separated lines: the first sed merges
# up to 8 lines into one, the second merges up to 5 of those.
am__base_list = \ |
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ |
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' |
# Removes the files named in $files from the directory $dir, echoing the
# command first. Nothing happens when $files is empty or when $dir is neither
# a directory, a regular file, nor readable.
am__uninstall_files_from_dir = { \ |
test -z "$$files" \ |
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ |
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \ |
$(am__cd) "$$dir" && rm -f $$files; }; \ |
} |
# Regular expressions matching the :recheck:, :global-test-result: and
# :copy-in-global-log: fields at the start of a line read from a .trs file.
am__recheck_rx = ^[ ]*:recheck:[ ]* |
am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* |
am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* |
# A command that, given a newline-separated list of test names on the |
# standard input, prints the names of the tests that are to be re-run |
# upon "make recheck". |
# A test is skipped when its .trs file says ":recheck: no", or when the .trs
# file cannot be read and the .log file cannot be read either.
am__list_recheck_tests = $(AWK) '{ \ |
recheck = 1; \ |
while ((rc = (getline line < ($$0 ".trs"))) != 0) \ |
{ \ |
if (rc < 0) \ |
{ \ |
if ((getline line2 < ($$0 ".log")) < 0) \ |
recheck = 0; \ |
break; \ |
} \ |
else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ |
{ \ |
recheck = 0; \ |
break; \ |
} \ |
else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ |
{ \ |
break; \ |
} \ |
}; \ |
if (recheck) \ |
print $$0; \ |
close ($$0 ".trs"); \ |
close ($$0 ".log"); \ |
}' |
# A command that, given a newline-separated list of test names on the |
# standard input, creates the global log from their .trs and .log files. |
# For each test it reads the .trs file to find the global result (default
# "RUN") and whether the log should be copied (default yes); when it should,
# it prints a section header "RESULT: name" underlined with '=' followed by
# the contents of the .log file. A read error on either file is fatal.
am__create_global_log = $(AWK) ' \ |
function fatal(msg) \ |
{ \ |
print "fatal: making $@: " msg | "cat >&2"; \ |
exit 1; \ |
} \ |
function rst_section(header) \ |
{ \ |
print header; \ |
len = length(header); \ |
for (i = 1; i <= len; i = i + 1) \ |
printf "="; \ |
printf "\n\n"; \ |
} \ |
{ \ |
copy_in_global_log = 1; \ |
global_test_result = "RUN"; \ |
while ((rc = (getline line < ($$0 ".trs"))) != 0) \ |
{ \ |
if (rc < 0) \ |
fatal("failed to read from " $$0 ".trs"); \ |
if (line ~ /$(am__global_test_result_rx)/) \ |
{ \ |
sub("$(am__global_test_result_rx)", "", line); \ |
sub("[ ]*$$", "", line); \ |
global_test_result = line; \ |
} \ |
else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ |
copy_in_global_log = 0; \ |
}; \ |
if (copy_in_global_log) \ |
{ \ |
rst_section(global_test_result ": " $$0); \ |
while ((rc = (getline line < ($$0 ".log"))) != 0) \ |
{ \ |
if (rc < 0) \ |
fatal("failed to read from " $$0 ".log"); \ |
print line; \ |
}; \ |
printf "\n"; \ |
}; \ |
close ($$0 ".trs"); \ |
close ($$0 ".log"); \ |
}' |
# Restructured Text title. |
am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } |
# Solaris 10 'make', and several other traditional 'make' implementations, |
# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it |
# by disabling -e (using the XSI extension "set +e") if it's set. |
am__sh_e_setup = case $$- in *e*) set +e;; esac |
# Default flags passed to test drivers. |
am__common_driver_flags = \ |
--color-tests "$$am__color_tests" \ |
--enable-hard-errors "$$am__enable_hard_errors" \ |
--expect-failure "$$am__expect_failure" |
# To be inserted before the command running the test. Creates the |
# directory for the log if needed. Stores in $dir the directory |
# containing $f, in $tst the test, in $log the log. Executes the |
# developer-defined test setup AM_TESTS_ENVIRONMENT (if any), and |
# passes TESTS_ENVIRONMENT. Sets up options for the wrapper that |
# will run the test scripts (or their associated LOG_COMPILER, if |
# they have one). |
# Hard errors are enabled unless DISABLE_HARD_ERRORS is non-empty, and a
# failure is expected when the test is listed in XFAIL_TESTS.
am__check_pre = \ |
$(am__sh_e_setup); \ |
$(am__vpath_adj_setup) $(am__vpath_adj) \ |
$(am__tty_colors); \ |
srcdir=$(srcdir); export srcdir; \ |
case "$@" in \ |
*/*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ |
*) am__odir=.;; \ |
esac; \ |
test "x$$am__odir" = x"." || test -d "$$am__odir" \ |
|| $(MKDIR_P) "$$am__odir" || exit $$?; \ |
if test -f "./$$f"; then dir=./; \ |
elif test -f "$$f"; then dir=; \ |
else dir="$(srcdir)/"; fi; \ |
tst=$$dir$$f; log='$@'; \ |
if test -n '$(DISABLE_HARD_ERRORS)'; then \ |
am__enable_hard_errors=no; \ |
else \ |
am__enable_hard_errors=yes; \ |
fi; \ |
case " $(XFAIL_TESTS) " in \ |
*[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ |
am__expect_failure=yes;; \ |
*) \ |
am__expect_failure=no;; \ |
esac; \ |
$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) |
# A shell command to get the names of the test scripts with any registered |
# extension removed (i.e., equivalently, the names of the test logs, with |
# the '.log' extension removed). The result is saved in the shell variable |
# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, |
# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", |
# since that might cause problems with VPATH rewrites for suffix-less tests. |
# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. |
am__set_TESTS_bases = \ |
bases='$(TEST_LOGS)'; \ |
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ |
bases=`echo $$bases` |
RECHECK_LOGS = $(TEST_LOGS) |
AM_RECURSIVE_TARGETS = check recheck |
# Name of the combined log file for the whole test suite.
TEST_SUITE_LOG = test-suite.log |
# Test file extensions recognised here: the executable suffix and .test.
TEST_EXTENSIONS = @EXEEXT@ .test |
# Tests without a registered extension are run through the test-driver script.
LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver |
LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) |
# Shell fragment that sets $b to the log name without its .log suffix. It
# uses the stem '$*' unless the target contains a directory part that the
# stem lacks, in which case it strips .log from the target name instead.
am__set_b = \ |
case '$@' in \ |
*/*) \ |
case '$*' in \ |
*/*) b='$*';; \ |
*) b=`echo '$@' | sed 's/\.log$$//'`; \ |
esac;; \ |
*) \ |
b='$*';; \ |
esac |
# Derive the log file names from TESTS: append .log, then drop the executable
# suffix and the .test suffix that precede it.
am__test_logs1 = $(TESTS:=.log) |
am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) |
TEST_LOGS = $(am__test_logs2:.test.log=.log) |
# Driver and compile command for tests with the .test extension.
TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver |
TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ |
$(TEST_LOG_FLAGS) |
# Everything that belongs in a distribution of this directory.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) |
# Tools, flags, library names and version numbers. Each variable is set to an
# @NAME@ placeholder of the same name, to be filled in when the Makefile is
# produced from this template.
ACLOCAL = @ACLOCAL@ |
AMTAR = @AMTAR@ |
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ |
AR = @AR@ |
AUTOCONF = @AUTOCONF@ |
AUTOHEADER = @AUTOHEADER@ |
AUTOMAKE = @AUTOMAKE@ |
AWK = @AWK@ |
BUILD_EXEEXT = @BUILD_EXEEXT@ |
BUILD_OBJEXT = @BUILD_OBJEXT@ |
CC = @CC@ |
CCAS = @CCAS@ |
CCASDEPMODE = @CCASDEPMODE@ |
CCASFLAGS = @CCASFLAGS@ |
CCDEPMODE = @CCDEPMODE@ |
CC_FOR_BUILD = @CC_FOR_BUILD@ |
CFLAGS = @CFLAGS@ |
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@ |
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ |
CLOCK_LIB = @CLOCK_LIB@ |
CPP = @CPP@ |
CPPFLAGS = @CPPFLAGS@ |
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@ |
CPP_FOR_BUILD = @CPP_FOR_BUILD@ |
CXX = @CXX@ |
CXXCPP = @CXXCPP@ |
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@ |
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@ |
CXXDEPMODE = @CXXDEPMODE@ |
CXXFLAGS = @CXXFLAGS@ |
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@ |
CXX_FOR_BUILD = @CXX_FOR_BUILD@ |
CYGPATH_W = @CYGPATH_W@ |
DEFINES = @DEFINES@ |
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@ |
DEFS = @DEFS@ |
DEPDIR = @DEPDIR@ |
DLLTOOL = @DLLTOOL@ |
DLOPEN_LIBS = @DLOPEN_LIBS@ |
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ |
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ |
DRIGL_CFLAGS = @DRIGL_CFLAGS@ |
DRIGL_LIBS = @DRIGL_LIBS@ |
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ |
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ |
DRI_LIB_DEPS = @DRI_LIB_DEPS@ |
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ |
DSYMUTIL = @DSYMUTIL@ |
DUMPBIN = @DUMPBIN@ |
ECHO_C = @ECHO_C@ |
ECHO_N = @ECHO_N@ |
ECHO_T = @ECHO_T@ |
EGL_CFLAGS = @EGL_CFLAGS@ |
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ |
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@ |
EGL_LIB_DEPS = @EGL_LIB_DEPS@ |
EGL_LIB_GLOB = @EGL_LIB_GLOB@ |
EGL_LIB_NAME = @EGL_LIB_NAME@ |
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ |
EGL_PLATFORMS = @EGL_PLATFORMS@ |
EGREP = @EGREP@ |
ELF_LIB = @ELF_LIB@ |
EXEEXT = @EXEEXT@ |
EXPAT_INCLUDES = @EXPAT_INCLUDES@ |
FGREP = @FGREP@ |
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ |
FREEDRENO_LIBS = @FREEDRENO_LIBS@ |
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@ |
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ |
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@ |
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@ |
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@ |
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ |
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ |
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@ |
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@ |
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ |
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@ |
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@ |
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ |
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ |
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@ |
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@ |
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ |
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ |
GLPROTO_LIBS = @GLPROTO_LIBS@ |
GLX_TLS = @GLX_TLS@ |
GL_LIB = @GL_LIB@ |
GL_LIB_DEPS = @GL_LIB_DEPS@ |
GL_LIB_GLOB = @GL_LIB_GLOB@ |
GL_LIB_NAME = @GL_LIB_NAME@ |
GL_PC_CFLAGS = @GL_PC_CFLAGS@ |
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ |
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ |
GREP = @GREP@ |
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ |
INDENT = @INDENT@ |
INDENT_FLAGS = @INDENT_FLAGS@ |
INSTALL = @INSTALL@ |
INSTALL_DATA = @INSTALL_DATA@ |
INSTALL_PROGRAM = @INSTALL_PROGRAM@ |
INSTALL_SCRIPT = @INSTALL_SCRIPT@ |
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ |
INTEL_CFLAGS = @INTEL_CFLAGS@ |
INTEL_LIBS = @INTEL_LIBS@ |
LD = @LD@ |
LDFLAGS = @LDFLAGS@ |
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ |
LEX = @LEX@ |
LEXLIB = @LEXLIB@ |
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ |
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ |
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ |
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ |
LIBDRM_LIBS = @LIBDRM_LIBS@ |
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@ |
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@ |
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@ |
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@ |
LIBOBJS = @LIBOBJS@ |
LIBS = @LIBS@ |
LIBTOOL = @LIBTOOL@ |
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@ |
LIBUDEV_LIBS = @LIBUDEV_LIBS@ |
LIB_DIR = @LIB_DIR@ |
LIPO = @LIPO@ |
LLVM_BINDIR = @LLVM_BINDIR@ |
LLVM_CFLAGS = @LLVM_CFLAGS@ |
LLVM_CONFIG = @LLVM_CONFIG@ |
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ |
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ |
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ |
LLVM_LDFLAGS = @LLVM_LDFLAGS@ |
LLVM_LIBDIR = @LLVM_LIBDIR@ |
LLVM_LIBS = @LLVM_LIBS@ |
LLVM_VERSION = @LLVM_VERSION@ |
LN_S = @LN_S@ |
LTLIBOBJS = @LTLIBOBJS@ |
MAKE = @MAKE@ |
MAKEINFO = @MAKEINFO@ |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
MESA_LLVM = @MESA_LLVM@ |
MKDIR_P = @MKDIR_P@ |
NM = @NM@ |
NMEDIT = @NMEDIT@ |
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ |
NOUVEAU_LIBS = @NOUVEAU_LIBS@ |
OBJDUMP = @OBJDUMP@ |
OBJEXT = @OBJEXT@ |
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@ |
OSMESA_LIB = @OSMESA_LIB@ |
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ |
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@ |
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@ |
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ |
OSMESA_PC_REQ = @OSMESA_PC_REQ@ |
OSMESA_VERSION = @OSMESA_VERSION@ |
OTOOL = @OTOOL@ |
OTOOL64 = @OTOOL64@ |
PACKAGE = @PACKAGE@ |
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ |
PACKAGE_NAME = @PACKAGE_NAME@ |
PACKAGE_STRING = @PACKAGE_STRING@ |
PACKAGE_TARNAME = @PACKAGE_TARNAME@ |
PACKAGE_URL = @PACKAGE_URL@ |
PACKAGE_VERSION = @PACKAGE_VERSION@ |
PATH_SEPARATOR = @PATH_SEPARATOR@ |
PERL = @PERL@ |
PKG_CONFIG = @PKG_CONFIG@ |
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ |
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ |
POSIX_SHELL = @POSIX_SHELL@ |
PTHREAD_CC = @PTHREAD_CC@ |
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ |
PTHREAD_LIBS = @PTHREAD_LIBS@ |
PYTHON2 = @PYTHON2@ |
RADEON_CFLAGS = @RADEON_CFLAGS@ |
RADEON_LIBS = @RADEON_LIBS@ |
RANLIB = @RANLIB@ |
SED = @SED@ |
SELINUX_LIBS = @SELINUX_LIBS@ |
SET_MAKE = @SET_MAKE@ |
SHELL = @SHELL@ |
STRIP = @STRIP@ |
VDPAU_CFLAGS = @VDPAU_CFLAGS@ |
VDPAU_LIBS = @VDPAU_LIBS@ |
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ |
VDPAU_MAJOR = @VDPAU_MAJOR@ |
VDPAU_MINOR = @VDPAU_MINOR@ |
VERSION = @VERSION@ |
VG_LIB_DEPS = @VG_LIB_DEPS@ |
VG_LIB_GLOB = @VG_LIB_GLOB@ |
VG_LIB_NAME = @VG_LIB_NAME@ |
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@ |
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ |
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ |
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@ |
WAYLAND_LIBS = @WAYLAND_LIBS@ |
WAYLAND_SCANNER = @WAYLAND_SCANNER@ |
X11_INCLUDES = @X11_INCLUDES@ |
XA_MAJOR = @XA_MAJOR@ |
XA_MINOR = @XA_MINOR@ |
XA_TINY = @XA_TINY@ |
XA_VERSION = @XA_VERSION@ |
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ |
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ |
XEXT_CFLAGS = @XEXT_CFLAGS@ |
XEXT_LIBS = @XEXT_LIBS@ |
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ |
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ |
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ |
XLIBGL_LIBS = @XLIBGL_LIBS@ |
XORG_CFLAGS = @XORG_CFLAGS@ |
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@ |
XORG_LIBS = @XORG_LIBS@ |
XVMC_CFLAGS = @XVMC_CFLAGS@ |
XVMC_LIBS = @XVMC_LIBS@ |
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ |
XVMC_MAJOR = @XVMC_MAJOR@ |
XVMC_MINOR = @XVMC_MINOR@ |
YACC = @YACC@ |
YFLAGS = @YFLAGS@ |
# Build/source directory locations, system triplets and installation
# directories. As above, each variable is set to the @NAME@ placeholder of
# the same name.
abs_builddir = @abs_builddir@ |
abs_srcdir = @abs_srcdir@ |
abs_top_builddir = @abs_top_builddir@ |
abs_top_srcdir = @abs_top_srcdir@ |
ac_ct_AR = @ac_ct_AR@ |
ac_ct_CC = @ac_ct_CC@ |
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@ |
ac_ct_CXX = @ac_ct_CXX@ |
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@ |
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ |
am__include = @am__include@ |
am__leading_dot = @am__leading_dot@ |
am__quote = @am__quote@ |
am__tar = @am__tar@ |
am__untar = @am__untar@ |
ax_pthread_config = @ax_pthread_config@ |
bindir = @bindir@ |
build = @build@ |
build_alias = @build_alias@ |
build_cpu = @build_cpu@ |
build_os = @build_os@ |
build_vendor = @build_vendor@ |
builddir = @builddir@ |
datadir = @datadir@ |
datarootdir = @datarootdir@ |
docdir = @docdir@ |
dvidir = @dvidir@ |
exec_prefix = @exec_prefix@ |
host = @host@ |
host_alias = @host_alias@ |
host_cpu = @host_cpu@ |
host_os = @host_os@ |
host_vendor = @host_vendor@ |
htmldir = @htmldir@ |
includedir = @includedir@ |
infodir = @infodir@ |
install_sh = @install_sh@ |
libdir = @libdir@ |
libexecdir = @libexecdir@ |
localedir = @localedir@ |
localstatedir = @localstatedir@ |
mandir = @mandir@ |
mkdir_p = @mkdir_p@ |
oldincludedir = @oldincludedir@ |
pdfdir = @pdfdir@ |
prefix = @prefix@ |
program_transform_name = @program_transform_name@ |
psdir = @psdir@ |
sbindir = @sbindir@ |
sharedstatedir = @sharedstatedir@ |
srcdir = @srcdir@ |
sysconfdir = @sysconfdir@ |
target = @target@ |
target_alias = @target_alias@ |
target_cpu = @target_cpu@ |
target_os = @target_os@ |
target_vendor = @target_vendor@ |
top_build_prefix = @top_build_prefix@ |
top_builddir = @top_builddir@ |
top_srcdir = @top_srcdir@ |
# Source files of libr300.la: first the files in this directory, then the
# files under compiler/.
C_SOURCES = \ |
r300_blit.c \ |
r300_chipset.c \ |
r300_context.c \ |
r300_debug.c \ |
r300_emit.c \ |
r300_flush.c \ |
r300_fs.c \ |
r300_hyperz.c \ |
r300_query.c \ |
r300_render.c \ |
r300_render_stencilref.c \ |
r300_render_translate.c \ |
r300_resource.c \ |
r300_screen.c \ |
r300_screen_buffer.c \ |
r300_state.c \ |
r300_state_derived.c \ |
r300_vs.c \ |
r300_vs_draw.c \ |
r300_texture.c \ |
r300_texture_desc.c \ |
r300_tgsi_to_rc.c \ |
r300_transfer.c \ |
\ |
compiler/radeon_code.c \ |
compiler/radeon_compiler.c \ |
compiler/radeon_compiler_util.c \ |
compiler/radeon_emulate_branches.c \ |
compiler/radeon_emulate_loops.c \ |
compiler/radeon_inline_literals.c \ |
compiler/radeon_program.c \ |
compiler/radeon_program_print.c \ |
compiler/radeon_opcodes.c \ |
compiler/radeon_program_alu.c \ |
compiler/radeon_program_pair.c \ |
compiler/radeon_program_tex.c \ |
compiler/radeon_pair_translate.c \ |
compiler/radeon_pair_schedule.c \ |
compiler/radeon_pair_regalloc.c \ |
compiler/radeon_pair_dead_sources.c \ |
compiler/radeon_dataflow.c \ |
compiler/radeon_dataflow_deadcode.c \ |
compiler/radeon_dataflow_swizzles.c \ |
compiler/radeon_list.c \ |
compiler/radeon_optimize.c \ |
compiler/radeon_remove_constants.c \ |
compiler/radeon_rename_regs.c \ |
compiler/radeon_vert_fc.c \ |
compiler/radeon_variable.c \ |
compiler/r3xx_fragprog.c \ |
compiler/r300_fragprog.c \ |
compiler/r300_fragprog_swizzle.c \ |
compiler/r300_fragprog_emit.c \ |
compiler/r500_fragprog.c \ |
compiler/r500_fragprog_emit.c \ |
compiler/r3xx_vertprog.c \ |
compiler/r3xx_vertprog_dump.c \ |
compiler/memory_pool.c |
# Include paths common to gallium code (public headers, gallium include and
# auxiliary directories) plus the configured DEFINES.
GALLIUM_CFLAGS = \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
$(DEFINES) |
# Libtool libraries built here but not installed.
noinst_LTLIBRARIES = libr300.la libr300-helper.la |
testdir = compiler/tests |
# Flags for every C compilation in this directory: include paths into the
# gallium drivers, public headers, mesa, glsl and mapi trees, followed by
# the visibility, gallium, LLVM and radeon flag sets.
AM_CFLAGS = \ |
-I$(top_srcdir)/src/gallium/drivers \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/mesa \ |
-I$(top_srcdir)/src/glsl \ |
-I$(top_srcdir)/src/mapi \ |
$(VISIBILITY_CFLAGS) \ |
$(GALLIUM_CFLAGS) \ |
$(LLVM_CFLAGS) \ |
$(RADEON_CFLAGS) |
# The test program links against both libraries built here, the gallium
# auxiliary library and GALLIUM_DRI_LIB_DEPS.
r300_compiler_tests_LDADD = libr300.la libr300-helper.la \ |
$(top_builddir)/src/gallium/auxiliary/libgallium.la \ |
$(GALLIUM_DRI_LIB_DEPS) |
# Extra include path so the tests can reach the headers in compiler/.
r300_compiler_tests_CPPFLAGS = \ |
-I$(top_srcdir)/src/gallium/drivers/r300/compiler |
r300_compiler_tests_SOURCES = \ |
$(testdir)/r300_compiler_tests.c \ |
$(testdir)/radeon_compiler_optimize_tests.c \ |
$(testdir)/radeon_compiler_regalloc_tests.c \ |
$(testdir)/radeon_compiler_util_tests.c \ |
$(testdir)/rc_test_helpers.c \ |
$(testdir)/unit_test.c |
libr300_la_SOURCES = $(C_SOURCES) |
# These two files are included in libmesagallium, which is included in the dri |
# targets. So, if they were added directly to r300g, the dri-r300 target would |
# have duplicated symbols, and if they weren't, the other *-r300 targets would |
# fail with undefined symbols. |
# |
# Solve this by building them into a separate helper library that can be linked |
# in place of libmesagallium. |
libr300_helper_la_SOURCES = \ |
$(top_srcdir)/src/glsl/ralloc.c \ |
$(top_srcdir)/src/mesa/program/register_allocate.c |
all: all-am |
.SUFFIXES: |
.SUFFIXES: .c .lo .log .o .obj .test .test$(EXEEXT) .trs |
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) |
@for dep in $?; do \ |
case '$(am__configure_deps)' in \ |
*$$dep*) \ |
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ |
&& { if test -f $@; then exit 0; else break; fi; }; \ |
exit 1;; \ |
esac; \ |
done; \ |
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/r300/Makefile'; \ |
$(am__cd) $(top_srcdir) && \ |
$(AUTOMAKE) --foreign src/gallium/drivers/r300/Makefile |
.PRECIOUS: Makefile |
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
@case '$?' in \ |
*config.status*) \ |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ |
*) \ |
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ |
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ |
esac; |
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc: |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(top_srcdir)/configure: $(am__configure_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(am__aclocal_m4_deps): |
# Remove the libraries listed in $(noinst_LTLIBRARIES) (nothing is removed when |
# the list is empty; the leading '-' makes a failure non-fatal). |
# Then, for every library, derive its directory -- "." for a name without a |
# slash, otherwise the name with its last path component stripped -- append |
# "/so_locations", de-duplicate with sort -u, and remove those files if the |
# resulting list is non-empty. |
clean-noinstLTLIBRARIES: |
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) |
@list='$(noinst_LTLIBRARIES)'; \ |
locs=`for p in $$list; do echo $$p; done | \ |
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ |
sort -u`; \ |
test -z "$$locs" || { \ |
echo rm -f $${locs}; \ |
rm -f $${locs}; \ |
} |
# Link the helper library from its objects and $(libr300_helper_la_LIBADD); |
# it is relinked when its objects or (EXTRA_)DEPENDENCIES change. |
libr300-helper.la: $(libr300_helper_la_OBJECTS) $(libr300_helper_la_DEPENDENCIES) $(EXTRA_libr300_helper_la_DEPENDENCIES) |
$(AM_V_CCLD)$(LINK) $(libr300_helper_la_OBJECTS) $(libr300_helper_la_LIBADD) $(LIBS) |
# Link the main driver library from its objects and $(libr300_la_LIBADD); |
# it is relinked when its objects or (EXTRA_)DEPENDENCIES change. |
libr300.la: $(libr300_la_OBJECTS) $(libr300_la_DEPENDENCIES) $(EXTRA_libr300_la_DEPENDENCIES) |
$(AM_V_CCLD)$(LINK) $(libr300_la_OBJECTS) $(libr300_la_LIBADD) $(LIBS) |
# Remove the programs listed in $(check_PROGRAMS); does nothing if the list is |
# empty. When $(EXEEXT) is non-empty, a second pass removes the same names with |
# the $(EXEEXT) suffix stripped. |
clean-checkPROGRAMS: |
@list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ |
echo " rm -f" $$list; \ |
rm -f $$list || exit $$?; \ |
test -n "$(EXEEXT)" || exit 0; \ |
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ |
echo " rm -f" $$list; \ |
rm -f $$list |
# Link the compiler unit-test program: the old binary is deleted first, then |
# the objects are linked with $(r300_compiler_tests_LDADD) and $(LIBS). |
r300_compiler_tests$(EXEEXT): $(r300_compiler_tests_OBJECTS) $(r300_compiler_tests_DEPENDENCIES) $(EXTRA_r300_compiler_tests_DEPENDENCIES) |
@rm -f r300_compiler_tests$(EXEEXT) |
$(AM_V_CCLD)$(LINK) $(r300_compiler_tests_OBJECTS) $(r300_compiler_tests_LDADD) $(LIBS) |
# Delete every object file (*.$(OBJEXT)) in the current directory; the leading |
# '-' makes a failing rm non-fatal. |
mostlyclean-compile: |
-rm -f *.$(OBJEXT) |
# Delete *.tab.c files in the current directory; the leading '-' makes a |
# failing rm non-fatal. |
distclean-compile: |
-rm -f *.tab.c |
# Per-object dependency fragments kept under ./$(DEPDIR): .Plo files for the |
# libtool objects and .Po files for the r300_compiler_tests objects. |
# The @AMDEP_TRUE@, @am__include@ and @am__quote@ markers are placeholders |
# filled in when this template is turned into the final Makefile. |
# NOTE(review): presumably they expand to the platform's include directive and |
# quoting, and the lines are disabled without dependency tracking -- confirm. |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory_pool.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_blit.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_chipset.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Po@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Po@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Po@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Po@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-rc_test_helpers.Po@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-unit_test.Po@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_context.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_debug.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_emit.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_flush.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fragprog.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fragprog_emit.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fragprog_swizzle.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_hyperz.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_query.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_render.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_render_stencilref.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_render_translate.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_resource.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_screen.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_screen_buffer.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_state.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_state_derived.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_texture.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_texture_desc.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_tgsi_to_rc.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_transfer.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_vs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_vs_draw.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r3xx_fragprog.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r3xx_vertprog.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r3xx_vertprog_dump.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r500_fragprog.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r500_fragprog_emit.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_code.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_compiler.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_compiler_util.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dataflow.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dataflow_deadcode.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dataflow_swizzles.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_emulate_branches.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_emulate_loops.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_inline_literals.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_list.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_opcodes.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_optimize.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_dead_sources.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_regalloc.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_schedule.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_translate.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_alu.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_pair.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_print.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_tex.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_remove_constants.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_rename_regs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_variable.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vert_fc.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ralloc.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/register_allocate.Plo@am__quote@ |
# Suffix rule: compile a .c file into a plain .o object with $(COMPILE). |
# Three alternative recipes are selected by the @...@ prefixes: |
#  - fastdep: the compiler writes dependencies to $(DEPDIR)/$*.Tpo while |
#    compiling (-MT/-MD/-MP/-MF), and the file is then renamed to $*.Po; |
#  - AMDEP without fastdep: compile through $(depcomp), with source, object and |
#    libtool=no passed in the environment; |
#  - otherwise: plain compile with no dependency output. |
.c.o: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< |
# Suffix rule: compile a .c file into a .obj object with $(COMPILE). |
# Same three recipe variants as .c.o, except that the source path is passed |
# through $(CYGPATH_W) before being handed to the compiler. |
# NOTE(review): presumably this yields a Windows-style path -- confirm. |
.c.obj: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` |
# Suffix rule: compile a .c file into a libtool object (.lo) with $(LTCOMPILE). |
# Same three recipe variants as .c.o, but dependencies end up in |
# $(DEPDIR)/$*.Plo and the depcomp variant is invoked with libtool=yes. |
.c.lo: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< |
# Explicit rule for ralloc.lo, built from $(top_srcdir)/src/glsl/ralloc.c (one |
# of the two helper-library sources that live outside this directory). |
# The `test -f ... || echo '$(srcdir)/'` fragment prefixes $(srcdir)/ only when |
# the source is not found at the given path. In the fastdep variant the .Tpo |
# dependency file is renamed to $(DEPDIR)/ralloc.Plo after compiling. |
ralloc.lo: $(top_srcdir)/src/glsl/ralloc.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ralloc.lo -MD -MP -MF $(DEPDIR)/ralloc.Tpo -c -o ralloc.lo `test -f '$(top_srcdir)/src/glsl/ralloc.c' || echo '$(srcdir)/'`$(top_srcdir)/src/glsl/ralloc.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ralloc.Tpo $(DEPDIR)/ralloc.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(top_srcdir)/src/glsl/ralloc.c' object='ralloc.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ralloc.lo `test -f '$(top_srcdir)/src/glsl/ralloc.c' || echo '$(srcdir)/'`$(top_srcdir)/src/glsl/ralloc.c |
# Explicit rule for register_allocate.lo, built from |
# $(top_srcdir)/src/mesa/program/register_allocate.c (the second helper-library |
# source that lives outside this directory). |
# $(srcdir)/ is prefixed only when the source is not found at the given path; |
# the fastdep variant renames the .Tpo file to $(DEPDIR)/register_allocate.Plo. |
register_allocate.lo: $(top_srcdir)/src/mesa/program/register_allocate.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT register_allocate.lo -MD -MP -MF $(DEPDIR)/register_allocate.Tpo -c -o register_allocate.lo `test -f '$(top_srcdir)/src/mesa/program/register_allocate.c' || echo '$(srcdir)/'`$(top_srcdir)/src/mesa/program/register_allocate.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/register_allocate.Tpo $(DEPDIR)/register_allocate.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(top_srcdir)/src/mesa/program/register_allocate.c' object='register_allocate.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o register_allocate.lo `test -f '$(top_srcdir)/src/mesa/program/register_allocate.c' || echo '$(srcdir)/'`$(top_srcdir)/src/mesa/program/register_allocate.c |
# Explicit rule for radeon_code.lo: the source is compiler/radeon_code.c, while |
# the object is written to the current directory. $(srcdir)/ is prefixed only |
# when the source is not found relative to the current directory; the fastdep |
# variant renames the .Tpo file to $(DEPDIR)/radeon_code.Plo. |
radeon_code.lo: compiler/radeon_code.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_code.lo -MD -MP -MF $(DEPDIR)/radeon_code.Tpo -c -o radeon_code.lo `test -f 'compiler/radeon_code.c' || echo '$(srcdir)/'`compiler/radeon_code.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_code.Tpo $(DEPDIR)/radeon_code.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_code.c' object='radeon_code.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_code.lo `test -f 'compiler/radeon_code.c' || echo '$(srcdir)/'`compiler/radeon_code.c |
# Explicit rule for radeon_compiler.lo: the source is |
# compiler/radeon_compiler.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_compiler.Plo. |
radeon_compiler.lo: compiler/radeon_compiler.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_compiler.lo -MD -MP -MF $(DEPDIR)/radeon_compiler.Tpo -c -o radeon_compiler.lo `test -f 'compiler/radeon_compiler.c' || echo '$(srcdir)/'`compiler/radeon_compiler.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_compiler.Tpo $(DEPDIR)/radeon_compiler.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_compiler.c' object='radeon_compiler.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_compiler.lo `test -f 'compiler/radeon_compiler.c' || echo '$(srcdir)/'`compiler/radeon_compiler.c |
# Explicit rule for radeon_compiler_util.lo: the source is |
# compiler/radeon_compiler_util.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_compiler_util.Plo. |
radeon_compiler_util.lo: compiler/radeon_compiler_util.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_compiler_util.lo -MD -MP -MF $(DEPDIR)/radeon_compiler_util.Tpo -c -o radeon_compiler_util.lo `test -f 'compiler/radeon_compiler_util.c' || echo '$(srcdir)/'`compiler/radeon_compiler_util.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_compiler_util.Tpo $(DEPDIR)/radeon_compiler_util.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_compiler_util.c' object='radeon_compiler_util.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_compiler_util.lo `test -f 'compiler/radeon_compiler_util.c' || echo '$(srcdir)/'`compiler/radeon_compiler_util.c |
# Explicit rule for radeon_emulate_branches.lo: the source is |
# compiler/radeon_emulate_branches.c, while the object is written to the |
# current directory. $(srcdir)/ is prefixed only when the source is not found |
# relative to the current directory; the fastdep variant renames the .Tpo file |
# to $(DEPDIR)/radeon_emulate_branches.Plo. |
radeon_emulate_branches.lo: compiler/radeon_emulate_branches.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_emulate_branches.lo -MD -MP -MF $(DEPDIR)/radeon_emulate_branches.Tpo -c -o radeon_emulate_branches.lo `test -f 'compiler/radeon_emulate_branches.c' || echo '$(srcdir)/'`compiler/radeon_emulate_branches.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_emulate_branches.Tpo $(DEPDIR)/radeon_emulate_branches.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_emulate_branches.c' object='radeon_emulate_branches.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_emulate_branches.lo `test -f 'compiler/radeon_emulate_branches.c' || echo '$(srcdir)/'`compiler/radeon_emulate_branches.c |
# Explicit rule for radeon_emulate_loops.lo: the source is |
# compiler/radeon_emulate_loops.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_emulate_loops.Plo. |
radeon_emulate_loops.lo: compiler/radeon_emulate_loops.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_emulate_loops.lo -MD -MP -MF $(DEPDIR)/radeon_emulate_loops.Tpo -c -o radeon_emulate_loops.lo `test -f 'compiler/radeon_emulate_loops.c' || echo '$(srcdir)/'`compiler/radeon_emulate_loops.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_emulate_loops.Tpo $(DEPDIR)/radeon_emulate_loops.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_emulate_loops.c' object='radeon_emulate_loops.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_emulate_loops.lo `test -f 'compiler/radeon_emulate_loops.c' || echo '$(srcdir)/'`compiler/radeon_emulate_loops.c |
# Explicit rule for radeon_inline_literals.lo: the source is |
# compiler/radeon_inline_literals.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_inline_literals.Plo. |
radeon_inline_literals.lo: compiler/radeon_inline_literals.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_inline_literals.lo -MD -MP -MF $(DEPDIR)/radeon_inline_literals.Tpo -c -o radeon_inline_literals.lo `test -f 'compiler/radeon_inline_literals.c' || echo '$(srcdir)/'`compiler/radeon_inline_literals.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_inline_literals.Tpo $(DEPDIR)/radeon_inline_literals.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_inline_literals.c' object='radeon_inline_literals.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_inline_literals.lo `test -f 'compiler/radeon_inline_literals.c' || echo '$(srcdir)/'`compiler/radeon_inline_literals.c |
# Explicit rule for radeon_program.lo: the source is compiler/radeon_program.c, |
# while the object is written to the current directory. $(srcdir)/ is prefixed |
# only when the source is not found relative to the current directory; the |
# fastdep variant renames the .Tpo file to $(DEPDIR)/radeon_program.Plo. |
radeon_program.lo: compiler/radeon_program.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program.lo -MD -MP -MF $(DEPDIR)/radeon_program.Tpo -c -o radeon_program.lo `test -f 'compiler/radeon_program.c' || echo '$(srcdir)/'`compiler/radeon_program.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program.Tpo $(DEPDIR)/radeon_program.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_program.c' object='radeon_program.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program.lo `test -f 'compiler/radeon_program.c' || echo '$(srcdir)/'`compiler/radeon_program.c |
# Explicit rule for radeon_program_print.lo: the source is |
# compiler/radeon_program_print.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_program_print.Plo. |
radeon_program_print.lo: compiler/radeon_program_print.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_print.lo -MD -MP -MF $(DEPDIR)/radeon_program_print.Tpo -c -o radeon_program_print.lo `test -f 'compiler/radeon_program_print.c' || echo '$(srcdir)/'`compiler/radeon_program_print.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_print.Tpo $(DEPDIR)/radeon_program_print.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_program_print.c' object='radeon_program_print.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_print.lo `test -f 'compiler/radeon_program_print.c' || echo '$(srcdir)/'`compiler/radeon_program_print.c |
# Explicit rule for radeon_opcodes.lo: the source is compiler/radeon_opcodes.c, |
# while the object is written to the current directory. $(srcdir)/ is prefixed |
# only when the source is not found relative to the current directory; the |
# fastdep variant renames the .Tpo file to $(DEPDIR)/radeon_opcodes.Plo. |
radeon_opcodes.lo: compiler/radeon_opcodes.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_opcodes.lo -MD -MP -MF $(DEPDIR)/radeon_opcodes.Tpo -c -o radeon_opcodes.lo `test -f 'compiler/radeon_opcodes.c' || echo '$(srcdir)/'`compiler/radeon_opcodes.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_opcodes.Tpo $(DEPDIR)/radeon_opcodes.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_opcodes.c' object='radeon_opcodes.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_opcodes.lo `test -f 'compiler/radeon_opcodes.c' || echo '$(srcdir)/'`compiler/radeon_opcodes.c |
# Explicit rule for radeon_program_alu.lo: the source is |
# compiler/radeon_program_alu.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_program_alu.Plo. |
radeon_program_alu.lo: compiler/radeon_program_alu.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_alu.lo -MD -MP -MF $(DEPDIR)/radeon_program_alu.Tpo -c -o radeon_program_alu.lo `test -f 'compiler/radeon_program_alu.c' || echo '$(srcdir)/'`compiler/radeon_program_alu.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_alu.Tpo $(DEPDIR)/radeon_program_alu.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_program_alu.c' object='radeon_program_alu.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_alu.lo `test -f 'compiler/radeon_program_alu.c' || echo '$(srcdir)/'`compiler/radeon_program_alu.c |
# Explicit rule for radeon_program_pair.lo: the source is |
# compiler/radeon_program_pair.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_program_pair.Plo. |
radeon_program_pair.lo: compiler/radeon_program_pair.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_pair.lo -MD -MP -MF $(DEPDIR)/radeon_program_pair.Tpo -c -o radeon_program_pair.lo `test -f 'compiler/radeon_program_pair.c' || echo '$(srcdir)/'`compiler/radeon_program_pair.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_pair.Tpo $(DEPDIR)/radeon_program_pair.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_program_pair.c' object='radeon_program_pair.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_pair.lo `test -f 'compiler/radeon_program_pair.c' || echo '$(srcdir)/'`compiler/radeon_program_pair.c |
# Explicit rule for radeon_program_tex.lo: the source is |
# compiler/radeon_program_tex.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_program_tex.Plo. |
radeon_program_tex.lo: compiler/radeon_program_tex.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_tex.lo -MD -MP -MF $(DEPDIR)/radeon_program_tex.Tpo -c -o radeon_program_tex.lo `test -f 'compiler/radeon_program_tex.c' || echo '$(srcdir)/'`compiler/radeon_program_tex.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_tex.Tpo $(DEPDIR)/radeon_program_tex.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_program_tex.c' object='radeon_program_tex.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_tex.lo `test -f 'compiler/radeon_program_tex.c' || echo '$(srcdir)/'`compiler/radeon_program_tex.c |
# Explicit rule for radeon_pair_translate.lo: the source is |
# compiler/radeon_pair_translate.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_pair_translate.Plo. |
radeon_pair_translate.lo: compiler/radeon_pair_translate.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_translate.lo -MD -MP -MF $(DEPDIR)/radeon_pair_translate.Tpo -c -o radeon_pair_translate.lo `test -f 'compiler/radeon_pair_translate.c' || echo '$(srcdir)/'`compiler/radeon_pair_translate.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_translate.Tpo $(DEPDIR)/radeon_pair_translate.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_pair_translate.c' object='radeon_pair_translate.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_translate.lo `test -f 'compiler/radeon_pair_translate.c' || echo '$(srcdir)/'`compiler/radeon_pair_translate.c |
# Explicit rule for radeon_pair_schedule.lo: the source is |
# compiler/radeon_pair_schedule.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_pair_schedule.Plo. |
radeon_pair_schedule.lo: compiler/radeon_pair_schedule.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_schedule.lo -MD -MP -MF $(DEPDIR)/radeon_pair_schedule.Tpo -c -o radeon_pair_schedule.lo `test -f 'compiler/radeon_pair_schedule.c' || echo '$(srcdir)/'`compiler/radeon_pair_schedule.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_schedule.Tpo $(DEPDIR)/radeon_pair_schedule.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_pair_schedule.c' object='radeon_pair_schedule.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_schedule.lo `test -f 'compiler/radeon_pair_schedule.c' || echo '$(srcdir)/'`compiler/radeon_pair_schedule.c |
# Explicit rule for radeon_pair_regalloc.lo: the source is |
# compiler/radeon_pair_regalloc.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_pair_regalloc.Plo. |
radeon_pair_regalloc.lo: compiler/radeon_pair_regalloc.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_regalloc.lo -MD -MP -MF $(DEPDIR)/radeon_pair_regalloc.Tpo -c -o radeon_pair_regalloc.lo `test -f 'compiler/radeon_pair_regalloc.c' || echo '$(srcdir)/'`compiler/radeon_pair_regalloc.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_regalloc.Tpo $(DEPDIR)/radeon_pair_regalloc.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_pair_regalloc.c' object='radeon_pair_regalloc.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_regalloc.lo `test -f 'compiler/radeon_pair_regalloc.c' || echo '$(srcdir)/'`compiler/radeon_pair_regalloc.c |
# Explicit rule for radeon_pair_dead_sources.lo: the source is |
# compiler/radeon_pair_dead_sources.c, while the object is written to the |
# current directory. $(srcdir)/ is prefixed only when the source is not found |
# relative to the current directory; the fastdep variant renames the .Tpo file |
# to $(DEPDIR)/radeon_pair_dead_sources.Plo. |
radeon_pair_dead_sources.lo: compiler/radeon_pair_dead_sources.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_dead_sources.lo -MD -MP -MF $(DEPDIR)/radeon_pair_dead_sources.Tpo -c -o radeon_pair_dead_sources.lo `test -f 'compiler/radeon_pair_dead_sources.c' || echo '$(srcdir)/'`compiler/radeon_pair_dead_sources.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_dead_sources.Tpo $(DEPDIR)/radeon_pair_dead_sources.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_pair_dead_sources.c' object='radeon_pair_dead_sources.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_dead_sources.lo `test -f 'compiler/radeon_pair_dead_sources.c' || echo '$(srcdir)/'`compiler/radeon_pair_dead_sources.c |
# Explicit rule for radeon_dataflow.lo: the source is |
# compiler/radeon_dataflow.c, while the object is written to the current |
# directory. $(srcdir)/ is prefixed only when the source is not found relative |
# to the current directory; the fastdep variant renames the .Tpo file to |
# $(DEPDIR)/radeon_dataflow.Plo. |
radeon_dataflow.lo: compiler/radeon_dataflow.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_dataflow.lo -MD -MP -MF $(DEPDIR)/radeon_dataflow.Tpo -c -o radeon_dataflow.lo `test -f 'compiler/radeon_dataflow.c' || echo '$(srcdir)/'`compiler/radeon_dataflow.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_dataflow.Tpo $(DEPDIR)/radeon_dataflow.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_dataflow.c' object='radeon_dataflow.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_dataflow.lo `test -f 'compiler/radeon_dataflow.c' || echo '$(srcdir)/'`compiler/radeon_dataflow.c |
# Explicit rule for radeon_dataflow_deadcode.lo: the source is |
# compiler/radeon_dataflow_deadcode.c, while the object is written to the |
# current directory. $(srcdir)/ is prefixed only when the source is not found |
# relative to the current directory; the fastdep variant renames the .Tpo file |
# to $(DEPDIR)/radeon_dataflow_deadcode.Plo. |
radeon_dataflow_deadcode.lo: compiler/radeon_dataflow_deadcode.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_dataflow_deadcode.lo -MD -MP -MF $(DEPDIR)/radeon_dataflow_deadcode.Tpo -c -o radeon_dataflow_deadcode.lo `test -f 'compiler/radeon_dataflow_deadcode.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_deadcode.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_dataflow_deadcode.Tpo $(DEPDIR)/radeon_dataflow_deadcode.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_dataflow_deadcode.c' object='radeon_dataflow_deadcode.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_dataflow_deadcode.lo `test -f 'compiler/radeon_dataflow_deadcode.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_deadcode.c |
# Explicit rule for radeon_dataflow_swizzles.lo: the source is |
# compiler/radeon_dataflow_swizzles.c, while the object is written to the |
# current directory. $(srcdir)/ is prefixed only when the source is not found |
# relative to the current directory; the fastdep variant renames the .Tpo file |
# to $(DEPDIR)/radeon_dataflow_swizzles.Plo. |
radeon_dataflow_swizzles.lo: compiler/radeon_dataflow_swizzles.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_dataflow_swizzles.lo -MD -MP -MF $(DEPDIR)/radeon_dataflow_swizzles.Tpo -c -o radeon_dataflow_swizzles.lo `test -f 'compiler/radeon_dataflow_swizzles.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_swizzles.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_dataflow_swizzles.Tpo $(DEPDIR)/radeon_dataflow_swizzles.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_dataflow_swizzles.c' object='radeon_dataflow_swizzles.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_dataflow_swizzles.lo `test -f 'compiler/radeon_dataflow_swizzles.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_swizzles.c |
# Build the libtool object radeon_list.lo from compiler/radeon_list.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
radeon_list.lo: compiler/radeon_list.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_list.lo -MD -MP -MF $(DEPDIR)/radeon_list.Tpo -c -o radeon_list.lo `test -f 'compiler/radeon_list.c' || echo '$(srcdir)/'`compiler/radeon_list.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_list.Tpo $(DEPDIR)/radeon_list.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_list.c' object='radeon_list.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_list.lo `test -f 'compiler/radeon_list.c' || echo '$(srcdir)/'`compiler/radeon_list.c |
# Build the libtool object radeon_optimize.lo from compiler/radeon_optimize.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
radeon_optimize.lo: compiler/radeon_optimize.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_optimize.lo -MD -MP -MF $(DEPDIR)/radeon_optimize.Tpo -c -o radeon_optimize.lo `test -f 'compiler/radeon_optimize.c' || echo '$(srcdir)/'`compiler/radeon_optimize.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_optimize.Tpo $(DEPDIR)/radeon_optimize.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_optimize.c' object='radeon_optimize.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_optimize.lo `test -f 'compiler/radeon_optimize.c' || echo '$(srcdir)/'`compiler/radeon_optimize.c |
# Build the libtool object radeon_remove_constants.lo from
# compiler/radeon_remove_constants.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
radeon_remove_constants.lo: compiler/radeon_remove_constants.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_remove_constants.lo -MD -MP -MF $(DEPDIR)/radeon_remove_constants.Tpo -c -o radeon_remove_constants.lo `test -f 'compiler/radeon_remove_constants.c' || echo '$(srcdir)/'`compiler/radeon_remove_constants.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_remove_constants.Tpo $(DEPDIR)/radeon_remove_constants.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_remove_constants.c' object='radeon_remove_constants.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_remove_constants.lo `test -f 'compiler/radeon_remove_constants.c' || echo '$(srcdir)/'`compiler/radeon_remove_constants.c |
# Build the libtool object radeon_rename_regs.lo from
# compiler/radeon_rename_regs.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
radeon_rename_regs.lo: compiler/radeon_rename_regs.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_rename_regs.lo -MD -MP -MF $(DEPDIR)/radeon_rename_regs.Tpo -c -o radeon_rename_regs.lo `test -f 'compiler/radeon_rename_regs.c' || echo '$(srcdir)/'`compiler/radeon_rename_regs.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_rename_regs.Tpo $(DEPDIR)/radeon_rename_regs.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_rename_regs.c' object='radeon_rename_regs.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_rename_regs.lo `test -f 'compiler/radeon_rename_regs.c' || echo '$(srcdir)/'`compiler/radeon_rename_regs.c |
# Build the libtool object radeon_vert_fc.lo from compiler/radeon_vert_fc.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
radeon_vert_fc.lo: compiler/radeon_vert_fc.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_vert_fc.lo -MD -MP -MF $(DEPDIR)/radeon_vert_fc.Tpo -c -o radeon_vert_fc.lo `test -f 'compiler/radeon_vert_fc.c' || echo '$(srcdir)/'`compiler/radeon_vert_fc.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_vert_fc.Tpo $(DEPDIR)/radeon_vert_fc.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_vert_fc.c' object='radeon_vert_fc.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_vert_fc.lo `test -f 'compiler/radeon_vert_fc.c' || echo '$(srcdir)/'`compiler/radeon_vert_fc.c |
# Build the libtool object radeon_variable.lo from compiler/radeon_variable.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
radeon_variable.lo: compiler/radeon_variable.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_variable.lo -MD -MP -MF $(DEPDIR)/radeon_variable.Tpo -c -o radeon_variable.lo `test -f 'compiler/radeon_variable.c' || echo '$(srcdir)/'`compiler/radeon_variable.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_variable.Tpo $(DEPDIR)/radeon_variable.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_variable.c' object='radeon_variable.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_variable.lo `test -f 'compiler/radeon_variable.c' || echo '$(srcdir)/'`compiler/radeon_variable.c |
# Build the libtool object r3xx_fragprog.lo from compiler/r3xx_fragprog.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r3xx_fragprog.lo: compiler/r3xx_fragprog.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r3xx_fragprog.lo -MD -MP -MF $(DEPDIR)/r3xx_fragprog.Tpo -c -o r3xx_fragprog.lo `test -f 'compiler/r3xx_fragprog.c' || echo '$(srcdir)/'`compiler/r3xx_fragprog.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r3xx_fragprog.Tpo $(DEPDIR)/r3xx_fragprog.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r3xx_fragprog.c' object='r3xx_fragprog.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r3xx_fragprog.lo `test -f 'compiler/r3xx_fragprog.c' || echo '$(srcdir)/'`compiler/r3xx_fragprog.c |
# Build the libtool object r300_fragprog.lo from compiler/r300_fragprog.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r300_fragprog.lo: compiler/r300_fragprog.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_fragprog.lo -MD -MP -MF $(DEPDIR)/r300_fragprog.Tpo -c -o r300_fragprog.lo `test -f 'compiler/r300_fragprog.c' || echo '$(srcdir)/'`compiler/r300_fragprog.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_fragprog.Tpo $(DEPDIR)/r300_fragprog.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r300_fragprog.c' object='r300_fragprog.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_fragprog.lo `test -f 'compiler/r300_fragprog.c' || echo '$(srcdir)/'`compiler/r300_fragprog.c |
# Build the libtool object r300_fragprog_swizzle.lo from
# compiler/r300_fragprog_swizzle.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r300_fragprog_swizzle.lo: compiler/r300_fragprog_swizzle.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_fragprog_swizzle.lo -MD -MP -MF $(DEPDIR)/r300_fragprog_swizzle.Tpo -c -o r300_fragprog_swizzle.lo `test -f 'compiler/r300_fragprog_swizzle.c' || echo '$(srcdir)/'`compiler/r300_fragprog_swizzle.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_fragprog_swizzle.Tpo $(DEPDIR)/r300_fragprog_swizzle.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r300_fragprog_swizzle.c' object='r300_fragprog_swizzle.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_fragprog_swizzle.lo `test -f 'compiler/r300_fragprog_swizzle.c' || echo '$(srcdir)/'`compiler/r300_fragprog_swizzle.c |
# Build the libtool object r300_fragprog_emit.lo from
# compiler/r300_fragprog_emit.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r300_fragprog_emit.lo: compiler/r300_fragprog_emit.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_fragprog_emit.lo -MD -MP -MF $(DEPDIR)/r300_fragprog_emit.Tpo -c -o r300_fragprog_emit.lo `test -f 'compiler/r300_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r300_fragprog_emit.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_fragprog_emit.Tpo $(DEPDIR)/r300_fragprog_emit.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r300_fragprog_emit.c' object='r300_fragprog_emit.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_fragprog_emit.lo `test -f 'compiler/r300_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r300_fragprog_emit.c |
# Build the libtool object r500_fragprog.lo from compiler/r500_fragprog.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r500_fragprog.lo: compiler/r500_fragprog.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r500_fragprog.lo -MD -MP -MF $(DEPDIR)/r500_fragprog.Tpo -c -o r500_fragprog.lo `test -f 'compiler/r500_fragprog.c' || echo '$(srcdir)/'`compiler/r500_fragprog.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r500_fragprog.Tpo $(DEPDIR)/r500_fragprog.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r500_fragprog.c' object='r500_fragprog.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r500_fragprog.lo `test -f 'compiler/r500_fragprog.c' || echo '$(srcdir)/'`compiler/r500_fragprog.c |
# Build the libtool object r500_fragprog_emit.lo from
# compiler/r500_fragprog_emit.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r500_fragprog_emit.lo: compiler/r500_fragprog_emit.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r500_fragprog_emit.lo -MD -MP -MF $(DEPDIR)/r500_fragprog_emit.Tpo -c -o r500_fragprog_emit.lo `test -f 'compiler/r500_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r500_fragprog_emit.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r500_fragprog_emit.Tpo $(DEPDIR)/r500_fragprog_emit.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r500_fragprog_emit.c' object='r500_fragprog_emit.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r500_fragprog_emit.lo `test -f 'compiler/r500_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r500_fragprog_emit.c |
# Build the libtool object r3xx_vertprog.lo from compiler/r3xx_vertprog.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r3xx_vertprog.lo: compiler/r3xx_vertprog.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r3xx_vertprog.lo -MD -MP -MF $(DEPDIR)/r3xx_vertprog.Tpo -c -o r3xx_vertprog.lo `test -f 'compiler/r3xx_vertprog.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r3xx_vertprog.Tpo $(DEPDIR)/r3xx_vertprog.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r3xx_vertprog.c' object='r3xx_vertprog.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r3xx_vertprog.lo `test -f 'compiler/r3xx_vertprog.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog.c |
# Build the libtool object r3xx_vertprog_dump.lo from
# compiler/r3xx_vertprog_dump.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
r3xx_vertprog_dump.lo: compiler/r3xx_vertprog_dump.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r3xx_vertprog_dump.lo -MD -MP -MF $(DEPDIR)/r3xx_vertprog_dump.Tpo -c -o r3xx_vertprog_dump.lo `test -f 'compiler/r3xx_vertprog_dump.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog_dump.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r3xx_vertprog_dump.Tpo $(DEPDIR)/r3xx_vertprog_dump.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r3xx_vertprog_dump.c' object='r3xx_vertprog_dump.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r3xx_vertprog_dump.lo `test -f 'compiler/r3xx_vertprog_dump.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog_dump.c |
# Build the libtool object memory_pool.lo from compiler/memory_pool.c.
# Three alternative recipe variants follow, each guarded by @...@ markers:
# compile with dependency output (.Tpo renamed to .Plo), compile through
# $(depcomp), or compile without dependency output.
memory_pool.lo: compiler/memory_pool.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT memory_pool.lo -MD -MP -MF $(DEPDIR)/memory_pool.Tpo -c -o memory_pool.lo `test -f 'compiler/memory_pool.c' || echo '$(srcdir)/'`compiler/memory_pool.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/memory_pool.Tpo $(DEPDIR)/memory_pool.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/memory_pool.c' object='memory_pool.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o memory_pool.lo `test -f 'compiler/memory_pool.c' || echo '$(srcdir)/'`compiler/memory_pool.c |
# Compile $(testdir)/r300_compiler_tests.c into the test-program object
# r300_compiler_tests-r300_compiler_tests.o. Unlike the .lo rules this invokes
# $(CC) directly (libtool=no) and uses $(r300_compiler_tests_CPPFLAGS) in place
# of $(AM_CPPFLAGS). The three @...@-guarded variants are: compile with
# dependency output (.Tpo renamed to .Po), compile through $(depcomp), or
# compile without dependency output.
r300_compiler_tests-r300_compiler_tests.o: $(testdir)/r300_compiler_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-r300_compiler_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo -c -o r300_compiler_tests-r300_compiler_tests.o `test -f '$(testdir)/r300_compiler_tests.c' || echo '$(srcdir)/'`$(testdir)/r300_compiler_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/r300_compiler_tests.c' object='r300_compiler_tests-r300_compiler_tests.o' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-r300_compiler_tests.o `test -f '$(testdir)/r300_compiler_tests.c' || echo '$(srcdir)/'`$(testdir)/r300_compiler_tests.c |
# .obj counterpart of the r300_compiler_tests-r300_compiler_tests.o rule.
# Same three @...@-guarded compile variants, but the source path (from the
# current directory if present, otherwise under $(srcdir)) is passed through
# $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-r300_compiler_tests.obj: $(testdir)/r300_compiler_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-r300_compiler_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo -c -o r300_compiler_tests-r300_compiler_tests.obj `if test -f '$(testdir)/r300_compiler_tests.c'; then $(CYGPATH_W) '$(testdir)/r300_compiler_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/r300_compiler_tests.c'; fi` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/r300_compiler_tests.c' object='r300_compiler_tests-r300_compiler_tests.obj' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-r300_compiler_tests.obj `if test -f '$(testdir)/r300_compiler_tests.c'; then $(CYGPATH_W) '$(testdir)/r300_compiler_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/r300_compiler_tests.c'; fi` |
# Compile $(testdir)/radeon_compiler_optimize_tests.c into the test-program
# object r300_compiler_tests-radeon_compiler_optimize_tests.o using $(CC)
# directly (libtool=no) with $(r300_compiler_tests_CPPFLAGS). Three
# @...@-guarded variants: compile with dependency output (.Tpo renamed to .Po),
# compile through $(depcomp), or compile without dependency output.
r300_compiler_tests-radeon_compiler_optimize_tests.o: $(testdir)/radeon_compiler_optimize_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_optimize_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_optimize_tests.o `test -f '$(testdir)/radeon_compiler_optimize_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_optimize_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_optimize_tests.c' object='r300_compiler_tests-radeon_compiler_optimize_tests.o' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_optimize_tests.o `test -f '$(testdir)/radeon_compiler_optimize_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_optimize_tests.c |
# .obj counterpart of the r300_compiler_tests-radeon_compiler_optimize_tests.o
# rule. Same three @...@-guarded compile variants, but the source path (from
# the current directory if present, otherwise under $(srcdir)) is passed
# through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-radeon_compiler_optimize_tests.obj: $(testdir)/radeon_compiler_optimize_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_optimize_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_optimize_tests.obj `if test -f '$(testdir)/radeon_compiler_optimize_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_optimize_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_optimize_tests.c'; fi` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_optimize_tests.c' object='r300_compiler_tests-radeon_compiler_optimize_tests.obj' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_optimize_tests.obj `if test -f '$(testdir)/radeon_compiler_optimize_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_optimize_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_optimize_tests.c'; fi` |
# Compile $(testdir)/radeon_compiler_regalloc_tests.c into the test-program
# object r300_compiler_tests-radeon_compiler_regalloc_tests.o using $(CC)
# directly (libtool=no) with $(r300_compiler_tests_CPPFLAGS). Three
# @...@-guarded variants: compile with dependency output (.Tpo renamed to .Po),
# compile through $(depcomp), or compile without dependency output.
r300_compiler_tests-radeon_compiler_regalloc_tests.o: $(testdir)/radeon_compiler_regalloc_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_regalloc_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.o `test -f '$(testdir)/radeon_compiler_regalloc_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_regalloc_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_regalloc_tests.c' object='r300_compiler_tests-radeon_compiler_regalloc_tests.o' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.o `test -f '$(testdir)/radeon_compiler_regalloc_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_regalloc_tests.c |
# .obj counterpart of the r300_compiler_tests-radeon_compiler_regalloc_tests.o
# rule. Same three @...@-guarded compile variants, but the source path (from
# the current directory if present, otherwise under $(srcdir)) is passed
# through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-radeon_compiler_regalloc_tests.obj: $(testdir)/radeon_compiler_regalloc_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_regalloc_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.obj `if test -f '$(testdir)/radeon_compiler_regalloc_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_regalloc_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_regalloc_tests.c'; fi` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_regalloc_tests.c' object='r300_compiler_tests-radeon_compiler_regalloc_tests.obj' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.obj `if test -f '$(testdir)/radeon_compiler_regalloc_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_regalloc_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_regalloc_tests.c'; fi` |
# Compile $(testdir)/radeon_compiler_util_tests.c into the test-program object
# r300_compiler_tests-radeon_compiler_util_tests.o using $(CC) directly
# (libtool=no) with $(r300_compiler_tests_CPPFLAGS). Three @...@-guarded
# variants: compile with dependency output (.Tpo renamed to .Po), compile
# through $(depcomp), or compile without dependency output.
r300_compiler_tests-radeon_compiler_util_tests.o: $(testdir)/radeon_compiler_util_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_util_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_util_tests.o `test -f '$(testdir)/radeon_compiler_util_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_util_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_util_tests.c' object='r300_compiler_tests-radeon_compiler_util_tests.o' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_util_tests.o `test -f '$(testdir)/radeon_compiler_util_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_util_tests.c |
# .obj counterpart of the r300_compiler_tests-radeon_compiler_util_tests.o
# rule. Same three @...@-guarded compile variants, but the source path (from
# the current directory if present, otherwise under $(srcdir)) is passed
# through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-radeon_compiler_util_tests.obj: $(testdir)/radeon_compiler_util_tests.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_util_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_util_tests.obj `if test -f '$(testdir)/radeon_compiler_util_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_util_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_util_tests.c'; fi` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_util_tests.c' object='r300_compiler_tests-radeon_compiler_util_tests.obj' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_util_tests.obj `if test -f '$(testdir)/radeon_compiler_util_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_util_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_util_tests.c'; fi` |
# Compile $(testdir)/rc_test_helpers.c into the test-program object
# r300_compiler_tests-rc_test_helpers.o using $(CC) directly (libtool=no) with
# $(r300_compiler_tests_CPPFLAGS). Three @...@-guarded variants: compile with
# dependency output (.Tpo renamed to .Po), compile through $(depcomp), or
# compile without dependency output.
r300_compiler_tests-rc_test_helpers.o: $(testdir)/rc_test_helpers.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-rc_test_helpers.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo -c -o r300_compiler_tests-rc_test_helpers.o `test -f '$(testdir)/rc_test_helpers.c' || echo '$(srcdir)/'`$(testdir)/rc_test_helpers.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/rc_test_helpers.c' object='r300_compiler_tests-rc_test_helpers.o' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-rc_test_helpers.o `test -f '$(testdir)/rc_test_helpers.c' || echo '$(srcdir)/'`$(testdir)/rc_test_helpers.c |
# .obj counterpart of the r300_compiler_tests-rc_test_helpers.o rule. Same
# three @...@-guarded compile variants, but the source path (from the current
# directory if present, otherwise under $(srcdir)) is passed through
# $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-rc_test_helpers.obj: $(testdir)/rc_test_helpers.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-rc_test_helpers.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo -c -o r300_compiler_tests-rc_test_helpers.obj `if test -f '$(testdir)/rc_test_helpers.c'; then $(CYGPATH_W) '$(testdir)/rc_test_helpers.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/rc_test_helpers.c'; fi` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/rc_test_helpers.c' object='r300_compiler_tests-rc_test_helpers.obj' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-rc_test_helpers.obj `if test -f '$(testdir)/rc_test_helpers.c'; then $(CYGPATH_W) '$(testdir)/rc_test_helpers.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/rc_test_helpers.c'; fi` |
# Compile $(testdir)/unit_test.c into the test-program object
# r300_compiler_tests-unit_test.o using $(CC) directly (libtool=no) with
# $(r300_compiler_tests_CPPFLAGS). Three @...@-guarded variants: compile with
# dependency output (.Tpo renamed to .Po), compile through $(depcomp), or
# compile without dependency output.
r300_compiler_tests-unit_test.o: $(testdir)/unit_test.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-unit_test.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-unit_test.Tpo -c -o r300_compiler_tests-unit_test.o `test -f '$(testdir)/unit_test.c' || echo '$(srcdir)/'`$(testdir)/unit_test.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-unit_test.Tpo $(DEPDIR)/r300_compiler_tests-unit_test.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/unit_test.c' object='r300_compiler_tests-unit_test.o' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-unit_test.o `test -f '$(testdir)/unit_test.c' || echo '$(srcdir)/'`$(testdir)/unit_test.c |
# .obj counterpart of the r300_compiler_tests-unit_test.o rule. Same three
# @...@-guarded compile variants, but the source path (from the current
# directory if present, otherwise under $(srcdir)) is passed through
# $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-unit_test.obj: $(testdir)/unit_test.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-unit_test.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-unit_test.Tpo -c -o r300_compiler_tests-unit_test.obj `if test -f '$(testdir)/unit_test.c'; then $(CYGPATH_W) '$(testdir)/unit_test.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/unit_test.c'; fi` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-unit_test.Tpo $(DEPDIR)/r300_compiler_tests-unit_test.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/unit_test.c' object='r300_compiler_tests-unit_test.obj' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-unit_test.obj `if test -f '$(testdir)/unit_test.c'; then $(CYGPATH_W) '$(testdir)/unit_test.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/unit_test.c'; fi` |
# Remove libtool object files (*.lo) from the current directory; failures are |
# ignored because of the leading '-'. |
mostlyclean-libtool: |
-rm -f *.lo |
# Remove libtool's .libs and _libs working directories; failures are ignored. |
clean-libtool: |
-rm -rf .libs _libs |
# Build an 'ID' database with mkid from the de-duplicated list of tagged files |
# ($$unique is set by $(am__define_uniq_tagged_files)). |
ID: $(am__tagged_files) |
$(am__define_uniq_tagged_files); mkid -fID $$unique |
# 'tags' and 'TAGS' are aliases that both end up in tags-am. |
tags: tags-am |
TAGS: tags |
# Run $(ETAGS) over the unique tagged files. Nothing is run when ETAGS_ARGS, |
# the positional arguments and $$unique are all empty; when positional |
# arguments are present they are passed before $$unique. |
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) |
set x; \ |
here=`pwd`; \ |
$(am__define_uniq_tagged_files); \ |
shift; \ |
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ |
test -n "$$unique" || unique=$$empty_fix; \ |
if test $$# -gt 0; then \ |
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ |
"$$@" $$unique; \ |
else \ |
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ |
$$unique; \ |
fi; \ |
fi |
# 'ctags' and 'CTAGS' are aliases that both end up in ctags-am. |
ctags: ctags-am |
CTAGS: ctags |
# Run $(CTAGS) over the unique tagged files, unless both CTAGS_ARGS and the |
# file list are empty. |
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) |
$(am__define_uniq_tagged_files); \ |
test -z "$(CTAGS_ARGS)$$unique" \ |
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ |
$$unique |
# Run 'gtags -i' from $(top_srcdir), passing the absolute path of |
# $(top_builddir) as its final argument. |
GTAGS: |
here=`$(am__cd) $(top_builddir) && pwd` \ |
&& $(am__cd) $(top_srcdir) \ |
&& gtags -i $(GTAGS_ARGS) "$$here" |
cscopelist: cscopelist-am |
# Append one path per tagged file to $(top_builddir)/cscope.files. Files that |
# exist in the build directory are listed relative to $(subdir); the others |
# are listed under the source directory ($(srcdir) as is when it is absolute, |
# $(subdir)/$(srcdir) otherwise). |
cscopelist-am: $(am__tagged_files) |
list='$(am__tagged_files)'; \ |
case "$(srcdir)" in \ |
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ |
*) sdir=$(subdir)/$(srcdir) ;; \ |
esac; \ |
for i in $$list; do \ |
if test -f "$$i"; then \ |
echo "$(subdir)/$$i"; \ |
else \ |
echo "$$sdir/$$i"; \ |
fi; \ |
done >> $(top_builddir)/cscope.files |
# Delete every tag database the rules above can produce. |
distclean-tags: |
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags |
# Recover from deleted '.trs' file; this should ensure that |
# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create |
# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells |
# to avoid problems with "make -n". |
.log.trs: |
rm -f $< $@ |
$(MAKE) $(AM_MAKEFLAGS) $< |
# Leading 'am--fnord' is there to ensure the list of targets does not |
# expand to empty, as could happen e.g. with make check TESTS=''. |
am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) |
# No-op target. The 'recheck' rule passes am__force_recheck=am--force-recheck |
# so that every test log and .trs file depends on it. |
am--force-recheck: |
@: |
# Build the test-suite summary log. |
# |
# First recipe: find the tests whose .log or .trs file is missing or |
# unreadable, delete those files (unless this is a dry run) and re-make the |
# logs with a recursive $(MAKE). The am__remaking_logs environment variable |
# guards against infinite recursion. Afterwards every such .trs/.log must |
# exist and be readable, or the recipe exits with status 1. |
# |
# Second recipe: count the ':test-result:' lines of each kind (PASS, FAIL, |
# SKIP, XFAIL, XPASS, ERROR) in the .trs files, write the report to |
# $(TEST_SUITE_LOG).tmp and rename it to $(TEST_SUITE_LOG), then print a |
# (possibly coloured) summary. The run counts as a success only when |
# FAIL + XPASS + ERROR is zero; otherwise the recipe exits with status 1, and |
# the log itself is printed when VERBOSE is non-empty. |
$(TEST_SUITE_LOG): $(TEST_LOGS) |
@$(am__set_TESTS_bases); \ |
am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ |
redo_bases=`for i in $$bases; do \ |
am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ |
done`; \ |
if test -n "$$redo_bases"; then \ |
redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ |
redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ |
if $(am__make_dryrun); then :; else \ |
rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ |
fi; \ |
fi; \ |
if test -n "$$am__remaking_logs"; then \ |
echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ |
"recursion detected" >&2; \ |
else \ |
am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ |
fi; \ |
if $(am__make_dryrun); then :; else \ |
st=0; \ |
errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ |
for i in $$redo_bases; do \ |
test -f $$i.trs && test -r $$i.trs \ |
|| { echo "$$errmsg $$i.trs" >&2; st=1; }; \ |
test -f $$i.log && test -r $$i.log \ |
|| { echo "$$errmsg $$i.log" >&2; st=1; }; \ |
done; \ |
test $$st -eq 0 || exit 1; \ |
fi |
@$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ |
ws='[ ]'; \ |
results=`for b in $$bases; do echo $$b.trs; done`; \ |
test -n "$$results" || results=/dev/null; \ |
all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ |
pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ |
fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ |
skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ |
xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ |
xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ |
error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ |
if test `expr $$fail + $$xpass + $$error` -eq 0; then \ |
success=true; \ |
else \ |
success=false; \ |
fi; \ |
br='==================='; br=$$br$$br$$br$$br; \ |
result_count () \ |
{ \ |
if test x"$$1" = x"--maybe-color"; then \ |
maybe_colorize=yes; \ |
elif test x"$$1" = x"--no-color"; then \ |
maybe_colorize=no; \ |
else \ |
echo "$@: invalid 'result_count' usage" >&2; exit 4; \ |
fi; \ |
shift; \ |
desc=$$1 count=$$2; \ |
if test $$maybe_colorize = yes && test $$count -gt 0; then \ |
color_start=$$3 color_end=$$std; \ |
else \ |
color_start= color_end=; \ |
fi; \ |
echo "$${color_start}# $$desc $$count$${color_end}"; \ |
}; \ |
create_testsuite_report () \ |
{ \ |
result_count $$1 "TOTAL:" $$all "$$brg"; \ |
result_count $$1 "PASS: " $$pass "$$grn"; \ |
result_count $$1 "SKIP: " $$skip "$$blu"; \ |
result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ |
result_count $$1 "FAIL: " $$fail "$$red"; \ |
result_count $$1 "XPASS:" $$xpass "$$red"; \ |
result_count $$1 "ERROR:" $$error "$$mgn"; \ |
}; \ |
{ \ |
echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ |
$(am__rst_title); \ |
create_testsuite_report --no-color; \ |
echo; \ |
echo ".. contents:: :depth: 2"; \ |
echo; \ |
for b in $$bases; do echo $$b; done \ |
| $(am__create_global_log); \ |
} >$(TEST_SUITE_LOG).tmp || exit 1; \ |
mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ |
if $$success; then \ |
col="$$grn"; \ |
else \ |
col="$$red"; \ |
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ |
fi; \ |
echo "$${col}$$br$${std}"; \ |
echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \ |
echo "$${col}$$br$${std}"; \ |
create_testsuite_report --maybe-color; \ |
echo "$$col$$br$$std"; \ |
if $$success; then :; else \ |
echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ |
if test -n "$(PACKAGE_BUGREPORT)"; then \ |
echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ |
fi; \ |
echo "$$col$$br$$std"; \ |
fi; \ |
$$success || exit 1 |
# Run the test suite: delete the logs and .trs files listed in RECHECK_LOGS |
# and the old $(TEST_SUITE_LOG), then re-make $(TEST_SUITE_LOG) with TEST_LOGS |
# set to the .log name of every test base. The exit status of that recursive |
# $(MAKE) becomes the status of this recipe. |
check-TESTS: |
@list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list |
@list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list |
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) |
@set +e; $(am__set_TESTS_bases); \ |
log_list=`for i in $$bases; do echo $$i.log; done`; \ |
trs_list=`for i in $$bases; do echo $$i.trs; done`; \ |
log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ |
$(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ |
exit $$?; |
# Re-run a subset of the tests: the test bases are filtered through |
# $(am__list_recheck_tests), and $(TEST_SUITE_LOG) is re-made for just those |
# logs with am__force_recheck=am--force-recheck so they are considered out of |
# date. The exit status of the recursive $(MAKE) is propagated. |
recheck: all $(check_PROGRAMS) |
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) |
@set +e; $(am__set_TESTS_bases); \ |
bases=`for i in $$bases; do echo $$i; done \ |
| $(am__list_recheck_tests)` || exit 1; \ |
log_list=`for i in $$bases; do echo $$i.log; done`; \ |
log_list=`echo $$log_list`; \ |
$(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ |
am__force_recheck=am--force-recheck \ |
TEST_LOGS="$$log_list"; \ |
exit $$? |
# Run the r300_compiler_tests program through $(LOG_DRIVER), which is told to |
# write r300_compiler_tests.log and r300_compiler_tests.trs. $$f and $$tst are |
# not set in this recipe itself; presumably $(am__check_pre) defines them from |
# $$p - confirm against its definition earlier in the file. |
r300_compiler_tests.log: r300_compiler_tests$(EXEEXT) |
@p='r300_compiler_tests$(EXEEXT)'; \ |
b='r300_compiler_tests'; \ |
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ |
--log-file $$b.log --trs-file $$b.trs \ |
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ |
"$$tst" $(AM_TESTS_FD_REDIRECT) |
# Suffix rule: run a '.test' script through $(TEST_LOG_DRIVER), producing the |
# matching .log and .trs files ($$b is set by $(am__set_b)). |
.test.log: |
@p='$<'; \ |
$(am__set_b); \ |
$(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ |
--log-file $$b.log --trs-file $$b.trs \ |
$(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ |
"$$tst" $(AM_TESTS_FD_REDIRECT) |
# Same rule for tests carrying the $(EXEEXT) suffix; only emitted when the |
# @am__EXEEXT_TRUE@ substitution enables it. |
@am__EXEEXT_TRUE@.test$(EXEEXT).log: |
@am__EXEEXT_TRUE@ @p='$<'; \ |
@am__EXEEXT_TRUE@ $(am__set_b); \ |
@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ |
@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ |
@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ |
@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) |
# Copy every entry of $(DISTFILES) into $(distdir). |
# The $(srcdir)/ and $(top_srcdir)/ prefixes are stripped from the names |
# first (the latter is replaced by $(top_builddir)/), the needed |
# sub-directories are created with $(MKDIR_P), and each entry is taken from |
# the build directory when it exists there and from $(srcdir) otherwise. |
# Directories are copied recursively, with any already-copied directories |
# under $(distdir) made user-accessible (chmod u+rwx) first; plain files are |
# copied only if they are not already present in $(distdir). Any failed copy |
# aborts the recipe with status 1. |
distdir: $(DISTFILES) |
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ |
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ |
list='$(DISTFILES)'; \ |
dist_files=`for file in $$list; do echo $$file; done | \ |
sed -e "s|^$$srcdirstrip/||;t" \ |
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ |
case $$dist_files in \ |
*/*) $(MKDIR_P) `echo "$$dist_files" | \ |
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ |
sort -u` ;; \ |
esac; \ |
for file in $$dist_files; do \ |
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ |
if test -d $$d/$$file; then \ |
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ |
if test -d "$(distdir)/$$file"; then \ |
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ |
fi; \ |
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ |
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ |
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ |
fi; \ |
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ |
else \ |
test -f "$(distdir)/$$file" \ |
|| cp -p $$d/$$file "$(distdir)/$$file" \ |
|| exit 1; \ |
fi; \ |
done |
# 'make check': after all-am, build the check programs and then run the tests |
# through check-TESTS, each in its own recursive $(MAKE). |
check-am: all-am |
$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) |
$(MAKE) $(AM_MAKEFLAGS) check-TESTS |
check: check-am |
# The default build of this directory: the Makefile and the libtool libraries. |
all-am: Makefile $(LTLIBRARIES) |
installdirs: |
# Install entry points. Each forwards to its '-am' counterpart. |
install: install-am |
install-exec: install-exec-am |
install-data: install-data-am |
uninstall: uninstall-am |
install-am: all-am |
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am |
installcheck: installcheck-am |
# Re-run 'install' with INSTALL_PROGRAM replaced by $(INSTALL_STRIP_PROGRAM) |
# and INSTALL_STRIP_FLAG=-s. When $(STRIP) is set it is additionally exported |
# to the install program as STRIPPROG. |
install-strip: |
if test -z '$(STRIP)'; then \ |
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ |
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ |
install; \ |
else \ |
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ |
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ |
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ |
fi |
# Remove the per-test .log and .trs files and the test-suite summary log. |
# Each removal is skipped when the corresponding list is empty, and failures |
# are ignored. |
mostlyclean-generic: |
-test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) |
-test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) |
-test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) |
clean-generic: |
# Remove the files listed in CONFIG_CLEAN_FILES, and those in |
# CONFIG_CLEAN_VPATH_FILES unless $(srcdir) is '.'. |
distclean-generic: |
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) |
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) |
# Only prints a warning; this rule deletes nothing itself. |
maintainer-clean-generic: |
@echo "This command is intended for maintainers to use" |
@echo "it deletes files that may require special tools to rebuild." |
# Cleaning levels, each delegating to the more specific clean-* rules. |
# distclean and maintainer-clean additionally remove the dependency directory |
# ./$(DEPDIR) and the generated Makefile. |
clean: clean-am |
clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ |
clean-noinstLTLIBRARIES mostlyclean-am |
distclean: distclean-am |
-rm -rf ./$(DEPDIR) |
-rm -f Makefile |
distclean-am: clean-am distclean-compile distclean-generic \ |
distclean-tags |
# Documentation and installation targets below have no recipe and no |
# prerequisites beyond their '-am' counterpart, so they do nothing in this |
# directory. |
dvi: dvi-am |
dvi-am: |
html: html-am |
html-am: |
info: info-am |
info-am: |
install-data-am: |
install-dvi: install-dvi-am |
install-dvi-am: |
install-exec-am: |
install-html: install-html-am |
install-html-am: |
install-info: install-info-am |
install-info-am: |
install-man: |
install-pdf: install-pdf-am |
install-pdf-am: |
install-ps: install-ps-am |
install-ps-am: |
installcheck-am: |
maintainer-clean: maintainer-clean-am |
-rm -rf ./$(DEPDIR) |
-rm -f Makefile |
maintainer-clean-am: distclean-am maintainer-clean-generic |
mostlyclean: mostlyclean-am |
mostlyclean-am: mostlyclean-compile mostlyclean-generic \ |
mostlyclean-libtool |
pdf: pdf-am |
pdf-am: |
ps: ps-am |
ps-am: |
uninstall-am: |
# Targets whose recipes invoke $(MAKE) recursively. |
.MAKE: check-am install-am install-strip |
# Targets that name actions rather than files. |
.PHONY: CTAGS GTAGS TAGS all all-am check check-TESTS check-am clean \ |
clean-checkPROGRAMS clean-generic clean-libtool \ |
clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ |
distclean-compile distclean-generic distclean-libtool \ |
distclean-tags distdir dvi dvi-am html html-am info info-am \ |
install install-am install-data install-data-am install-dvi \ |
install-dvi-am install-exec install-exec-am install-html \ |
install-html-am install-info install-info-am install-man \ |
install-pdf install-pdf-am install-ps install-ps-am \ |
install-strip installcheck installcheck-am installdirs \ |
maintainer-clean maintainer-clean-generic mostlyclean \ |
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ |
pdf pdf-am ps ps-am recheck tags tags-am uninstall \ |
uninstall-am |
# Tell versions [3.59,3.63) of GNU make to not export all variables. |
# Otherwise a system limit (for SysV at least) may be exceeded. |
.NOEXPORT: |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Makefile.sources |
---|
0,0 → 1,59 |
# C_SOURCES: every C file of the r300 driver, relative to this directory. |
# The first group holds the driver proper, the second group (after the empty |
# continuation line) the shader compiler under compiler/. This file is |
# included by both Android.mk and Makefile.am, so it must stay a plain |
# variable assignment. |
C_SOURCES = \ |
r300_blit.c \ |
r300_chipset.c \ |
r300_context.c \ |
r300_debug.c \ |
r300_emit.c \ |
r300_flush.c \ |
r300_fs.c \ |
r300_hyperz.c \ |
r300_query.c \ |
r300_render.c \ |
r300_render_stencilref.c \ |
r300_render_translate.c \ |
r300_resource.c \ |
r300_screen.c \ |
r300_screen_buffer.c \ |
r300_state.c \ |
r300_state_derived.c \ |
r300_vs.c \ |
r300_vs_draw.c \ |
r300_texture.c \ |
r300_texture_desc.c \ |
r300_tgsi_to_rc.c \ |
r300_transfer.c \ |
\ |
compiler/radeon_code.c \ |
compiler/radeon_compiler.c \ |
compiler/radeon_compiler_util.c \ |
compiler/radeon_emulate_branches.c \ |
compiler/radeon_emulate_loops.c \ |
compiler/radeon_inline_literals.c \ |
compiler/radeon_program.c \ |
compiler/radeon_program_print.c \ |
compiler/radeon_opcodes.c \ |
compiler/radeon_program_alu.c \ |
compiler/radeon_program_pair.c \ |
compiler/radeon_program_tex.c \ |
compiler/radeon_pair_translate.c \ |
compiler/radeon_pair_schedule.c \ |
compiler/radeon_pair_regalloc.c \ |
compiler/radeon_pair_dead_sources.c \ |
compiler/radeon_dataflow.c \ |
compiler/radeon_dataflow_deadcode.c \ |
compiler/radeon_dataflow_swizzles.c \ |
compiler/radeon_list.c \ |
compiler/radeon_optimize.c \ |
compiler/radeon_remove_constants.c \ |
compiler/radeon_rename_regs.c \ |
compiler/radeon_vert_fc.c \ |
compiler/radeon_variable.c \ |
compiler/r3xx_fragprog.c \ |
compiler/r300_fragprog.c \ |
compiler/r300_fragprog_swizzle.c \ |
compiler/r300_fragprog_emit.c \ |
compiler/r500_fragprog.c \ |
compiler/r500_fragprog_emit.c \ |
compiler/r3xx_vertprog.c \ |
compiler/r3xx_vertprog_dump.c \ |
compiler/memory_pool.c |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/memory_pool.c |
---|
0,0 → 1,97 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "memory_pool.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Requests of at least this many bytes are not carved out of a shared block;
 * memory_pool_malloc() gives each of them a dedicated malloc() instead. */
#define POOL_LARGE_ALLOC 4096 |
/* Boundary, in bytes, that the pool's bump pointer is rounded up to after
 * every small allocation. */
#define POOL_ALIGN 8 |
/* Header stored at the start of every malloc()ed block. All blocks owned by a
 * pool are chained through it so memory_pool_destroy() can free them. */
struct memory_block { |
struct memory_block * next; |
}; |
void memory_pool_init(struct memory_pool * pool) |
{ |
memset(pool, 0, sizeof(struct memory_pool)); |
} |
void memory_pool_destroy(struct memory_pool * pool) |
{ |
while(pool->blocks) { |
struct memory_block * block = pool->blocks; |
pool->blocks = block->next; |
free(block); |
} |
} |
static void refill_pool(struct memory_pool * pool) |
{ |
unsigned int blocksize = pool->total_allocated; |
struct memory_block * newblock; |
if (!blocksize) |
blocksize = 2*POOL_LARGE_ALLOC; |
newblock = malloc(blocksize); |
newblock->next = pool->blocks; |
pool->blocks = newblock; |
pool->head = (unsigned char*)(newblock + 1); |
pool->end = ((unsigned char*)newblock) + blocksize; |
pool->total_allocated += blocksize; |
} |
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) |
{ |
if (bytes < POOL_LARGE_ALLOC) { |
void * ptr; |
if (pool->head + bytes > pool->end) |
refill_pool(pool); |
assert(pool->head + bytes <= pool->end); |
ptr = pool->head; |
pool->head += bytes; |
pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); |
return ptr; |
} else { |
struct memory_block * block = malloc(bytes + sizeof(struct memory_block)); |
block->next = pool->blocks; |
pool->blocks = block; |
return (block + 1); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/memory_pool.h |
---|
0,0 → 1,80 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef MEMORY_POOL_H |
#define MEMORY_POOL_H |
struct memory_block; |
/** |
* Provides a pool of memory that can quickly be allocated from, at the |
* cost of being unable to explicitly free one of the allocated blocks. |
* Instead, the entire pool can be freed at once. |
* |
* The idea is to allow one to quickly allocate a flexible amount of |
* memory during operations like shader compilation while avoiding |
* reference counting headaches. |
*/ |
struct memory_pool { |
/* Next free byte of the current block; small allocations are handed out
 * from here and the pointer is then advanced. */
unsigned char * head; |
/* One past the last byte of the current block. */
unsigned char * end; |
/* Sum of the sizes of all blocks obtained by refilling the pool; also used
 * as the size of the next refill block. */
unsigned int total_allocated; |
/* Singly linked list of every malloc()ed block owned by the pool. */
struct memory_block * blocks; |
}; |
/* Zero all fields of the pool. */
void memory_pool_init(struct memory_pool * pool); |
/* Free every block owned by the pool, and with them all allocations. */
void memory_pool_destroy(struct memory_pool * pool); |
/* Allocate bytes from the pool; the result cannot be freed individually. */
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); |
/**
 * Generic helper for growing an array that has separate size/count
 * and reserved counters, so that it can accommodate up to num new elements.
 *
 * type * Array;
 * unsigned int Size;
 * unsigned int Reserved;
 *
 * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
 * assert(Size + k <= Reserved);
 *
 * When the array has to grow, the capacity is doubled; if that is still not
 * enough for Size + num elements, it becomes Size + 4 * num instead. The
 * existing elements are copied into the new storage; the old storage stays
 * owned by the pool.
 *
 * \note Size is not changed by this macro.
 *
 * \note The macro expands to memcpy(), so <string.h> must be included by the
 * user.
 *
 * \warning Array, Size, Reserved have to be lvalues and may be evaluated
 * several times.
 */
#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
	unsigned int _num = (num); \
	if ((size) + _num > (reserved)) { \
		unsigned int newreserve = (reserved) * 2; \
		type * newarray; \
		/* Doubling must cover the old elements plus the new ones. */ \
		if (newreserve < (size) + _num) \
			newreserve = (size) + 4 * _num; /* arbitrary heuristic */ \
		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
		/* An empty array may still be NULL, which memcpy() must not see. */ \
		if ((size) > 0) \
			memcpy(newarray, (array), (size) * sizeof(type)); \
		(array) = newarray; \
		(reserved) = newreserve; \
	} \
} while(0)
#endif /* MEMORY_POOL_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog.c |
---|
0,0 → 1,338 |
/* |
* Copyright (C) 2005 Ben Skeggs. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "r300_fragprog.h" |
#include <stdio.h> |
#include "../r300_reg.h" |
static void presub_string(char out[10], unsigned int inst) |
{ |
switch(inst & 0x600000){ |
case R300_ALU_SRCP_1_MINUS_2_SRC0: |
sprintf(out, "bias"); |
break; |
case R300_ALU_SRCP_SRC1_MINUS_SRC0: |
sprintf(out, "sub"); |
break; |
case R300_ALU_SRCP_SRC1_PLUS_SRC0: |
sprintf(out, "add"); |
break; |
case R300_ALU_SRCP_1_MINUS_SRC0: |
sprintf(out, "inv "); |
break; |
} |
} |
/**
 * Return the extra register-index bit (value 32, i.e. bit 5) when \p bit is
 * set in \p r400_ext_addr, and 0 otherwise.
 */
static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
{
	if ((r400_ext_addr & bit) != 0)
		return 1 << 5;
	return 0;
}
/**
 * Print a human-readable dump of the compiled R300 fragment program to
 * stderr.
 *
 * \param c     the compiler; it is cast to r300_fragment_program_compiler and
 *              its code->code.r300 member is dumped.
 * \param user  not used by this function.
 *
 * For every node the ALU/TEX offsets and sizes are printed, followed by the
 * node's TEX instructions (if any) and its ALU instructions. Each ALU
 * instruction is shown as decoded sources/destinations and decoded arguments,
 * together with the raw instruction words.
 */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user) |
{ |
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; |
struct r300_fragment_program_code *code = &compiler->code->code.r300; |
int n, i, j; |
/* Counts the dumps printed so far; it is shown in the header line. */
static int pc = 0; |
fprintf(stderr, "pc=%d*************************************\n", pc++); |
fprintf(stderr, "Hardware program\n"); |
fprintf(stderr, "----------------\n"); |
if (c->is_r400) { |
fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); |
} |
/* (config & 3) + 1 nodes are dumped; they use the last entries of the
 * four-element code_addr array. */
for (n = 0; n <= (code->config & 3); n++) { |
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; |
/* The ALU offset and size take their low bits from code_addr and three
 * more bits (placed at bit 6 and up) from r400_code_offset_ext. */
unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + |
(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); |
unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + |
(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); |
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; |
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; |
fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " |
"alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, |
alu_offset, tex_offset, alu_end, tex_end, code_addr); |
/* TEX instructions are printed for every node but the first, and for the
 * first one only if the FIRST_NODE_HAS_TEX flag is set. */
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { |
fprintf(stderr, " TEX:\n"); |
for (i = tex_offset; |
i <= tex_offset + tex_end; |
++i) { |
const char *instr; |
switch ((code->tex. |
inst[i] >> R300_TEX_INST_SHIFT) & |
15) { |
case R300_TEX_OP_LD: |
instr = "TEX"; |
break; |
case R300_TEX_OP_KIL: |
instr = "KIL"; |
break; |
case R300_TEX_OP_TXP: |
instr = "TXP"; |
break; |
case R300_TEX_OP_TXB: |
instr = "TXB"; |
break; |
default: |
instr = "UNKNOWN"; |
} |
fprintf(stderr, |
" %s t%i, %c%i, texture[%i] (%08x)\n", |
instr, |
(code->tex. |
inst[i] >> R300_DST_ADDR_SHIFT) & 31, |
't', |
(code->tex. |
inst[i] >> R300_SRC_ADDR_SHIFT) & 31, |
(code->tex. |
inst[i] & R300_TEX_ID_MASK) >> |
R300_TEX_ID_SHIFT, |
code->tex.inst[i]); |
} |
} |
for (i = alu_offset; |
i <= alu_offset + alu_end; ++i) { |
/* srcc/srca: names of the RGB/alpha sources; index 3 holds the presubtract
 * mnemonic. dstc/dsta: destination descriptions. argc/arga: decoded
 * arguments of the RGB/alpha instruction words. */
char srcc[4][10], dstc[20]; |
char srca[4][10], dsta[20]; |
char argc[3][20]; |
char arga[3][20]; |
char flags[5], tmp[10]; |
/* Decode the three sources, six bits each: bit 5 selects a constant ('c')
 * over a temporary ('t'), the low five bits are the index, and get_msb()
 * supplies one more index bit from r400_ext_addr. */
for (j = 0; j < 3; ++j) { |
int regc = code->alu.inst[i].rgb_addr >> (j * 6); |
int rega = code->alu.inst[i].alpha_addr >> (j * 6); |
int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), |
code->alu.inst[i].r400_ext_addr); |
int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), |
code->alu.inst[i].r400_ext_addr); |
sprintf(srcc[j], "%c%i", |
(regc & 32) ? 'c' : 't', (regc & 31) | msbc); |
sprintf(srca[j], "%c%i", |
(rega & 32) ? 'c' : 't', (rega & 31) | msba); |
} |
/* RGB destination: first the temporary register write mask, then the
 * output write mask appended to the same string. */
dstc[0] = 0; |
sprintf(flags, "%s%s%s", |
(code->alu.inst[i]. |
rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", |
(code->alu.inst[i]. |
rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", |
(code->alu.inst[i]. |
rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); |
if (flags[0] != 0) { |
unsigned int msb = get_msb( |
R400_ADDRD_EXT_RGB_MSB_BIT, |
code->alu.inst[i].r400_ext_addr); |
sprintf(dstc, "t%i.%s ", |
((code->alu.inst[i]. |
rgb_addr >> R300_ALU_DSTC_SHIFT) |
& 31) | msb, |
flags); |
} |
sprintf(flags, "%s%s%s", |
(code->alu.inst[i]. |
rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", |
(code->alu.inst[i]. |
rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", |
(code->alu.inst[i]. |
rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); |
if (flags[0] != 0) { |
sprintf(tmp, "o%i.%s", |
(code->alu.inst[i]. |
rgb_addr >> 29) & 3, |
flags); |
strcat(dstc, tmp); |
} |
/* Presub */ |
presub_string(srcc[3], code->alu.inst[i].rgb_inst); |
presub_string(srca[3], code->alu.inst[i].alpha_inst); |
/* Alpha destination: temporary register, output and depth ("Z") writes
 * are appended in that order. */
dsta[0] = 0; |
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { |
unsigned int msb = get_msb( |
R400_ADDRD_EXT_A_MSB_BIT, |
code->alu.inst[i].r400_ext_addr); |
sprintf(dsta, "t%i.w ", |
((code->alu.inst[i]. |
alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) |
| msb); |
} |
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { |
sprintf(tmp, "o%i.w ", |
(code->alu.inst[i]. |
alpha_addr >> 25) & 3); |
strcat(dsta, tmp); |
} |
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { |
strcat(dsta, "Z"); |
} |
fprintf(stderr, |
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" |
" w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, |
srcc[0], srcc[1], srcc[2], srcc[3], dstc, |
code->alu.inst[i].rgb_addr, srca[0], srca[1], |
srca[2], srca[3], dsta, |
code->alu.inst[i].alpha_addr); |
/* Decode the three arguments, seven bits each: the low five bits select
 * the source/swizzle, bit 5 is printed as negation ("-") and bit 6 as
 * absolute value ("|...|"). */
for (j = 0; j < 3; ++j) { |
int regc = code->alu.inst[i].rgb_inst >> (j * 7); |
int rega = code->alu.inst[i].alpha_inst >> (j * 7); |
int d; |
char buf[20]; |
/* RGB argument selector. */
d = regc & 31; |
if (d < 12) { |
switch (d % 4) { |
case R300_ALU_ARGC_SRC0C_XYZ: |
sprintf(buf, "%s.xyz", |
srcc[d / 4]); |
break; |
case R300_ALU_ARGC_SRC0C_XXX: |
sprintf(buf, "%s.xxx", |
srcc[d / 4]); |
break; |
case R300_ALU_ARGC_SRC0C_YYY: |
sprintf(buf, "%s.yyy", |
srcc[d / 4]); |
break; |
case R300_ALU_ARGC_SRC0C_ZZZ: |
sprintf(buf, "%s.zzz", |
srcc[d / 4]); |
break; |
} |
} else if (d < 15) { |
sprintf(buf, "%s.www", srca[d - 12]); |
} else if (d < 20 ) { |
switch(d) { |
case R300_ALU_ARGC_SRCP_XYZ: |
sprintf(buf, "srcp.xyz"); |
break; |
case R300_ALU_ARGC_SRCP_XXX: |
sprintf(buf, "srcp.xxx"); |
break; |
case R300_ALU_ARGC_SRCP_YYY: |
sprintf(buf, "srcp.yyy"); |
break; |
case R300_ALU_ARGC_SRCP_ZZZ: |
sprintf(buf, "srcp.zzz"); |
break; |
case R300_ALU_ARGC_SRCP_WWW: |
sprintf(buf, "srcp.www"); |
break; |
} |
} else if (d == 20) { |
sprintf(buf, "0.0"); |
} else if (d == 21) { |
sprintf(buf, "1.0"); |
} else if (d == 22) { |
sprintf(buf, "0.5"); |
} else if (d >= 23 && d < 32) { |
d -= 23; |
switch (d / 3) { |
case 0: |
sprintf(buf, "%s.yzx", |
srcc[d % 3]); |
break; |
case 1: |
sprintf(buf, "%s.zxy", |
srcc[d % 3]); |
break; |
case 2: |
sprintf(buf, "%s.Wzy", |
srcc[d % 3]); |
break; |
} |
} else { |
sprintf(buf, "%i", d); |
} |
sprintf(argc[j], "%s%s%s%s", |
(regc & 32) ? "-" : "", |
(regc & 64) ? "|" : "", |
buf, (regc & 64) ? "|" : ""); |
/* Alpha argument selector. */
d = rega & 31; |
if (d < 9) { |
sprintf(buf, "%s.%c", srcc[d / 3], |
'x' + (char)(d % 3)); |
} else if (d < 12) { |
sprintf(buf, "%s.w", srca[d - 9]); |
} else if (d < 16) { |
switch(d) { |
case R300_ALU_ARGA_SRCP_X: |
sprintf(buf, "srcp.x"); |
break; |
case R300_ALU_ARGA_SRCP_Y: |
sprintf(buf, "srcp.y"); |
break; |
case R300_ALU_ARGA_SRCP_Z: |
sprintf(buf, "srcp.z"); |
break; |
case R300_ALU_ARGA_SRCP_W: |
sprintf(buf, "srcp.w"); |
break; |
} |
} else if (d == 16) { |
sprintf(buf, "0.0"); |
} else if (d == 17) { |
sprintf(buf, "1.0"); |
} else if (d == 18) { |
sprintf(buf, "0.5"); |
} else { |
sprintf(buf, "%i", d); |
} |
sprintf(arga[j], "%s%s%s%s", |
(rega & 32) ? "-" : "", |
(rega & 64) ? "|" : "", |
buf, (rega & 64) ? "|" : ""); |
} |
fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" |
" w: %8s %8s %8s op: %08x\n", |
argc[0], argc[1], argc[2], |
code->alu.inst[i].rgb_inst, |
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? |
"NOP" : "", |
arga[0], arga[1],arga[2], |
code->alu.inst[i].alpha_inst); |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog.h |
---|
0,0 → 1,44 |
/* |
* Copyright (C) 2005 Ben Skeggs. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/* |
* Authors: |
* Ben Skeggs <darktama@iinet.net.au> |
* Jerome Glisse <j.glisse@gmail.com> |
*/ |
#ifndef __R300_FRAGPROG_H_ |
#define __R300_FRAGPROG_H_ |
#include "radeon_compiler.h" |
#include "radeon_program.h" |
extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); |
extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user); |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c |
---|
0,0 → 1,552 |
/* |
* Copyright (C) 2005 Ben Skeggs. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* \file |
* |
* Emit the r300_fragment_program_code that can be understood by the hardware. |
* Input is a pre-transformed radeon_program. |
* |
* \author Ben Skeggs <darktama@iinet.net.au> |
* |
* \author Jerome Glisse <j.glisse@gmail.com> |
*/ |
#include "r300_fragprog.h" |
#include "../r300_reg.h" |
#include "radeon_program_pair.h" |
#include "r300_fragprog_swizzle.h" |
/**
 * Book-keeping carried through the emission of one fragment program.
 *
 * The node_first_* members remember where the node that is currently being
 * built starts inside the ALU and TEX instruction arrays.
 */
struct r300_emit_state {
	struct r300_fragment_program_compiler * compiler; /**< owning compiler */

	unsigned current_node : 2; /**< index of the node being built (0..3) */

	/* Instruction offsets may need more than eight bits: finish_node()
	 * splits them into a base field plus r400 "MSB" extension bits, and
	 * get_msbs_alu() reads bits 6..8 of the value.  They are therefore
	 * stored full width; 8-bit bit-fields would silently wrap at 256 and
	 * break the comparisons against code->alu.length / code->tex.length. */
	unsigned int node_first_tex; /**< tex.length when the node was started */
	unsigned int node_first_alu; /**< alu.length when the node was started */

	uint32_t node_flags; /**< output flags OR'ed into the node's code_addr word */
};
/* Declares the locals `c` (the fragment program compiler) and `code` (the
 * r300 hardware code being filled in), derived from a variable named `emit`
 * that must be in scope.  Deliberately has no trailing semicolon. */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300

/* Report a compile error through rc_error(), prefixed with the current file
 * and function name.  Requires a local named `c` (see PROG_CODE). */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
/**
 * Extract bits 6..8 of an ALU instruction offset or size, i.e. the three
 * bits directly above the low six.
 *
 * @param bits The full offset/size value
 * @return The three extracted bits, shifted down to bit 0
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int msb_field = 0x7u << 6;

	return (bits & msb_field) >> 6;
}
/**
 * Extract the extension ("MSB") bits of a texture instruction offset or
 * size: the four bits directly above the low \p lsbs bits.
 *
 * @param bits The full offset/size value
 * @param lsbs The number of least significant bits to skip (they are stored
 *             in the base field)
 * @return Bits [lsbs, lsbs + 3] of \p bits, shifted down to bit 0
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	/* The mask must be contiguous.  The previous value 0x15 (binary
	 * 10101) dropped bits 1 and 3 of the result and let bit 4 through;
	 * decimal 15 == 0xf was evidently intended. */
	return (bits >> lsbs) & 0xf;
}
/* Drop the `lsbs` least significant bits of `x` and keep the bits selected by
 * `mask`.  Every parameter is parenthesized so that expression arguments
 * (e.g. `a | b`) expand with the intended precedence. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
/** |
* Mark a temporary register as used. |
*/ |
static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) |
{ |
if (index > code->pixsize) |
code->pixsize = index; |
} |
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) |
{ |
if (!src.Used) |
return 0; |
if (src.File == RC_FILE_CONSTANT) { |
return src.Index | (1 << 5); |
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { |
use_temporary(code, src.Index); |
return src.Index & 0x1f; |
} |
return 0; |
} |
/**
 * Map a compiler opcode to the R300_ALU_OUTC_* opcode for the RGB half of a
 * paired ALU instruction.
 *
 * NOP is encoded as MAD.  Any opcode not listed here is reported as an
 * error and also encoded as MAD.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP:
		return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND:
		return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3:
		return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4:
		return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC:
		return R300_ALU_OUTC_FRC;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R300_ALU_OUTC_MAD;
	case RC_OPCODE_MAX:
		return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN:
		return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	/* Unknown opcode: report it, then emit a MAD like the NOP case does. */
	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
/**
 * Map a compiler opcode to the R300_ALU_OUTA_* opcode for the Alpha half of
 * a paired ALU instruction.
 *
 * Both DP3 and DP4 map to R300_ALU_OUTA_DP4.  NOP is encoded as MAD.  Any
 * opcode not listed here is reported as an error and also encoded as MAD.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		/* The message used to name translate_rgb_opcode (copy-paste),
		 * which pointed at the wrong translation table. */
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		/* fall through */
	case RC_OPCODE_NOP:
		/* fall through */
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
/** |
* Emit one paired ALU instruction. |
*
* Appends one entry to code->alu.inst[] and packs the RGB and Alpha halves
* of \p inst into that entry's rgb_inst, alpha_inst, rgb_addr, alpha_addr
* and r400_ext_addr words.  Most words are only OR'ed into, so the entry is
* expected to start out zeroed (r300BuildFragmentProgramHwCode() clears the
* whole code structure before emitting).
*
* \return 1 on success, 0 after reporting "Too many ALU instructions" when
* code->alu.length has already reached c->Base.max_alu_insts.
*/ |
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) |
{ |
int ip; |
int j; |
PROG_CODE; |
if (code->alu.length >= c->Base.max_alu_insts) { |
error("Too many ALU instructions"); |
return 0; |
} |
/* Claim the next free ALU slot and start both halves from their opcode. */
ip = code->alu.length++; |
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); |
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); |
/* Three sources/arguments per half: source addresses are packed 6 bits
 * apart, argument selectors 7 bits apart. */
for(j = 0; j < 3; ++j) { |
/* Set the RGB address */ |
unsigned int src = use_source(code, inst->RGB.Src[j]); |
unsigned int arg; |
/* Indices at or above R300_PFS_NUM_TEMP_REGS set an extension bit in
 * r400_ext_addr.  Note the check looks only at Index, not at Used/File. */
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) |
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); |
code->alu.inst[ip].rgb_addr |= src << (6*j); |
/* Set the Alpha address */ |
src = use_source(code, inst->Alpha.Src[j]); |
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) |
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); |
code->alu.inst[ip].alpha_addr |= src << (6*j); |
/* Argument selector: translated swizzle, Negate at bit 5, Abs at bit 6. */
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); |
arg |= inst->RGB.Arg[j].Abs << 6; |
arg |= inst->RGB.Arg[j].Negate << 5; |
code->alu.inst[ip].rgb_inst |= arg << (7*j); |
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); |
arg |= inst->Alpha.Arg[j].Abs << 6; |
arg |= inst->Alpha.Arg[j].Negate << 5; |
code->alu.inst[ip].alpha_inst |= arg << (7*j); |
} |
/* Presubtract */ |
/* The presubtract source's Index field carries the RC_PRESUB_* operation;
 * unrecognized values leave the instruction word unchanged. */
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { |
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { |
case RC_PRESUB_BIAS: |
code->alu.inst[ip].rgb_inst |= |
R300_ALU_SRCP_1_MINUS_2_SRC0; |
break; |
case RC_PRESUB_ADD: |
code->alu.inst[ip].rgb_inst |= |
R300_ALU_SRCP_SRC1_PLUS_SRC0; |
break; |
case RC_PRESUB_SUB: |
code->alu.inst[ip].rgb_inst |= |
R300_ALU_SRCP_SRC1_MINUS_SRC0; |
break; |
case RC_PRESUB_INV: |
code->alu.inst[ip].rgb_inst |= |
R300_ALU_SRCP_1_MINUS_SRC0; |
break; |
default: |
break; |
} |
} |
/* Same presubtract selection for the Alpha half. */
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { |
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { |
case RC_PRESUB_BIAS: |
code->alu.inst[ip].alpha_inst |= |
R300_ALU_SRCP_1_MINUS_2_SRC0; |
break; |
case RC_PRESUB_ADD: |
code->alu.inst[ip].alpha_inst |= |
R300_ALU_SRCP_SRC1_PLUS_SRC0; |
break; |
case RC_PRESUB_SUB: |
code->alu.inst[ip].alpha_inst |= |
R300_ALU_SRCP_SRC1_MINUS_SRC0; |
break; |
case RC_PRESUB_INV: |
code->alu.inst[ip].alpha_inst |= |
R300_ALU_SRCP_1_MINUS_SRC0; |
break; |
default: |
break; |
} |
} |
if (inst->RGB.Saturate) |
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; |
if (inst->Alpha.Saturate) |
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; |
/* RGB destination: temporary register write. */
if (inst->RGB.WriteMask) { |
use_temporary(code, inst->RGB.DestIndex); |
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) |
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; |
code->alu.inst[ip].rgb_addr |= |
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); |
} |
/* RGB destination: output write; also flags the current node. */
if (inst->RGB.OutputWriteMask) { |
code->alu.inst[ip].rgb_addr |= |
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | |
R300_RGB_TARGET(inst->RGB.Target); |
emit->node_flags |= R300_RGBA_OUT; |
} |
/* Alpha destination: temporary register write. */
if (inst->Alpha.WriteMask) { |
use_temporary(code, inst->Alpha.DestIndex); |
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) |
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; |
code->alu.inst[ip].alpha_addr |= |
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | |
R300_ALU_DSTA_REG; |
} |
/* Alpha destination: output write; also flags the current node. */
if (inst->Alpha.OutputWriteMask) { |
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | |
R300_ALPHA_TARGET(inst->Alpha.Target); |
emit->node_flags |= R300_RGBA_OUT; |
} |
/* Depth write: flags the node and records that the program writes depth. */
if (inst->Alpha.DepthWriteMask) { |
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; |
emit->node_flags |= R300_W_OUT; |
c->code->writes_depth = 1; |
} |
if (inst->Nop) |
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; |
/* Handle Output Modifier |
* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */ |
/* RC_OMOD_DISABLE is reported as an error, but the value is still OR'ed
 * into the instruction word afterwards. */
if (inst->RGB.Omod) { |
if (inst->RGB.Omod == RC_OMOD_DISABLE) { |
rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); |
} |
code->alu.inst[ip].rgb_inst |= |
(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT); |
} |
/* NOTE(review): the Alpha modifier is shifted with the RGB constant
 * R300_ALU_OUTC_MOD_SHIFT - presumably both halves use the same bit
 * position; confirm against the register definitions. */
if (inst->Alpha.Omod) { |
if (inst->Alpha.Omod == RC_OMOD_DISABLE) { |
rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); |
} |
code->alu.inst[ip].alpha_inst |= |
(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT); |
} |
return 1; |
} |
/** |
* Finish the current node without advancing to the next one. |
*/ |
static int finish_node(struct r300_emit_state * emit) |
{ |
struct r300_fragment_program_compiler * c = emit->compiler; |
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; |
unsigned alu_offset; |
unsigned alu_end; |
unsigned tex_offset; |
unsigned tex_end; |
unsigned int alu_offset_msbs, alu_end_msbs; |
if (code->alu.length == emit->node_first_alu) { |
/* Generate a single NOP for this node */ |
struct rc_pair_instruction inst; |
memset(&inst, 0, sizeof(inst)); |
if (!emit_alu(emit, &inst)) |
return 0; |
} |
alu_offset = emit->node_first_alu; |
alu_end = code->alu.length - alu_offset - 1; |
tex_offset = emit->node_first_tex; |
tex_end = code->tex.length - tex_offset - 1; |
if (code->tex.length == emit->node_first_tex) { |
if (emit->current_node > 0) { |
error("Node %i has no TEX instructions", emit->current_node); |
return 0; |
} |
tex_end = 0; |
} else { |
if (emit->current_node == 0) |
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; |
} |
/* Write the config register. |
* Note: The order in which the words for each node are written |
* is not correct here and needs to be fixed up once we're entirely |
* done |
* |
* Also note that the register specification from AMD is slightly |
* incorrect in its description of this register. */ |
code->code_addr[emit->current_node] = |
((alu_offset << R300_ALU_START_SHIFT) |
& R300_ALU_START_MASK) |
| ((alu_end << R300_ALU_SIZE_SHIFT) |
& R300_ALU_SIZE_MASK) |
| ((tex_offset << R300_TEX_START_SHIFT) |
& R300_TEX_START_MASK) |
| ((tex_end << R300_TEX_SIZE_SHIFT) |
& R300_TEX_SIZE_MASK) |
| emit->node_flags |
| (get_msbs_tex(tex_offset, 5) |
<< R400_TEX_START_MSB_SHIFT) |
| (get_msbs_tex(tex_end, 5) |
<< R400_TEX_SIZE_MSB_SHIFT) |
; |
/* Write r400 extended instruction fields. These will be ignored on |
* r300 cards. */ |
alu_offset_msbs = get_msbs_alu(alu_offset); |
alu_end_msbs = get_msbs_alu(alu_end); |
switch(emit->current_node) { |
case 0: |
code->r400_code_offset_ext |= |
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT |
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; |
break; |
case 1: |
code->r400_code_offset_ext |= |
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT |
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; |
break; |
case 2: |
code->r400_code_offset_ext |= |
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT |
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; |
break; |
case 3: |
code->r400_code_offset_ext |= |
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT |
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; |
break; |
} |
return 1; |
} |
/** |
* Begin a block of texture instructions. |
* Create the necessary indirection. |
*/ |
static int begin_tex(struct r300_emit_state * emit) |
{ |
PROG_CODE; |
if (code->alu.length == emit->node_first_alu && |
code->tex.length == emit->node_first_tex) { |
return 1; |
} |
if (emit->current_node == 3) { |
error("Too many texture indirections"); |
return 0; |
} |
if (!finish_node(emit)) |
return 0; |
emit->current_node++; |
emit->node_first_tex = code->tex.length; |
emit->node_first_alu = code->alu.length; |
emit->node_flags = 0; |
return 1; |
} |
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) |
{ |
unsigned int unit; |
unsigned int dest; |
unsigned int opcode; |
PROG_CODE; |
if (code->tex.length >= emit->compiler->Base.max_tex_insts) { |
error("Too many TEX instructions"); |
return 0; |
} |
unit = inst->U.I.TexSrcUnit; |
dest = inst->U.I.DstReg.Index; |
switch(inst->U.I.Opcode) { |
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; |
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; |
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; |
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; |
default: |
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); |
return 0; |
} |
if (inst->U.I.Opcode == RC_OPCODE_KIL) { |
unit = 0; |
dest = 0; |
} else { |
use_temporary(code, dest); |
} |
use_temporary(code, inst->U.I.SrcReg[0].Index); |
code->tex.inst[code->tex.length++] = |
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
& R300_SRC_ADDR_MASK) |
| ((dest << R300_DST_ADDR_SHIFT) |
& R300_DST_ADDR_MASK) |
| (unit << R300_TEX_ID_SHIFT) |
| (opcode << R300_TEX_INST_SHIFT) |
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? |
R400_SRC_ADDR_EXT_BIT : 0) |
| (dest >= R300_PFS_NUM_TEMP_REGS ? |
R400_DST_ADDR_EXT_BIT : 0) |
; |
return 1; |
} |
/** |
* Final compilation step: Turn the intermediate radeon_program into |
* machine-readable instructions. |
*
* \param c    compiler; cast to struct r300_fragment_program_compiler
* \param user not used by this function
*
* Emission failures are reported through the compiler's Error flag; when it
* is set the function returns before finishing the program.
*/ |
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) |
{ |
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; |
struct r300_emit_state emit; |
struct r300_fragment_program_code *code = &compiler->code->code.r300; |
unsigned int tex_end; |
/* Start from an all-zero emit state and an all-zero code structure; the
 * emit helpers OR their fields into the latter. */
memset(&emit, 0, sizeof(emit)); |
emit.compiler = compiler; |
memset(code, 0, sizeof(struct r300_fragment_program_code)); |
/* Walk the instruction list until it wraps back to the list head or an
 * error has been flagged.  The helpers' return values are not checked
 * here; the Error flag in the loop condition stops the walk instead. */
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; |
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; |
inst = inst->Next) { |
/* NORMAL instructions are BEGIN_TEX markers or texture operations;
 * everything else is emitted as a paired ALU instruction. */
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { |
begin_tex(&emit); |
continue; |
} |
emit_tex(&emit, inst); |
} else { |
emit_alu(&emit, &inst->U.P); |
} |
} |
if (code->pixsize >= compiler->Base.max_temp_regs) |
rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); |
if (compiler->Base.Error) |
return; |
/* Finish the program */ |
finish_node(&emit); |
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ |
/* Set r400 extended instruction fields. These values will be ignored |
* on r300 cards. */ |
code->r400_code_offset_ext |= |
(get_msbs_alu(0) |
<< R400_ALU_OFFSET_MSB_SHIFT) |
| (get_msbs_alu(code->alu.length - 1) |
<< R400_ALU_SIZE_MSB_SHIFT); |
/* Index of the last TEX instruction, or 0 when the program has none. */
tex_end = code->tex.length ? code->tex.length - 1 : 0; |
/* NOTE(review): the last term below skips 6 low bits of tex_end, while
 * every other get_msbs_tex() call in this file skips 5 - confirm the
 * intended width of the TEX_END field. */
code->code_offset = |
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
& R300_PFS_CNTL_ALU_OFFSET_MASK) |
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) |
& R300_PFS_CNTL_ALU_END_MASK) |
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
& R300_PFS_CNTL_TEX_OFFSET_MASK) |
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) |
& R300_PFS_CNTL_TEX_END_MASK) |
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) |
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) |
; |
/* Fix up the node order: move the used code_addr words to the end of the
 * four-entry array so the last node lands in slot 3, and zero the unused
 * leading slots.  Copying runs from the highest index down so that no
 * source word is overwritten before it is moved. */
if (emit.current_node < 3) { |
int shift = 3 - emit.current_node; |
int i; |
for(i = emit.current_node; i >= 0; --i) |
code->code_addr[shift + i] = code->code_addr[i]; |
for(i = 0; i < shift; ++i) |
code->code_addr[i] = 0; |
} |
/* Flag r390_mode when the program exceeds any of the R300_PFS_* limits. */
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS |
|| code->alu.length > R300_PFS_MAX_ALU_INST |
|| code->tex.length > R300_PFS_MAX_TEX_INST) { |
code->r390_mode = 1; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c |
---|
0,0 → 1,243 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* @file |
* Utilities to deal with the somewhat odd restriction on R300 fragment |
* program swizzles. |
*/ |
#include "r300_fragprog_swizzle.h" |
#include <stdio.h> |
#include "../r300_reg.h" |
#include "radeon_compiler.h" |
/* Build a swizzle value from three component names (pasted onto RC_SWIZZLE_);
 * the fourth component is always RC_SWIZZLE_ZERO. */
#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) |
/* One natively supported RGB swizzle and the data needed to compute its
 * hardware argument code for a given source slot. */
struct swizzle_data { |
unsigned int hash; /**< swizzle value this matches */ |
unsigned int base; /**< base value for hw swizzle */ |
unsigned int stride; /**< difference in base between arg0/1/2 */ |
unsigned int srcp_stride; /**< difference in base between arg0/srcp */ |
}; |
/* Table of natively supported RGB swizzles: {hash, base, stride, srcp_stride}.
 * A srcp_stride of 0 marks a swizzle that the lookups in this file reject
 * for the presubtract source. */
static const struct swizzle_data native_swizzles[] = { |
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, |
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, |
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, |
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, |
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, |
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, |
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, |
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, |
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, |
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, |
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} |
}; |
/* Number of entries in native_swizzles[]. */
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); |
/** |
* Find a native RGB swizzle that matches the given swizzle. |
* Returns 0 if none found. |
*/ |
static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) |
{ |
int i, comp; |
for(i = 0; i < num_native_swizzles; ++i) { |
const struct swizzle_data* sd = &native_swizzles[i]; |
for(comp = 0; comp < 3; ++comp) { |
unsigned int swz = GET_SWZ(swizzle, comp); |
if (swz == RC_SWIZZLE_UNUSED) |
continue; |
if (swz != GET_SWZ(sd->hash, comp)) |
break; |
} |
if (comp == 3) |
return sd; |
} |
return 0; |
} |
/**
 * Determines if the given swizzle is valid for r300/r400. In most situations
 * it is better to use r300_swizzle_is_native() which can be accessed via
 * struct radeon_compiler *c; c->SwizzleCaps->IsNative().
 *
 * Returns 1 when a native swizzle matches, 0 otherwise.
 */
int r300_swizzle_is_native_basic(unsigned int swizzle)
{
	return lookup_native_swizzle(swizzle) != 0;
}
/** |
* Check whether the given instruction supports the swizzle and negate |
* combinations in the given source register. |
*/ |
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) |
{ |
const struct swizzle_data* sd; |
unsigned int relevant; |
int j; |
if (opcode == RC_OPCODE_KIL || |
opcode == RC_OPCODE_TEX || |
opcode == RC_OPCODE_TXB || |
opcode == RC_OPCODE_TXP) { |
if (reg.Abs || reg.Negate) |
return 0; |
for(j = 0; j < 4; ++j) { |
unsigned int swz = GET_SWZ(reg.Swizzle, j); |
if (swz == RC_SWIZZLE_UNUSED) |
continue; |
if (swz != j) |
return 0; |
} |
return 1; |
} |
relevant = 0; |
for(j = 0; j < 3; ++j) |
if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) |
relevant |= 1 << j; |
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) |
return 0; |
sd = lookup_native_swizzle(reg.Swizzle); |
if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) |
return 0; |
return 1; |
} |
static void r300_swizzle_split( |
struct rc_src_register src, unsigned int mask, |
struct rc_swizzle_split * split) |
{ |
split->NumPhases = 0; |
while(mask) { |
unsigned int best_matchcount = 0; |
unsigned int best_matchmask = 0; |
int i, comp; |
for(i = 0; i < num_native_swizzles; ++i) { |
const struct swizzle_data *sd = &native_swizzles[i]; |
unsigned int matchcount = 0; |
unsigned int matchmask = 0; |
for(comp = 0; comp < 3; ++comp) { |
unsigned int swz; |
if (!GET_BIT(mask, comp)) |
continue; |
swz = GET_SWZ(src.Swizzle, comp); |
if (swz == RC_SWIZZLE_UNUSED) |
continue; |
if (swz == GET_SWZ(sd->hash, comp)) { |
/* check if the negate bit of current component |
* is the same for already matched components */ |
if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) |
continue; |
matchcount++; |
matchmask |= 1 << comp; |
} |
} |
if (matchcount > best_matchcount) { |
best_matchcount = matchcount; |
best_matchmask = matchmask; |
if (matchmask == (mask & RC_MASK_XYZ)) |
break; |
} |
} |
if (mask & RC_MASK_W) |
best_matchmask |= RC_MASK_W; |
split->Phase[split->NumPhases++] = best_matchmask; |
mask &= ~best_matchmask; |
} |
} |
/* Swizzle capability callbacks for r300 fragment programs: IsNative checks a
 * single source register, Split breaks a swizzle into phases. */
struct rc_swizzle_caps r300_swizzle_caps = { |
.IsNative = r300_swizzle_is_native, |
.Split = r300_swizzle_split |
}; |
/** |
* Translate an RGB (XYZ) swizzle into the hardware code for the given |
* instruction source. |
*/ |
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) |
{ |
const struct swizzle_data* sd = lookup_native_swizzle(swizzle); |
if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { |
fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); |
return 0; |
} |
if (src == RC_PAIR_PRESUB_SRC) { |
return sd->base + sd->srcp_stride; |
} else { |
return sd->base + src*sd->stride; |
} |
} |
/** |
* Translate an Alpha (W) swizzle into the hardware code for the given |
* instruction source. |
*/ |
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) |
{ |
unsigned int swz = GET_SWZ(swizzle, 0); |
if (src == RC_PAIR_PRESUB_SRC) { |
return R300_ALU_ARGA_SRCP_X + swz; |
} |
if (swz < 3) |
return swz + 3*src; |
switch(swz) { |
case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; |
case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; |
case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; |
case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; |
default: return R300_ALU_ARGA_ONE; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h |
---|
0,0 → 1,39 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef __R300_FRAGPROG_SWIZZLE_H_ |
#define __R300_FRAGPROG_SWIZZLE_H_ |
#include "radeon_swizzle.h" |
/* Swizzle capability callbacks (IsNative / Split) for r300 fragment programs. */
extern struct rc_swizzle_caps r300_swizzle_caps; |
/* Hardware RGB argument code for source slot `src` and the given swizzle;
 * returns 0 (and prints a message) when the swizzle is not native. */
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); |
/* Hardware Alpha argument code for source slot `src`; only the first
 * component of `swizzle` is used. */
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); |
/* Returns 1 when `swizzle` matches one of the native RGB swizzles, else 0. */
int r300_swizzle_is_native_basic(unsigned int swizzle); |
#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r3xx_fragprog.c |
---|
0,0 → 1,156 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "radeon_compiler.h" |
#include <stdio.h> |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_emulate_branches.h" |
#include "radeon_emulate_loops.h" |
#include "radeon_program_alu.h" |
#include "radeon_program_tex.h" |
#include "radeon_rename_regs.h" |
#include "radeon_remove_constants.h" |
#include "r300_fragprog.h" |
#include "r300_fragprog_swizzle.h" |
#include "r500_fragprog.h" |
static void dataflow_outputs_mark_use(void * userdata, void * data, |
void (*callback)(void *, unsigned int, unsigned int)) |
{ |
struct r300_fragment_program_compiler * c = userdata; |
callback(data, c->OutputColor[0], RC_MASK_XYZW); |
callback(data, c->OutputColor[1], RC_MASK_XYZW); |
callback(data, c->OutputColor[2], RC_MASK_XYZW); |
callback(data, c->OutputColor[3], RC_MASK_XYZW); |
callback(data, c->OutputDepth, RC_MASK_W); |
} |
static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) |
{ |
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; |
struct rc_instruction *rci; |
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { |
struct rc_sub_instruction * inst = &rci->U.I; |
unsigned i; |
const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); |
if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) |
continue; |
if (inst->DstReg.WriteMask & RC_MASK_Z) { |
inst->DstReg.WriteMask = RC_MASK_W; |
} else { |
inst->DstReg.WriteMask = 0; |
continue; |
} |
if (!info->IsComponentwise) { |
continue; |
} |
for (i = 0; i < info->NumSrcRegs; i++) { |
inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); |
} |
} |
} |
/* Compile a fragment program: build the list of compiler passes (selected by
 * is_r500, the optimization switch and the alpha-to-one state), run them with
 * rc_run_compiler(), then copy the program's constants into c->code. */
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) |
{ |
int is_r500 = c->Base.is_r500; |
int opt = !c->Base.disable_optimizations; |
int alpha2one = c->state.alpha_to_one; |
/* Lists of instruction transformations. */ |
/* Each list ends with a {0, 0} entry. */
struct radeon_program_transformation force_alpha_to_one[] = { |
{ &rc_force_output_alpha_to_one, c }, |
{ 0, 0 } |
}; |
struct radeon_program_transformation rewrite_tex[] = { |
{ &radeonTransformTEX, c }, |
{ 0, 0 } |
}; |
struct radeon_program_transformation rewrite_if[] = { |
{ &r500_transform_IF, 0 }, |
{0, 0} |
}; |
struct radeon_program_transformation native_rewrite_r500[] = { |
{ &radeonTransformALU, 0 }, |
{ &radeonTransformDeriv, 0 }, |
{ &radeonTransformTrigScale, 0 }, |
{ 0, 0 } |
}; |
struct radeon_program_transformation native_rewrite_r300[] = { |
{ &radeonTransformALU, 0 }, |
{ &r300_transform_trig_simple, 0 }, |
{ 0, 0 } |
}; |
/* List of compiler passes. */ |
/* Entries pair up for r500 / non-r500 through complementary predicates.
 * The passes that take &opt receive a pointer to the local variable above. */
struct radeon_compiler_pass fs_list[] = { |
/* NAME DUMP PREDICATE FUNCTION PARAM */ |
{"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, |
/* This transformation needs to be done before any of the IF |
* instructions are modified. */ |
{"transform KILP", 1, 1, rc_transform_KILL, NULL}, |
{"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, |
{"transform loops", 1, !is_r500, rc_transform_loops, NULL}, |
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, |
{"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one}, |
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, |
{"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, |
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, |
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, |
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, |
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, |
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, |
{"dataflow optimize", 1, opt, rc_optimize, NULL}, |
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL}, |
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, |
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, |
{"pair translate", 1, 1, rc_pair_translate, NULL}, |
{"pair scheduling", 1, 1, rc_pair_schedule, &opt}, |
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, |
{"register allocation", 1, 1, rc_pair_regalloc, &opt}, |
{"final code validation", 0, 1, rc_validate_final_shader, NULL}, |
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, |
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, |
{"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, |
{"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, |
{NULL, 0, 0, NULL, NULL} |
}; |
c->Base.type = RC_FRAGMENT_PROGRAM; |
/* Pick the swizzle capabilities matching the target chip family. */
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; |
rc_run_compiler(&c->Base, fs_list); |
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r3xx_vertprog.c |
---|
0,0 → 1,931 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "radeon_compiler.h" |
#include <stdio.h> |
#include "../r300_reg.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_program.h" |
#include "radeon_program_alu.h" |
#include "radeon_swizzle.h" |
#include "radeon_emulate_branches.h" |
#include "radeon_emulate_loops.h" |
#include "radeon_remove_constants.h" |
/* |
* Take an already-setup and valid source then swizzle it appropriately to |
* obtain a constant ZERO or ONE source. |
*
* x selects the instruction source vpi->SrcReg[x]; y is the swizzle value
* used for all four components.  The operand carries no negate bits
* (RC_MASK_NONE) and keeps the source's RelAddr bit at bit 4.
* Expects variables named `vp` and `vpi` to be in scope at the point of use.
*/ |
#define __CONST(x, y) \ |
(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ |
t_swizzle(y), \ |
t_swizzle(y), \ |
t_swizzle(y), \ |
t_swizzle(y), \ |
t_src_class(vpi->SrcReg[x].File), \ |
RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) |
static unsigned long t_dst_mask(unsigned int mask) |
{ |
/* RC_MASK_* is equivalent to VSF_FLAG_* */ |
return mask & RC_MASK_XYZW; |
} |
/* Translate a destination register file into its PVS_DST_REG_* class.
 * Unknown files are reported on stderr and treated as temporaries. */
static unsigned long t_dst_class(rc_register_file file)
{
	switch (file) {
	case RC_FILE_TEMPORARY:
		return PVS_DST_REG_TEMPORARY;
	case RC_FILE_OUTPUT:
		return PVS_DST_REG_OUT;
	case RC_FILE_ADDRESS:
		return PVS_DST_REG_A0;
	default:
		break;
	}

	fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
	return PVS_DST_REG_TEMPORARY;
}
static unsigned long t_dst_index(struct r300_vertex_program_code *vp, |
struct rc_dst_register *dst) |
{ |
if (dst->File == RC_FILE_OUTPUT) |
return vp->outputs[dst->Index]; |
return dst->Index; |
} |
/**
 * Map a compiler register file onto the hardware source register class.
 *
 * RC_FILE_NONE is encoded as a temporary.  Unknown files are reported on
 * stderr and then treated like temporaries as well.
 */
static unsigned long t_src_class(rc_register_file file)
{
	switch (file) {
	case RC_FILE_INPUT:
		return PVS_SRC_REG_INPUT;
	case RC_FILE_CONSTANT:
		return PVS_SRC_REG_CONSTANT;
	case RC_FILE_NONE:
	case RC_FILE_TEMPORARY:
		break;
	default:
		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
		break;
	}

	return PVS_SRC_REG_TEMPORARY;
}
static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) |
{ |
unsigned long aclass = t_src_class(a.File); |
unsigned long bclass = t_src_class(b.File); |
if (aclass != bclass) |
return 0; |
if (aclass == PVS_SRC_REG_TEMPORARY) |
return 0; |
if (a.RelAddr || b.RelAddr) |
return 1; |
if (a.Index != b.Index) |
return 1; |
return 0; |
} |
/**
 * Translate a compiler swizzle selector into the hardware encoding.
 *
 * The RC_SWIZZLE_* values are identical to the VSF_IN_COMPONENT_* values,
 * so the selector is passed through unchanged.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	const unsigned long hw_select = swizzle;

	return hw_select;
}
static unsigned long t_src_index(struct r300_vertex_program_code *vp, |
struct rc_src_register *src) |
{ |
if (src->File == RC_FILE_INPUT) { |
assert(vp->inputs[src->Index] != -1); |
return vp->inputs[src->Index]; |
} else { |
if (src->Index < 0) { |
fprintf(stderr, |
"negative offsets for indirect addressing do not work.\n"); |
return 0; |
} |
return src->Index; |
} |
} |
/* these two functions should probably be merged... */ |
static unsigned long t_src(struct r300_vertex_program_code *vp, |
struct rc_src_register *src) |
{ |
/* src->Negate uses the RC_MASK_ flags from program_instruction.h, |
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here. |
*/ |
return PVS_SRC_OPERAND(t_src_index(vp, src), |
t_swizzle(GET_SWZ(src->Swizzle, 0)), |
t_swizzle(GET_SWZ(src->Swizzle, 1)), |
t_swizzle(GET_SWZ(src->Swizzle, 2)), |
t_swizzle(GET_SWZ(src->Swizzle, 3)), |
t_src_class(src->File), |
src->Negate) | |
(src->RelAddr << 4) | (src->Abs << 3); |
} |
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, |
struct rc_src_register *src) |
{ |
/* src->Negate uses the RC_MASK_ flags from program_instruction.h, |
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here. |
*/ |
unsigned int swz = rc_get_scalar_src_swz(src->Swizzle); |
return PVS_SRC_OPERAND(t_src_index(vp, src), |
t_swizzle(swz), |
t_swizzle(swz), |
t_swizzle(swz), |
t_swizzle(swz), |
t_src_class(src->File), |
src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | |
(src->RelAddr << 4) | (src->Abs << 3); |
} |
static int valid_dst(struct r300_vertex_program_code *vp, |
struct rc_dst_register *dst) |
{ |
if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { |
return 0; |
} else if (dst->File == RC_FILE_ADDRESS) { |
assert(dst->Index == 0); |
} |
return 1; |
} |
static void ei_vector1(struct r300_vertex_program_code *vp, |
unsigned int hw_opcode, |
struct rc_sub_instruction *vpi, |
unsigned int * inst) |
{ |
inst[0] = PVS_OP_DST_OPERAND(hw_opcode, |
0, |
0, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
inst[1] = t_src(vp, &vpi->SrcReg[0]); |
inst[2] = __CONST(0, RC_SWIZZLE_ZERO); |
inst[3] = __CONST(0, RC_SWIZZLE_ZERO); |
} |
static void ei_vector2(struct r300_vertex_program_code *vp, |
unsigned int hw_opcode, |
struct rc_sub_instruction *vpi, |
unsigned int * inst) |
{ |
inst[0] = PVS_OP_DST_OPERAND(hw_opcode, |
0, |
0, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
inst[1] = t_src(vp, &vpi->SrcReg[0]); |
inst[2] = t_src(vp, &vpi->SrcReg[1]); |
inst[3] = __CONST(1, RC_SWIZZLE_ZERO); |
} |
static void ei_math1(struct r300_vertex_program_code *vp, |
unsigned int hw_opcode, |
struct rc_sub_instruction *vpi, |
unsigned int * inst) |
{ |
inst[0] = PVS_OP_DST_OPERAND(hw_opcode, |
1, |
0, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); |
inst[2] = __CONST(0, RC_SWIZZLE_ZERO); |
inst[3] = __CONST(0, RC_SWIZZLE_ZERO); |
} |
static void ei_lit(struct r300_vertex_program_code *vp, |
struct rc_sub_instruction *vpi, |
unsigned int * inst) |
{ |
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} |
inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, |
1, |
0, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
/* NOTE: Users swizzling might not work. */ |
inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X |
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W |
PVS_SRC_SELECT_FORCE_0, // Z |
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y |
t_src_class(vpi->SrcReg[0].File), |
vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | |
(vpi->SrcReg[0].RelAddr << 4); |
inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y |
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W |
PVS_SRC_SELECT_FORCE_0, // Z |
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X |
t_src_class(vpi->SrcReg[0].File), |
vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | |
(vpi->SrcReg[0].RelAddr << 4); |
inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y |
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X |
PVS_SRC_SELECT_FORCE_0, // Z |
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W |
t_src_class(vpi->SrcReg[0].File), |
vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | |
(vpi->SrcReg[0].RelAddr << 4); |
} |
static void ei_mad(struct r300_vertex_program_code *vp, |
struct rc_sub_instruction *vpi, |
unsigned int * inst) |
{ |
unsigned int i; |
/* Remarks about hardware limitations of MAD |
* (please preserve this comment, as this information is _NOT_ |
* in the documentation provided by AMD). |
* |
* As described in the documentation, MAD with three unique temporary |
* source registers requires the use of the macro version. |
* |
* However (and this is not mentioned in the documentation), apparently |
* the macro version is _NOT_ a full superset of the normal version. |
* In particular, the macro version does not always work when relative |
* addressing is used in the source operands. |
* |
* This limitation caused incorrect rendering in Sauerbraten's OpenGL |
* assembly shader path when using medium quality animations |
* (i.e. animations with matrix blending instead of quaternion blending). |
* |
* Unfortunately, I (nha) have been unable to extract a Piglit regression |
* test for this issue - for some reason, it is possible to have vertex |
* programs whose prefix is *exactly* the same as the prefix of the |
* offending program in Sauerbraten up to the offending instruction |
* without causing any trouble. |
* |
* Bottom line: Only use the macro version only when really necessary; |
* according to AMD docs, this should improve performance by one clock |
* as a nice side bonus. |
*/ |
if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && |
vpi->SrcReg[1].File == RC_FILE_TEMPORARY && |
vpi->SrcReg[2].File == RC_FILE_TEMPORARY && |
vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && |
vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && |
vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { |
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, |
0, |
1, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
} else { |
inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, |
0, |
0, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
/* Arguments with constant swizzles still count as a unique |
* temporary, so we should make sure these arguments share a |
* register index with one of the other arguments. */ |
for (i = 0; i < 3; i++) { |
unsigned int j; |
if (vpi->SrcReg[i].File != RC_FILE_NONE) |
continue; |
for (j = 0; j < 3; j++) { |
if (i != j) { |
vpi->SrcReg[i].Index = |
vpi->SrcReg[j].Index; |
break; |
} |
} |
} |
} |
inst[1] = t_src(vp, &vpi->SrcReg[0]); |
inst[2] = t_src(vp, &vpi->SrcReg[1]); |
inst[3] = t_src(vp, &vpi->SrcReg[2]); |
} |
static void ei_pow(struct r300_vertex_program_code *vp, |
struct rc_sub_instruction *vpi, |
unsigned int * inst) |
{ |
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, |
1, |
0, |
t_dst_index(vp, &vpi->DstReg), |
t_dst_mask(vpi->DstReg.WriteMask), |
t_dst_class(vpi->DstReg.File), |
vpi->SaturateMode == RC_SATURATE_ZERO_ONE); |
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); |
inst[2] = __CONST(0, RC_SWIZZLE_ZERO); |
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); |
} |
static void translate_vertex_program(struct radeon_compiler *c, void *user) |
{ |
struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; |
struct rc_instruction *rci; |
unsigned loops[R500_PVS_MAX_LOOP_DEPTH]; |
unsigned loop_depth = 0; |
compiler->code->pos_end = 0; /* Not supported yet */ |
compiler->code->length = 0; |
compiler->code->num_temporaries = 0; |
compiler->SetHwInputOutput(compiler); |
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { |
struct rc_sub_instruction *vpi = &rci->U.I; |
unsigned int *inst = compiler->code->body.d + compiler->code->length; |
const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); |
/* Skip instructions writing to non-existing destination */ |
if (!valid_dst(compiler->code, &vpi->DstReg)) |
continue; |
if (info->HasDstReg) { |
/* Neither is Saturate. */ |
if (vpi->SaturateMode != RC_SATURATE_NONE && !c->is_r500) { |
rc_error(&compiler->Base, "Vertex program does not support the Saturate " |
"modifier (yet).\n"); |
} |
} |
if (compiler->code->length >= c->max_alu_insts * 4) { |
rc_error(&compiler->Base, "Vertex program has too many instructions\n"); |
return; |
} |
assert(compiler->Base.is_r500 || |
(vpi->Opcode != RC_OPCODE_SEQ && |
vpi->Opcode != RC_OPCODE_SNE)); |
switch (vpi->Opcode) { |
case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; |
case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; |
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; |
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; |
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; |
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; |
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; |
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; |
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; |
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; |
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; |
case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; |
case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; |
case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; |
case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; |
case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; |
case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; |
case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; |
case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; |
case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; |
case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; |
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; |
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; |
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; |
case RC_OPCODE_BGNLOOP: |
{ |
if ((!compiler->Base.is_r500 |
&& loop_depth >= R300_VS_MAX_LOOP_DEPTH) |
|| loop_depth >= R500_PVS_MAX_LOOP_DEPTH) { |
rc_error(&compiler->Base, |
"Loops are nested too deep."); |
return; |
} |
loops[loop_depth++] = ((compiler->code->length)/ 4) + 1; |
break; |
} |
case RC_OPCODE_ENDLOOP: |
{ |
unsigned int act_addr; |
unsigned int last_addr; |
unsigned int ret_addr; |
ret_addr = loops[--loop_depth]; |
act_addr = ret_addr - 1; |
last_addr = (compiler->code->length / 4) - 1; |
if (loop_depth >= R300_VS_MAX_FC_OPS) { |
rc_error(&compiler->Base, |
"Too many flow control instructions."); |
return; |
} |
if (compiler->Base.is_r500) { |
compiler->code->fc_op_addrs.r500 |
[compiler->code->num_fc_ops].lw = |
R500_PVS_FC_ACT_ADRS(act_addr) |
| R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff) |
; |
compiler->code->fc_op_addrs.r500 |
[compiler->code->num_fc_ops].uw = |
R500_PVS_FC_LAST_INST(last_addr) |
| R500_PVS_FC_RTN_INST(ret_addr) |
; |
} else { |
compiler->code->fc_op_addrs.r300 |
[compiler->code->num_fc_ops] = |
R300_PVS_FC_ACT_ADRS(act_addr) |
| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) |
| R300_PVS_FC_LAST_INST(last_addr) |
| R300_PVS_FC_RTN_INST(ret_addr) |
; |
} |
compiler->code->fc_loop_index[compiler->code->num_fc_ops] = |
R300_PVS_FC_LOOP_INIT_VAL(0x0) |
| R300_PVS_FC_LOOP_STEP_VAL(0x1) |
; |
compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( |
compiler->code->num_fc_ops); |
compiler->code->num_fc_ops++; |
break; |
} |
case RC_ME_PRED_SET_CLR: |
ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst); |
break; |
case RC_ME_PRED_SET_INV: |
ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst); |
break; |
case RC_ME_PRED_SET_POP: |
ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst); |
break; |
case RC_ME_PRED_SET_RESTORE: |
ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst); |
break; |
case RC_ME_PRED_SEQ: |
ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst); |
break; |
case RC_ME_PRED_SNEQ: |
ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst); |
break; |
case RC_VE_PRED_SNEQ_PUSH: |
ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH, |
vpi, inst); |
break; |
default: |
rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); |
return; |
} |
if (vpi->DstReg.Pred != RC_PRED_DISABLED) { |
inst[0] |= (PVS_DST_PRED_ENABLE_MASK |
<< PVS_DST_PRED_ENABLE_SHIFT); |
if (vpi->DstReg.Pred == RC_PRED_SET) { |
inst[0] |= (PVS_DST_PRED_SENSE_MASK |
<< PVS_DST_PRED_SENSE_SHIFT); |
} |
} |
/* Update the number of temporaries. */ |
if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && |
vpi->DstReg.Index >= compiler->code->num_temporaries) |
compiler->code->num_temporaries = vpi->DstReg.Index + 1; |
for (unsigned i = 0; i < info->NumSrcRegs; i++) |
if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && |
vpi->SrcReg[i].Index >= compiler->code->num_temporaries) |
compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; |
if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { |
rc_error(&compiler->Base, "Too many temporaries.\n"); |
return; |
} |
compiler->code->length += 4; |
if (compiler->Base.Error) |
return; |
} |
} |
/* Per-original-temporary bookkeeping used by allocate_temporary_registers(). */
struct temporary_allocation { |
/* Set once a hardware temporary has been assigned to this temporary. */
unsigned int Allocated:1; |
/* Index of the hardware temporary assigned to it. */
unsigned int HwTemp:15; |
/* Last instruction recorded as reading it (the ENDLOOP of the enclosing
 * outermost loop for reads that happen inside a loop). */
struct rc_instruction * LastRead; |
}; |
/*
 * Register allocation pass: rewrite the index of every RC_FILE_TEMPORARY
 * source and destination register to a hardware temporary index, releasing
 * a hardware temporary at the instruction recorded as its last read so that
 * later destinations can take it over.
 *
 * c is accessed as a r300_vertex_program_compiler; user is unused.
 */
static void allocate_temporary_registers(struct radeon_compiler *c, void *user) |
{ |
struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; |
struct rc_instruction *inst; |
/* ENDLOOP of the outermost loop currently being walked, or NULL. */
struct rc_instruction *end_loop = NULL; |
unsigned int num_orig_temps = 0; |
/* hwtemps[n] is non-zero while hardware temporary n is in use. */
char hwtemps[RC_REGISTER_MAX_INDEX]; |
/* Allocation state, indexed by original temporary index. */
struct temporary_allocation * ta; |
unsigned int i, j; |
memset(hwtemps, 0, sizeof(hwtemps)); |
rc_recompute_ips(c); |
/* Pass 1: Count original temporaries. */ |
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
for (i = 0; i < opcode->NumSrcRegs; ++i) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { |
if (inst->U.I.SrcReg[i].Index >= num_orig_temps) |
num_orig_temps = inst->U.I.SrcReg[i].Index + 1; |
} |
} |
if (opcode->HasDstReg) { |
if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { |
if (inst->U.I.DstReg.Index >= num_orig_temps) |
num_orig_temps = inst->U.I.DstReg.Index + 1; |
} |
} |
} |
/* One zero-initialised entry per original temporary, taken from the
 * compiler's memory pool. */
ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, |
sizeof(struct temporary_allocation) * num_orig_temps); |
memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); |
/* Pass 2: Determine original temporary lifetimes */ |
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
/* Instructions inside of loops need to use the ENDLOOP |
* instruction as their LastRead. */ |
if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { |
/* Scan forward for the ENDLOOP matching this outermost BGNLOOP,
 * counting nested loops on the way. */
int endloops = 1; |
struct rc_instruction * ptr; |
for(ptr = inst->Next; |
ptr != &compiler->Base.Program.Instructions; |
ptr = ptr->Next){ |
if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { |
endloops++; |
} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { |
endloops--; |
if (endloops <= 0) { |
end_loop = ptr; |
break; |
} |
} |
} |
} |
/* Reaching the recorded ENDLOOP means the outermost loop is over. */
if (inst == end_loop) { |
end_loop = NULL; |
continue; |
} |
for (i = 0; i < opcode->NumSrcRegs; ++i) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { |
ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; |
} |
} |
} |
/* Pass 3: Register allocation */ |
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
/* Sources are handled before the destination, so a hardware temporary
 * released by a last read here can be picked for this instruction's own
 * destination. */
for (i = 0; i < opcode->NumSrcRegs; ++i) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { |
unsigned int orig = inst->U.I.SrcReg[i].Index; |
/* NOTE(review): a temporary that is read before any write still has
 * HwTemp == 0 from the memset above - confirm that is intended. */
inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; |
if (ta[orig].Allocated && inst == ta[orig].LastRead) |
hwtemps[ta[orig].HwTemp] = 0; |
} |
} |
if (opcode->HasDstReg) { |
if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { |
unsigned int orig = inst->U.I.DstReg.Index; |
if (!ta[orig].Allocated) { |
/* Take the lowest-numbered free hardware temporary. */
for(j = 0; j < c->max_temp_regs; ++j) { |
if (!hwtemps[j]) |
break; |
} |
/* NOTE(review): when no hardware temporary is free, j equals
 * c->max_temp_regs here and is used as is - confirm that the overflow
 * is caught by a later pass. */
ta[orig].Allocated = 1; |
ta[orig].HwTemp = j; |
hwtemps[ta[orig].HwTemp] = 1; |
} |
inst->U.I.DstReg.Index = ta[orig].HwTemp; |
} |
} |
} |
} |
/** |
* R3xx-R4xx vertex engine does not support the Absolute source operand modifier |
* and the Saturate opcode modifier. Only Absolute is currently transformed. |
*/ |
static int transform_nonnative_modifiers( |
struct radeon_compiler *c, |
struct rc_instruction *inst, |
void* unused) |
{ |
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned i; |
/* Transform ABS(a) to MAX(a, -a). */ |
for (i = 0; i < opcode->NumSrcRegs; i++) { |
if (inst->U.I.SrcReg[i].Abs) { |
struct rc_instruction *new_inst; |
unsigned temp; |
inst->U.I.SrcReg[i].Abs = 0; |
temp = rc_find_free_temporary(c); |
new_inst = rc_insert_new_instruction(c, inst->Prev); |
new_inst->U.I.Opcode = RC_OPCODE_MAX; |
new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
new_inst->U.I.DstReg.Index = temp; |
new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; |
new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; |
new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); |
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[i].Index = temp; |
inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; |
} |
} |
return 1; |
} |
/** |
* Vertex engine cannot read two inputs or two constants at the same time. |
* Introduce intermediate MOVs to temporary registers to account for this. |
*/ |
static int transform_source_conflicts( |
struct radeon_compiler *c, |
struct rc_instruction* inst, |
void* unused) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (opcode->NumSrcRegs == 3) { |
if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) |
|| t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { |
int tmpreg = rc_find_free_temporary(c); |
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = tmpreg; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; |
reset_srcreg(&inst->U.I.SrcReg[2]); |
inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[2].Index = tmpreg; |
} |
} |
if (opcode->NumSrcRegs >= 2) { |
if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { |
int tmpreg = rc_find_free_temporary(c); |
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = tmpreg; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; |
reset_srcreg(&inst->U.I.SrcReg[1]); |
inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[1].Index = tmpreg; |
} |
} |
return 1; |
} |
static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) |
{ |
struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; |
int i; |
for(i = 0; i < 32; ++i) { |
if ((compiler->RequiredOutputs & (1 << i)) && |
!(compiler->Base.Program.OutputsWritten & (1 << i))) { |
struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.DstReg.File = RC_FILE_OUTPUT; |
inst->U.I.DstReg.Index = i; |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; |
inst->U.I.SrcReg[0].Index = 0; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; |
compiler->Base.Program.OutputsWritten |= 1 << i; |
} |
} |
} |
static void dataflow_outputs_mark_used(void * userdata, void * data, |
void (*callback)(void *, unsigned int, unsigned int)) |
{ |
struct r300_vertex_program_compiler * c = userdata; |
int i; |
for(i = 0; i < 32; ++i) { |
if (c->RequiredOutputs & (1 << i)) |
callback(data, i, RC_MASK_XYZW); |
} |
} |
/**
 * Swizzle capability callback: reports every swizzle as native.
 *
 * Both arguments are ignored.
 *
 * \return always 1.
 */
static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	(void) reg;
	(void) opcode;

	return 1;
}
static void transform_negative_addressing(struct r300_vertex_program_compiler *c, |
struct rc_instruction *arl, |
struct rc_instruction *end, |
int min_offset) |
{ |
struct rc_instruction *inst, *add; |
unsigned const_swizzle; |
/* Transform ARL */ |
add = rc_insert_new_instruction(&c->Base, arl->Prev); |
add->U.I.Opcode = RC_OPCODE_ADD; |
add->U.I.DstReg.File = RC_FILE_TEMPORARY; |
add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); |
add->U.I.DstReg.WriteMask = RC_MASK_X; |
add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; |
add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; |
add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, |
min_offset, &const_swizzle); |
add->U.I.SrcReg[1].Swizzle = const_swizzle; |
arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; |
arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; |
/* Rewrite offsets up to and excluding inst. */ |
for (inst = arl->Next; inst != end; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) |
if (inst->U.I.SrcReg[i].RelAddr) |
inst->U.I.SrcReg[i].Index -= min_offset; |
} |
} |
static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user) |
{ |
struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler; |
struct rc_instruction *inst, *lastARL = NULL; |
int min_offset = 0; |
for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (inst->U.I.Opcode == RC_OPCODE_ARL) { |
if (lastARL != NULL && min_offset < 0) |
transform_negative_addressing(c, lastARL, inst, min_offset); |
lastARL = inst; |
min_offset = 0; |
continue; |
} |
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { |
if (inst->U.I.SrcReg[i].RelAddr && |
inst->U.I.SrcReg[i].Index < 0) { |
/* ARL must precede any indirect addressing. */ |
if (lastARL == NULL) { |
rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); |
return; |
} |
if (inst->U.I.SrcReg[i].Index < min_offset) |
min_offset = inst->U.I.SrcReg[i].Index; |
} |
} |
} |
if (lastARL != NULL && min_offset < 0) |
transform_negative_addressing(c, lastARL, inst, min_offset); |
} |
struct rc_swizzle_caps r300_vertprog_swizzle_caps = { |
.IsNative = &swizzle_is_native, |
.Split = 0 /* should never be called */ |
}; |
/*
 * Compile a vertex program: build the list of compiler passes, run it with
 * rc_run_compiler(), then copy InputsRead, OutputsWritten and the constants
 * from the program into c->code.
 */
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) |
{ |
int is_r500 = c->Base.is_r500; |
/* Optimization passes run unless disable_optimizations is set. */
int opt = !c->Base.disable_optimizations; |
/* Lists of instruction transformations. */ |
struct radeon_program_transformation alu_rewrite_r500[] = { |
{ &r300_transform_vertex_alu, 0 }, |
{ &r300_transform_trig_scale_vertex, 0 }, |
{ 0, 0 } |
}; |
struct radeon_program_transformation alu_rewrite_r300[] = { |
{ &r300_transform_vertex_alu, 0 }, |
{ &r300_transform_trig_simple, 0 }, |
{ 0, 0 } |
}; |
/* Note: These passes have to be done separately from ALU rewrite, |
* otherwise non-native ALU instructions with source conflicts |
* or non-native modifiers will not be treated properly. |
*/ |
struct radeon_program_transformation emulate_modifiers[] = { |
{ &transform_nonnative_modifiers, 0 }, |
{ 0, 0 } |
}; |
struct radeon_program_transformation resolve_src_conflicts[] = { |
{ &transform_source_conflicts, 0 }, |
{ 0, 0 } |
}; |
/* List of compiler passes. */ |
/* The passes are listed in the order they are meant to run; the PREDICATE
 * column selects which entries apply (is_r500 / !is_r500 pick the
 * chip-specific variants, opt the optimization passes).  The list ends with
 * an all-NULL entry. */
struct radeon_compiler_pass vs_list[] = { |
/* NAME DUMP PREDICATE FUNCTION PARAM */ |
{"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, |
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, |
{"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, |
{"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, |
{"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300}, |
{"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers}, |
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used}, |
{"dataflow optimize", 1, opt, rc_optimize, NULL}, |
/* This pass must be done after optimizations. */ |
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, |
{"register allocation", 1, opt, allocate_temporary_registers, NULL}, |
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, |
{"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL}, |
{"final code validation", 0, 1, rc_validate_final_shader, NULL}, |
{"machine code generation", 0, 1, translate_vertex_program, NULL}, |
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, |
{NULL, 0, 0, NULL, NULL} |
}; |
c->Base.type = RC_VERTEX_PROGRAM; |
c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; |
rc_run_compiler(&c->Base, vs_list); |
/* Publish the results of the compilation in the code structure. */
c->code->InputsRead = c->Base.Program.InputsRead; |
c->code->OutputsWritten = c->Base.Program.OutputsWritten; |
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c |
---|
0,0 → 1,216 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "radeon_compiler.h" |
#include "radeon_code.h" |
#include "../r300_reg.h" |
#include <stdio.h> |
/* Printable names of the vector engine opcodes, indexed by the 5-bit opcode
 * field of an instruction's first dword.  The table is read-only: both the
 * strings and the pointers are const. */
static const char *const r300_vs_ve_ops[] = {
	/* R300 vector ops */
	" VE_NO_OP",
	" VE_DOT_PRODUCT",
	" VE_MULTIPLY",
	" VE_ADD",
	" VE_MULTIPLY_ADD",
	" VE_DISTANCE_FACTOR",
	" VE_FRACTION",
	" VE_MAXIMUM",
	" VE_MINIMUM",
	"VE_SET_GREATER_THAN_EQUAL",
	" VE_SET_LESS_THAN",
	" VE_MULTIPLYX2_ADD",
	" VE_MULTIPLY_CLAMP",
	" VE_FLT2FIX_DX",
	" VE_FLT2FIX_DX_RND",
	/* R500 vector ops */
	" VE_PRED_SET_EQ_PUSH",
	" VE_PRED_SET_GT_PUSH",
	" VE_PRED_SET_GTE_PUSH",
	" VE_PRED_SET_NEQ_PUSH",
	" VE_COND_WRITE_EQ",
	" VE_COND_WRITE_GT",
	" VE_COND_WRITE_GTE",
	" VE_COND_WRITE_NEQ",
	" VE_COND_MUX_EQ",
	" VE_COND_MUX_GT",
	" VE_COND_MUX_GTE",
	" VE_SET_GREATER_THAN",
	" VE_SET_EQUAL",
	" VE_SET_NOT_EQUAL",
	" (reserved)",
	" (reserved)",
	" (reserved)",
};
/* Printable names of the math engine opcodes, indexed by the 5-bit opcode
 * field of an instruction's first dword.  The table is read-only: both the
 * strings and the pointers are const. */
static const char *const r300_vs_me_ops[] = {
	/* R300 math ops */
	" ME_NO_OP",
	" ME_EXP_BASE2_DX",
	" ME_LOG_BASE2_DX",
	" ME_EXP_BASEE_FF",
	" ME_LIGHT_COEFF_DX",
	" ME_POWER_FUNC_FF",
	" ME_RECIP_DX",
	" ME_RECIP_FF",
	" ME_RECIP_SQRT_DX",
	" ME_RECIP_SQRT_FF",
	" ME_MULTIPLY",
	" ME_EXP_BASE2_FULL_DX",
	" ME_LOG_BASE2_FULL_DX",
	" ME_POWER_FUNC_FF_CLAMP_B",
	"ME_POWER_FUNC_FF_CLAMP_B1",
	"ME_POWER_FUNC_FF_CLAMP_01",
	" ME_SIN",
	" ME_COS",
	/* R500 math ops */
	" ME_LOG_BASE2_IEEE",
	" ME_RECIP_IEEE",
	" ME_RECIP_SQRT_IEEE",
	" ME_PRED_SET_EQ",
	" ME_PRED_SET_GT",
	" ME_PRED_SET_GTE",
	" ME_PRED_SET_NEQ",
	" ME_PRED_SET_CLR",
	" ME_PRED_SET_INV",
	" ME_PRED_SET_POP",
	" ME_PRED_SET_RESTORE",
	" (reserved)",
	" (reserved)",
	" (reserved)",
};
/* XXX refactor to avoid clashing symbols */ |
/* Suffixes printed after a source register number, indexed by the low two
 * bits of a source operand dword.  Read-only table. */
static const char *const r300_vs_src_debug[] = {
	"t",
	"i",
	"c",
	"a",
};
/* Suffixes printed after a destination register number, indexed by the
 * 3-bit field at bit 8 of an instruction's first dword.  Read-only table. */
static const char *const r300_vs_dst_debug[] = {
	"t",
	"a0",
	"o",
	"ox",
	"a",
	"i",
	"u",
	"u",
};
/* Printable names of the 3-bit swizzle selectors of a source operand.
 * Read-only table. */
static const char *const r300_vs_swiz_debug[] = {
	"X",
	"Y",
	"Z",
	"W",
	"0",
	"1",
	"U",
	"U",
};
static void r300_vs_op_dump(uint32_t op) |
{ |
fprintf(stderr, " dst: %d%s op: ", |
(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); |
if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { |
fprintf(stderr, "PRED %u", |
(op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); |
} |
if (op & 0x80) { |
if (op & 0x1) { |
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); |
} else { |
fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); |
} |
} else if (op & 0x40) { |
fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); |
} else { |
fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); |
} |
} |
static void r300_vs_src_dump(uint32_t src) |
{ |
fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", |
(src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], |
src & (1 << 25) ? "-" : " ", |
r300_vs_swiz_debug[(src >> 13) & 0x7], |
src & (1 << 26) ? "-" : " ", |
r300_vs_swiz_debug[(src >> 16) & 0x7], |
src & (1 << 27) ? "-" : " ", |
r300_vs_swiz_debug[(src >> 19) & 0x7], |
src & (1 << 28) ? "-" : " ", |
r300_vs_swiz_debug[(src >> 22) & 0x7]); |
} |
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) |
{ |
struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; |
struct r300_vertex_program_code * vs = c->code; |
unsigned instrcount = vs->length / 4; |
unsigned i; |
fprintf(stderr, "Final vertex program code:\n"); |
for(i = 0; i < instrcount; i++) { |
unsigned offset = i*4; |
unsigned src; |
fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); |
r300_vs_op_dump(vs->body.d[offset]); |
for(src = 0; src < 3; ++src) { |
fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); |
r300_vs_src_dump(vs->body.d[offset+1+src]); |
} |
} |
fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); |
for(i = 0; i < vs->num_fc_ops; i++) { |
unsigned is_loop = 0; |
switch((vs->fc_ops >> (i * 2)) & 0x3 ) { |
case 0: fprintf(stderr, "NOP"); break; |
case 1: fprintf(stderr, "JUMP"); break; |
case 2: fprintf(stderr, "LOOP"); is_loop = 1; break; |
case 3: fprintf(stderr, "JSR"); break; |
} |
if (c->Base.is_r500) { |
fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x " |
"loop data->0x%08x\n", |
vs->fc_op_addrs.r500[i].uw, |
vs->fc_op_addrs.r500[i].lw, |
vs->fc_loop_index[i]); |
if (is_loop) { |
fprintf(stderr, "Before = %u First = %u Last = %u\n", |
vs->fc_op_addrs.r500[i].lw & 0xffff, |
(vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff, |
vs->fc_op_addrs.r500[i].uw & 0xffff); |
} |
} else { |
fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r500_fragprog.c |
---|
0,0 → 1,541 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "r500_fragprog.h" |
#include <stdio.h> |
#include "radeon_compiler_util.h" |
#include "radeon_list.h" |
#include "radeon_variable.h" |
#include "../r300_reg.h" |
/** |
* Rewrite IF instructions to use the ALU result special register. |
*/ |
int r500_transform_IF( |
struct radeon_compiler * c, |
struct rc_instruction * inst_if, |
void *data) |
{ |
struct rc_variable * writer; |
struct rc_list * writer_list, * list_ptr; |
struct rc_list * var_list = rc_get_variables(c); |
unsigned int generic_if = 0; |
unsigned int alu_chan; |
if (inst_if->U.I.Opcode != RC_OPCODE_IF) { |
return 0; |
} |
writer_list = rc_variable_list_get_writers( |
var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); |
if (!writer_list) { |
generic_if = 1; |
} else { |
/* Make sure it is safe for the writers to write to |
* ALU Result */ |
for (list_ptr = writer_list; list_ptr; |
list_ptr = list_ptr->Next) { |
struct rc_instruction * inst; |
writer = list_ptr->Item; |
/* We are going to modify the destination register |
* of writer, so if it has a reader other than |
* inst_if (aka ReaderCount > 1) we must fall back to |
* our generic IF. |
* If the writer has a lower IP than inst_if, this |
* means that inst_if is above the writer in a loop. |
* I'm not sure why this would ever happen, but |
* if it does we want to make sure we fall back |
* to our generic IF. */ |
if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { |
generic_if = 1; |
break; |
} |
/* The ALU Result is not preserved across IF |
* instructions, so if there is another IF |
* instruction between writer and inst_if, then |
* we need to fall back to generic IF. */ |
for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { |
const struct rc_opcode_info * info = |
rc_get_opcode_info(inst->U.I.Opcode); |
if (info->IsFlowControl) { |
generic_if = 1; |
break; |
} |
} |
if (generic_if) { |
break; |
} |
} |
} |
if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { |
alu_chan = RC_ALURESULT_X; |
} else { |
alu_chan = RC_ALURESULT_W; |
} |
if (generic_if) { |
struct rc_instruction * inst_mov = |
rc_insert_new_instruction(c, inst_if->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.WriteMask = 0; |
inst_mov->U.I.DstReg.File = RC_FILE_NONE; |
inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; |
inst_mov->U.I.WriteALUResult = alu_chan; |
inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; |
if (alu_chan == RC_ALURESULT_X) { |
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( |
inst_mov->U.I.SrcReg[0].Swizzle, |
RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, |
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); |
} else { |
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( |
inst_mov->U.I.SrcReg[0].Swizzle, |
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, |
RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); |
} |
} else { |
rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; |
unsigned int reverse_srcs = 0; |
unsigned int preserve_opcode = 0; |
for (list_ptr = writer_list; list_ptr; |
list_ptr = list_ptr->Next) { |
writer = list_ptr->Item; |
switch(writer->Inst->U.I.Opcode) { |
case RC_OPCODE_SEQ: |
compare_func = RC_COMPARE_FUNC_EQUAL; |
break; |
case RC_OPCODE_SNE: |
compare_func = RC_COMPARE_FUNC_NOTEQUAL; |
break; |
case RC_OPCODE_SLE: |
reverse_srcs = 1; |
/* Fall through */ |
case RC_OPCODE_SGE: |
compare_func = RC_COMPARE_FUNC_GEQUAL; |
break; |
case RC_OPCODE_SGT: |
reverse_srcs = 1; |
/* Fall through */ |
case RC_OPCODE_SLT: |
compare_func = RC_COMPARE_FUNC_LESS; |
break; |
default: |
compare_func = RC_COMPARE_FUNC_NOTEQUAL; |
preserve_opcode = 1; |
break; |
} |
if (!preserve_opcode) { |
writer->Inst->U.I.Opcode = RC_OPCODE_SUB; |
} |
writer->Inst->U.I.DstReg.WriteMask = 0; |
writer->Inst->U.I.DstReg.File = RC_FILE_NONE; |
writer->Inst->U.I.WriteALUResult = alu_chan; |
writer->Inst->U.I.ALUResultCompare = compare_func; |
if (reverse_srcs) { |
struct rc_src_register temp_src; |
temp_src = writer->Inst->U.I.SrcReg[0]; |
writer->Inst->U.I.SrcReg[0] = |
writer->Inst->U.I.SrcReg[1]; |
writer->Inst->U.I.SrcReg[1] = temp_src; |
} |
} |
} |
inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; |
inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; |
inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( |
RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, |
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); |
inst_if->U.I.SrcReg[0].Negate = 0; |
return 1; |
} |
/**
 * Tell whether a source register access (swizzle, negate, abs) can be used
 * directly by the given opcode.
 *
 * \return 1 if the access is natively supported, 0 if it has to be split
 *         or rewritten.
 */
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	unsigned int relevant;
	unsigned int negated;
	int i;

	switch (opcode) {
	case RC_OPCODE_TEX:
	case RC_OPCODE_TXB:
	case RC_OPCODE_TXP:
	case RC_OPCODE_TXD:
	case RC_OPCODE_TXL:
	case RC_OPCODE_KIL:
		if (reg.Abs)
			return 0;

		if (opcode == RC_OPCODE_KIL &&
		    !(reg.Swizzle == RC_SWIZZLE_XYZW && reg.Negate == RC_MASK_NONE))
			return 0;

		for (i = 0; i < 4; ++i) {
			unsigned int swz = GET_SWZ(reg.Swizzle, i);

			if (swz == RC_SWIZZLE_UNUSED) {
				/* Negation of an unused component is irrelevant. */
				reg.Negate &= ~(1 << i);
			} else if (swz >= 4) {
				return 0;
			}
		}

		return reg.Negate ? 0 : 1;

	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
		 * if it doesn't fit perfectly into a .xyzw case... */
		return (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
			? 1 : 0;

	default:
		break;
	}

	if (reg.File == RC_FILE_INLINE)
		return 1;

	/* ALU instructions support almost everything; only a negate that
	 * covers some but not all of the relevant first three components is
	 * rejected. */
	relevant = 0;
	for (i = 0; i < 3; ++i) {
		unsigned int swz = GET_SWZ(reg.Swizzle, i);

		if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
			relevant |= 1 << i;
	}

	negated = reg.Negate & relevant;
	return (negated && negated != relevant) ? 0 : 1;
}
/** |
* Split source register access. |
* |
* The only thing we *cannot* do in an ALU instruction is per-component |
* negation. |
*/ |
static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, |
struct rc_swizzle_split * split) |
{ |
unsigned int negatebase[2] = { 0, 0 }; |
int i; |
for(i = 0; i < 4; ++i) { |
unsigned int swz = GET_SWZ(src.Swizzle, i); |
if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) |
continue; |
negatebase[GET_BIT(src.Negate, i)] |= 1 << i; |
} |
split->NumPhases = 0; |
for(i = 0; i <= 1; ++i) { |
if (!negatebase[i]) |
continue; |
split->Phase[split->NumPhases++] = negatebase[i]; |
} |
} |
struct rc_swizzle_caps r500_swizzle_caps = { |
.IsNative = r500_swizzle_is_native, |
.Split = r500_swizzle_split |
}; |
/* Map a 3-bit fragment swizzle selector to its printable name.
 * Returns NULL for values outside 0..7. */
static char *toswiz(int swiz_val) {
	static char *const names[8] = {
		"R", "G", "B", "A", "0", "H", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;
	return names[swiz_val];
}
/**
 * Map the 4-bit RGBA opcode field to a printable mnemonic.
 *
 * The dump code passes (inst & 0xf) and prints the result with "%s", so this
 * function must never return NULL: encodings without a mnemonic (13..15, or
 * anything out of range) yield the placeholder "Unknown" instead of handing
 * a null pointer to fprintf.
 */
static char *toop(int op_val)
{
	static char *const names[] = {
		"MAD",      /*  0 */
		"DP3",      /*  1 */
		"DP4",      /*  2 */
		"D2A",      /*  3 */
		"MIN",      /*  4 */
		"MAX",      /*  5 */
		"Reserved", /*  6 */
		"CND",      /*  7 */
		"CMP",      /*  8 */
		"FRC",      /*  9 */
		"SOP",      /* 10 */
		"MDH",      /* 11 */
		"MDV",      /* 12 */
	};

	if (op_val < 0 || op_val >= (int)(sizeof(names) / sizeof(names[0])))
		return "Unknown";
	return names[op_val];
}
/* Map the 4-bit alpha opcode field to a printable mnemonic.
 * Returns NULL for values outside 0..15. */
static char *to_alpha_op(int op_val)
{
	static char *const names[16] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV",
	};

	if (op_val < 0 || op_val > 15)
		return NULL;
	return names[op_val];
}
/* Map a 4-bit write mask to a printable channel list.
 * Returns NULL for values outside 0..15. */
static char *to_mask(int val)
{
	static char *const names[16] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB",
	};

	if (val < 0 || val > 15)
		return NULL;
	return names[val];
}
/**
 * Map the 3-bit texture opcode field to a printable mnemonic.
 *
 * The dump code passes ((inst >> 22) & 0x7) and prints the result with "%s",
 * so this function must never return NULL: encoding 7 (and anything out of
 * range) yields the placeholder "Unknown" instead of handing a null pointer
 * to fprintf.
 */
static char *to_texop(int val)
{
	switch(val) {
	case 0: return "NOP";
	case 1: return "LD";
	case 2: return "TEXKILL";
	case 3: return "PROJ";
	case 4: return "LODBIAS";
	case 5: return "LOD";
	case 6: return "DXDY";
	default: break;
	}
	return "Unknown";
}
void r500FragmentProgramDump(struct radeon_compiler *c, void *user) |
{ |
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; |
struct r500_fragment_program_code *code = &compiler->code->code.r500; |
int n, i; |
uint32_t inst; |
uint32_t inst0; |
char *str = NULL; |
fprintf(stderr, "R500 Fragment Program:\n--------\n"); |
for (n = 0; n < code->inst_end+1; n++) { |
inst0 = inst = code->inst[n].inst0; |
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); |
switch(inst & 0x3) { |
case R500_INST_TYPE_ALU: str = "ALU"; break; |
case R500_INST_TYPE_OUT: str = "OUT"; break; |
case R500_INST_TYPE_FC: str = "FC"; break; |
case R500_INST_TYPE_TEX: str = "TEX"; break; |
}; |
fprintf(stderr,"%s %s %s %s %s ", str, |
inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", |
inst & R500_INST_LAST ? "LAST" : "", |
inst & R500_INST_NOP ? "NOP" : "", |
inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); |
fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), |
to_mask((inst >> 15) & 0xf)); |
switch(inst0 & 0x3) { |
case R500_INST_TYPE_ALU: |
case R500_INST_TYPE_OUT: |
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); |
inst = code->inst[n].inst1; |
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", |
inst & 0xff, (inst & (1<<8)) ? 'c' : 't', |
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', |
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', |
(inst >> 30)); |
fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); |
inst = code->inst[n].inst2; |
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", |
inst & 0xff, (inst & (1<<8)) ? 'c' : 't', |
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', |
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', |
(inst >> 30)); |
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); |
inst = code->inst[n].inst3; |
fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", |
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), |
(inst >> 11) & 0x3, |
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), |
(inst >> 24) & 0x3, (inst >> 29) & 0x3); |
fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); |
inst = code->inst[n].inst4; |
fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), |
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", |
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, |
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, |
(inst >> 29) & 0x3, |
(inst >> 31) & 0x1); |
fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); |
inst = code->inst[n].inst5; |
fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), |
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", |
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), |
(inst >> 23) & 0x3, |
(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); |
break; |
case R500_INST_TYPE_FC: |
fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); |
inst = code->inst[n].inst2; |
/* JUMP_FUNC JUMP_ANY*/ |
fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, |
(inst & R500_FC_JUMP_ANY) >> 5); |
/* OP */ |
switch(inst & 0x7){ |
case R500_FC_OP_JUMP: |
fprintf(stderr, "JUMP"); |
break; |
case R500_FC_OP_LOOP: |
fprintf(stderr, "LOOP"); |
break; |
case R500_FC_OP_ENDLOOP: |
fprintf(stderr, "ENDLOOP"); |
break; |
case R500_FC_OP_REP: |
fprintf(stderr, "REP"); |
break; |
case R500_FC_OP_ENDREP: |
fprintf(stderr, "ENDREP"); |
break; |
case R500_FC_OP_BREAKLOOP: |
fprintf(stderr, "BREAKLOOP"); |
break; |
case R500_FC_OP_BREAKREP: |
fprintf(stderr, "BREAKREP"); |
break; |
case R500_FC_OP_CONTINUE: |
fprintf(stderr, "CONTINUE"); |
break; |
} |
fprintf(stderr," "); |
/* A_OP */ |
switch(inst & (0x3 << 6)){ |
case R500_FC_A_OP_NONE: |
fprintf(stderr, "NONE"); |
break; |
case R500_FC_A_OP_POP: |
fprintf(stderr, "POP"); |
break; |
case R500_FC_A_OP_PUSH: |
fprintf(stderr, "PUSH"); |
break; |
} |
/* B_OP0 B_OP1 */ |
for(i=0; i<2; i++){ |
fprintf(stderr, " "); |
switch(inst & (0x3 << (24 + (i * 2)))){ |
/* R500_FC_B_OP0_NONE |
* R500_FC_B_OP1_NONE */ |
case 0: |
fprintf(stderr, "NONE"); |
break; |
case R500_FC_B_OP0_DECR: |
case R500_FC_B_OP1_DECR: |
fprintf(stderr, "DECR"); |
break; |
case R500_FC_B_OP0_INCR: |
case R500_FC_B_OP1_INCR: |
fprintf(stderr, "INCR"); |
break; |
} |
} |
/*POP_CNT B_ELSE */ |
fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); |
inst = code->inst[n].inst3; |
/* JUMP_ADDR */ |
fprintf(stderr, " %d", inst >> 16); |
if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ |
fprintf(stderr, " IGN_UNC"); |
} |
inst = code->inst[n].inst3; |
fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); |
fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", |
inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); |
break; |
case R500_INST_TYPE_TEX: |
inst = code->inst[n].inst1; |
fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, |
to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", |
(inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); |
inst = code->inst[n].inst2; |
fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, |
inst & 127, inst & (1<<7) ? "(rel)" : "", |
toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), |
toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), |
(inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", |
toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), |
toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); |
fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); |
break; |
} |
fprintf(stderr,"\n"); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r500_fragprog.h |
---|
0,0 → 1,50 |
/* |
* Copyright (C) 2005 Ben Skeggs. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/* |
* Authors: |
* Ben Skeggs <darktama@iinet.net.au> |
* Jerome Glisse <j.glisse@gmail.com> |
*/ |
#ifndef __R500_FRAGPROG_H_
#define __R500_FRAGPROG_H_

#include "radeon_compiler.h"
#include "radeon_swizzle.h"

/* Emit R500 hardware code for the compiled fragment program.
 * NOTE(review): the definition is not in this header; presumably it lives
 * in r500_fragprog_emit.c -- confirm. */
void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);

/* Print the generated R500 fragment program code to stderr. */
void r500FragmentProgramDump(struct radeon_compiler *c, void *user);

/* Swizzle capability callbacks for R500 fragment programs. */
extern struct rc_swizzle_caps r500_swizzle_caps;

/* Rewrite an IF instruction to read the ALU result special register.
 * Returns 1 if inst_if was rewritten, 0 if it is not an IF. */
int r500_transform_IF(
	struct radeon_compiler *c,
	struct rc_instruction *inst_if,
	void *data);

#endif /* __R500_FRAGPROG_H_ */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c |
---|
0,0 → 1,687 |
/* |
* Copyright (C) 2005 Ben Skeggs. |
* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* \file |
* |
* \author Ben Skeggs <darktama@iinet.net.au> |
* |
* \author Jerome Glisse <j.glisse@gmail.com> |
* |
* \author Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
*/ |
#include "r500_fragprog.h" |
#include "../r300_reg.h" |
#include "radeon_program_pair.h" |
/* Declares a local `code` pointing at the R500 code of the fragment program
 * compiler `c`, which must be in scope at the point of use. */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/* Report a compile error through rc_error(), prefixed with the current file
 * and function; a trailing newline is appended to the message. Requires a
 * fragment program compiler `c` in scope. */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
/* Bookkeeping for one IF/ELSE/ENDIF construct.
 * NOTE(review): the fields are presumably instruction indices of the
 * corresponding flow control instructions; their use is outside this part
 * of the file -- confirm. */
struct branch_info {
	int If;
	int Else;
	int Endif;
};
/* Bookkeeping for one loop that is currently being emitted. */
struct r500_loop_info {
	/* Index of the emitted loop-begin flow control instruction. */
	int BgnLoop;

	/* Value of emit_state::CurrentBranchDepth when the loop was opened. */
	int BranchDepth;

	/* Instruction indices of the BRK instructions emitted in this loop:
	 * array, number of entries in use, and the reserved-size counter
	 * handed to memory_pool_array_reserve(). */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Same as above, for the CONT instructions. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
/* State carried through the emission of flow control instructions. */
struct emit_state {
	/* Compiler (errors and memory pool) and the code being written. */
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Branch records with the current depth and the reserved-size
	 * counter of the array. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Loop records with the current depth and the reserved-size counter
	 * of the array. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* NOTE(review): presumably the deepest branch nesting seen so far;
	 * it is not written in the visible part of the file -- confirm. */
	unsigned int MaxBranchDepth;
};
/**
 * Translate a compiler opcode into the hardware RGB opcode.
 *
 * NOP is emitted as MAD. Any opcode without an RGB equivalent is reported
 * through error() and also emitted as MAD.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
/**
 * Translate a compiler opcode into the hardware alpha opcode.
 *
 * NOP is emitted as MAD. Any opcode without an alpha equivalent is reported
 * through error() and also emitted as MAD.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
static unsigned int fix_hw_swizzle(unsigned int swz) |
{ |
switch (swz) { |
case RC_SWIZZLE_ZERO: |
case RC_SWIZZLE_UNUSED: |
swz = 4; |
break; |
case RC_SWIZZLE_HALF: |
swz = 5; |
break; |
case RC_SWIZZLE_ONE: |
swz = 6; |
break; |
} |
return swz; |
} |
static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) |
{ |
unsigned int t = inst->RGB.Arg[arg].Source; |
int comp; |
t |= inst->RGB.Arg[arg].Negate << 11; |
t |= inst->RGB.Arg[arg].Abs << 12; |
for(comp = 0; comp < 3; ++comp) |
t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); |
return t; |
} |
static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) |
{ |
unsigned int t = inst->Alpha.Arg[i].Source; |
t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; |
t |= inst->Alpha.Arg[i].Negate << 5; |
t |= inst->Alpha.Arg[i].Abs << 6; |
return t; |
} |
/* Translate a compare function into the ALU result op bits of instruction
 * word 0. Unsupported functions are reported through rc_error() and yield 0. */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t bits = 0;

	switch(func) {
	case RC_COMPARE_FUNC_EQUAL:
		bits = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		bits = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		bits = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		bits = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
		break;
	}

	return bits;
}
static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) |
{ |
if (index > code->max_temp_idx) |
code->max_temp_idx = index; |
} |
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) |
{ |
/* From docs: |
* Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. |
* MSB = 1 << 7 */ |
if (!src.Used) |
return 1 << 7; |
if (src.File == RC_FILE_CONSTANT) { |
return src.Index | R500_RGB_ADDR0_CONST; |
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { |
use_temporary(code, src.Index); |
return src.Index; |
} else if (src.File == RC_FILE_INLINE) { |
return src.Index | (1 << 7); |
} |
return 0; |
} |
/** |
* NOP the specified instruction if it is not a texture lookup. |
*/ |
static void alu_nop(struct r300_fragment_program_compiler *c, int ip) |
{ |
PROG_CODE; |
if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { |
code->inst[ip].inst0 |= R500_INST_NOP; |
} |
} |
/** |
* Emit a paired ALU instruction. |
*/ |
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) |
{ |
int ip; |
PROG_CODE; |
if (code->inst_end >= c->Base.max_alu_insts-1) { |
error("emit_alu: Too many instructions"); |
return; |
} |
ip = ++code->inst_end; |
/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ |
if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || |
inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { |
if (ip > 0) { |
alu_nop(c, ip - 1); |
} |
} |
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); |
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); |
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { |
code->inst[ip].inst0 = R500_INST_TYPE_OUT; |
if (inst->WriteALUResult) { |
error("Cannot write output and ALU result at the same time"); |
return; |
} |
} else { |
code->inst[ip].inst0 = R500_INST_TYPE_ALU; |
} |
code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT); |
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); |
code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; |
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); |
if (inst->Nop) { |
code->inst[ip].inst0 |= R500_INST_NOP; |
} |
if (inst->Alpha.DepthWriteMask) { |
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; |
c->code->writes_depth = 1; |
} |
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); |
code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); |
use_temporary(code, inst->Alpha.DestIndex); |
use_temporary(code, inst->RGB.DestIndex); |
if (inst->RGB.Saturate) |
code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; |
if (inst->Alpha.Saturate) |
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; |
/* Set the presubtract operation. */ |
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { |
case RC_PRESUB_BIAS: |
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; |
break; |
case RC_PRESUB_SUB: |
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; |
break; |
case RC_PRESUB_ADD: |
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; |
break; |
case RC_PRESUB_INV: |
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; |
break; |
default: |
break; |
} |
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { |
case RC_PRESUB_BIAS: |
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; |
break; |
case RC_PRESUB_SUB: |
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; |
break; |
case RC_PRESUB_ADD: |
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; |
break; |
case RC_PRESUB_INV: |
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; |
break; |
default: |
break; |
} |
/* Set the output modifier */ |
code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT; |
code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT; |
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); |
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); |
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); |
code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); |
code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); |
code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); |
code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; |
code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; |
code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; |
code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; |
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; |
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; |
code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); |
code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); |
if (inst->WriteALUResult) { |
code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; |
if (inst->WriteALUResult == RC_ALURESULT_X) |
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; |
else |
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; |
code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); |
} |
} |
/* Pack the four components of a compiler swizzle into the 2-bits-per-
 * component form used by texture instructions; only the low two bits of
 * each selector are kept. */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 4;

	while (chan-- > 0)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << (chan * 2);

	return packed;
}
/** |
* Emit a single TEX instruction |
*/ |
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) |
{ |
int ip; |
PROG_CODE; |
if (code->inst_end >= c->Base.max_alu_insts-1) { |
error("emit_tex: Too many instructions"); |
return 0; |
} |
ip = ++code->inst_end; |
code->inst[ip].inst0 = R500_INST_TYPE_TEX |
| (inst->DstReg.WriteMask << 11) |
| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); |
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) |
| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT); |
if (inst->TexSrcTarget == RC_TEXTURE_RECT) |
code->inst[ip].inst1 |= R500_TEX_UNSCALED; |
switch (inst->Opcode) { |
case RC_OPCODE_KIL: |
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; |
break; |
case RC_OPCODE_TEX: |
code->inst[ip].inst1 |= R500_TEX_INST_LD; |
break; |
case RC_OPCODE_TXB: |
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; |
break; |
case RC_OPCODE_TXP: |
code->inst[ip].inst1 |= R500_TEX_INST_PROJ; |
break; |
case RC_OPCODE_TXD: |
code->inst[ip].inst1 |= R500_TEX_INST_DXDY; |
break; |
case RC_OPCODE_TXL: |
code->inst[ip].inst1 |= R500_TEX_INST_LOD; |
break; |
default: |
error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); |
} |
use_temporary(code, inst->SrcReg[0].Index); |
if (inst->Opcode != RC_OPCODE_KIL) |
use_temporary(code, inst->DstReg.Index); |
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) |
| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) |
| R500_TEX_DST_ADDR(inst->DstReg.Index) |
| (GET_SWZ(inst->TexSwizzle, 0) << 24) |
| (GET_SWZ(inst->TexSwizzle, 1) << 26) |
| (GET_SWZ(inst->TexSwizzle, 2) << 28) |
| (GET_SWZ(inst->TexSwizzle, 3) << 30) |
; |
if (inst->Opcode == RC_OPCODE_TXD) { |
use_temporary(code, inst->SrcReg[1].Index); |
use_temporary(code, inst->SrcReg[2].Index); |
/* DX and DY parameters are specified in a separate register. */ |
code->inst[ip].inst3 = |
R500_DX_ADDR(inst->SrcReg[1].Index) | |
(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | |
R500_DY_ADDR(inst->SrcReg[2].Index) | |
(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); |
} |
return 1; |
} |
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) |
{ |
unsigned int newip; |
if (s->Code->inst_end >= s->C->max_alu_insts-1) { |
rc_error(s->C, "emit_tex: Too many instructions"); |
return; |
} |
newip = ++s->Code->inst_end; |
/* Currently all loops use the same integer constant to intialize |
* the loop variables. */ |
if(!s->Code->int_constants[0]) { |
s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); |
s->Code->int_constant_count = 1; |
} |
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; |
switch(inst->U.I.Opcode){ |
struct branch_info * branch; |
struct r500_loop_info * loop; |
case RC_OPCODE_BGNLOOP: |
memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, |
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); |
loop = &s->Loops[s->CurrentLoopDepth++]; |
memset(loop, 0, sizeof(struct r500_loop_info)); |
loop->BranchDepth = s->CurrentBranchDepth; |
loop->BgnLoop = newip; |
s->Code->inst[newip].inst2 = R500_FC_OP_LOOP |
| R500_FC_JUMP_FUNC(0x00) |
| R500_FC_IGNORE_UNCOVERED |
; |
break; |
case RC_OPCODE_BRK: |
loop = &s->Loops[s->CurrentLoopDepth - 1]; |
memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, |
loop->BrkCount, loop->BrkReserved, 1); |
loop->Brks[loop->BrkCount++] = newip; |
s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP |
| R500_FC_JUMP_FUNC(0xff) |
| R500_FC_B_OP1_DECR |
| R500_FC_B_POP_CNT( |
s->CurrentBranchDepth - loop->BranchDepth) |
| R500_FC_IGNORE_UNCOVERED |
; |
break; |
case RC_OPCODE_CONT: |
loop = &s->Loops[s->CurrentLoopDepth - 1]; |
memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, |
loop->ContCount, loop->ContReserved, 1); |
loop->Conts[loop->ContCount++] = newip; |
s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE |
| R500_FC_JUMP_FUNC(0xff) |
| R500_FC_B_OP1_DECR |
| R500_FC_B_POP_CNT( |
s->CurrentBranchDepth - loop->BranchDepth) |
| R500_FC_IGNORE_UNCOVERED |
; |
break; |
case RC_OPCODE_ENDLOOP: |
{ |
loop = &s->Loops[s->CurrentLoopDepth - 1]; |
/* Emit ENDLOOP */ |
s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP |
| R500_FC_JUMP_FUNC(0xff) |
| R500_FC_JUMP_ANY |
| R500_FC_IGNORE_UNCOVERED |
; |
/* The constant integer at index 0 is used by all loops. */ |
s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) |
| R500_FC_JUMP_ADDR(loop->BgnLoop + 1) |
; |
/* Set jump address and int constant for BGNLOOP */ |
s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) |
| R500_FC_JUMP_ADDR(newip) |
; |
/* Set jump address for the BRK instructions. */ |
while(loop->BrkCount--) { |
s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = |
R500_FC_JUMP_ADDR(newip + 1); |
} |
/* Set jump address for CONT instructions. */ |
while(loop->ContCount--) { |
s->Code->inst[loop->Conts[loop->ContCount]].inst3 = |
R500_FC_JUMP_ADDR(newip); |
} |
s->CurrentLoopDepth--; |
break; |
} |
case RC_OPCODE_IF: |
if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { |
rc_error(s->C, "Branch depth exceeds hardware limit"); |
return; |
} |
memory_pool_array_reserve(&s->C->Pool, struct branch_info, |
s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); |
branch = &s->Branches[s->CurrentBranchDepth++]; |
branch->If = newip; |
branch->Else = -1; |
branch->Endif = -1; |
if (s->CurrentBranchDepth > s->MaxBranchDepth) |
s->MaxBranchDepth = s->CurrentBranchDepth; |
/* actual instruction is filled in at ENDIF time */ |
break; |
case RC_OPCODE_ELSE: |
if (!s->CurrentBranchDepth) { |
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); |
return; |
} |
branch = &s->Branches[s->CurrentBranchDepth - 1]; |
branch->Else = newip; |
/* actual instruction is filled in at ENDIF time */ |
break; |
case RC_OPCODE_ENDIF: |
if (!s->CurrentBranchDepth) { |
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); |
return; |
} |
branch = &s->Branches[s->CurrentBranchDepth - 1]; |
branch->Endif = newip; |
s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP |
| R500_FC_A_OP_NONE /* no address stack */ |
| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ |
| R500_FC_B_OP0_DECR /* decrement branch counter if stay */ |
| R500_FC_B_OP1_NONE /* no branch counter if stay */ |
| R500_FC_B_POP_CNT(1) |
; |
s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); |
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP |
| R500_FC_A_OP_NONE /* no address stack */ |
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ |
| R500_FC_B_OP0_INCR /* increment branch counter if stay */ |
| R500_FC_IGNORE_UNCOVERED |
; |
if (branch->Else >= 0) { |
/* increment branch counter also if jump */ |
s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; |
s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); |
s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP |
| R500_FC_A_OP_NONE /* no address stack */ |
| R500_FC_B_ELSE /* all active pixels want to jump */ |
| R500_FC_B_OP0_NONE /* no counter op if stay */ |
| R500_FC_B_OP1_DECR /* decrement branch counter if jump */ |
| R500_FC_B_POP_CNT(1) |
; |
s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); |
} else { |
/* don't touch branch counter on jump */ |
s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; |
s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); |
} |
s->CurrentBranchDepth--; |
break; |
default: |
rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); |
} |
} |
void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) |
{ |
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; |
struct emit_state s; |
struct r500_fragment_program_code *code = &compiler->code->code.r500; |
memset(&s, 0, sizeof(s)); |
s.C = &compiler->Base; |
s.Code = code; |
memset(code, 0, sizeof(*code)); |
code->max_temp_idx = 1; |
code->inst_end = -1; |
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; |
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; |
inst = inst->Next) { |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (opcode->IsFlowControl) { |
emit_flowcontrol(&s, inst); |
} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { |
continue; |
} else { |
emit_tex(compiler, &inst->U.I); |
} |
} else { |
emit_paired(compiler, &inst->U.P); |
} |
} |
if (code->max_temp_idx >= compiler->Base.max_temp_regs) |
rc_error(&compiler->Base, "Too many hardware temporaries used"); |
if (compiler->Base.Error) |
return; |
if (code->inst_end == -1 || |
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { |
int ip; |
/* This may happen when dead-code elimination is disabled or |
* when most of the fragment program logic is leading to a KIL */ |
if (code->inst_end >= compiler->Base.max_alu_insts-1) { |
rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); |
return; |
} |
ip = ++code->inst_end; |
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; |
} |
/* Make sure TEX_SEM_WAIT is set on the last instruction */ |
code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT; |
/* Enable full flow control mode if we are using loops or have if |
* statements nested at least four deep. */ |
if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { |
if (code->max_temp_idx < 1) |
code->max_temp_idx = 1; |
code->us_fc_ctrl |= R500_FC_FULL_FC_EN; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_code.c |
---|
0,0 → 1,187 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_code.h" |
#include <stdlib.h> |
#include <stdio.h> |
#include <string.h> |
#include "radeon_program.h" |
void rc_constants_init(struct rc_constant_list * c) |
{ |
memset(c, 0, sizeof(*c)); |
} |
/** |
* Copy a constants structure, assuming that the destination structure |
* is not initialized. |
*/ |
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) |
{ |
dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); |
memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); |
dst->Count = src->Count; |
dst->_Reserved = src->Count; |
} |
void rc_constants_destroy(struct rc_constant_list * c) |
{ |
free(c->Constants); |
memset(c, 0, sizeof(*c)); |
} |
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) |
{ |
unsigned index = c->Count; |
if (c->Count >= c->_Reserved) { |
struct rc_constant * newlist; |
c->_Reserved = c->_Reserved * 2; |
if (!c->_Reserved) |
c->_Reserved = 16; |
newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); |
memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); |
free(c->Constants); |
c->Constants = newlist; |
} |
c->Constants[index] = *constant; |
c->Count++; |
return index; |
} |
/** |
* Add a state vector to the constant list, while trying to avoid duplicates. |
*/ |
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) |
{ |
unsigned index; |
struct rc_constant constant; |
for(index = 0; index < c->Count; ++index) { |
if (c->Constants[index].Type == RC_CONSTANT_STATE) { |
if (c->Constants[index].u.State[0] == state0 && |
c->Constants[index].u.State[1] == state1) |
return index; |
} |
} |
memset(&constant, 0, sizeof(constant)); |
constant.Type = RC_CONSTANT_STATE; |
constant.Size = 4; |
constant.u.State[0] = state0; |
constant.u.State[1] = state1; |
return rc_constants_add(c, &constant); |
} |
/** |
* Add an immediate vector to the constant list, while trying to avoid |
* duplicates. |
*/ |
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) |
{ |
unsigned index; |
struct rc_constant constant; |
for(index = 0; index < c->Count; ++index) { |
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { |
if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) |
return index; |
} |
} |
memset(&constant, 0, sizeof(constant)); |
constant.Type = RC_CONSTANT_IMMEDIATE; |
constant.Size = 4; |
memcpy(constant.u.Immediate, data, sizeof(float) * 4); |
return rc_constants_add(c, &constant); |
} |
/** |
* Add an immediate scalar to the constant list, while trying to avoid |
* duplicates. |
*/ |
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) |
{ |
unsigned index; |
int free_index = -1; |
struct rc_constant constant; |
for(index = 0; index < c->Count; ++index) { |
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { |
unsigned comp; |
for(comp = 0; comp < c->Constants[index].Size; ++comp) { |
if (c->Constants[index].u.Immediate[comp] == data) { |
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); |
return index; |
} |
} |
if (c->Constants[index].Size < 4) |
free_index = index; |
} |
} |
if (free_index >= 0) { |
unsigned comp = c->Constants[free_index].Size++; |
c->Constants[free_index].u.Immediate[comp] = data; |
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); |
return free_index; |
} |
memset(&constant, 0, sizeof(constant)); |
constant.Type = RC_CONSTANT_IMMEDIATE; |
constant.Size = 1; |
constant.u.Immediate[0] = data; |
*swizzle = RC_SWIZZLE_XXXX; |
return rc_constants_add(c, &constant); |
} |
void rc_constants_print(struct rc_constant_list * c) |
{ |
unsigned int i; |
for(i = 0; i < c->Count; i++) { |
if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { |
float * values = c->Constants[i].u.Immediate; |
fprintf(stderr, "CONST[%u] = " |
"{ %10.4f %10.4f %10.4f %10.4f }\n", |
i, values[0],values[1], values[2], values[3]); |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_code.h |
---|
0,0 → 1,306 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef RADEON_CODE_H |
#define RADEON_CODE_H |
#include <stdint.h> |
/* Fragment shader (PFS) limits, R300 class. */
#define R300_PFS_MAX_ALU_INST			64
#define R300_PFS_MAX_TEX_INST			32
#define R300_PFS_MAX_TEX_INDIRECT		4
#define R300_PFS_NUM_TEMP_REGS			32
#define R300_PFS_NUM_CONST_REGS			32

/* Fragment shader (PFS) limits, R400 class. */
#define R400_PFS_MAX_ALU_INST			512
#define R400_PFS_MAX_TEX_INST			512

/* Fragment shader (PFS) limits, R500 class. */
#define R500_PFS_MAX_INST			512
#define R500_PFS_NUM_TEMP_REGS			128
#define R500_PFS_NUM_CONST_REGS			256
#define R500_PFS_MAX_BRANCH_DEPTH_FULL		32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL	4

/* The r500 maximum depth is not just for loops, but any combination of loops
 * and subroutine jumps. */
#define R500_PVS_MAX_LOOP_DEPTH			8

#define STATE_R300_WINDOW_DIMENSION		(STATE_INTERNAL_DRIVER+0)
/** Kinds of constant, stored in rc_constant::Type. */
enum {
	/**
	 * External constants are constants whose meaning is unknown to this
	 * compiler. For example, a Mesa gl_program's constants are turned
	 * into external constants.
	 */
	RC_CONSTANT_EXTERNAL = 0,

	/** Literal float values carried in the constant itself. */
	RC_CONSTANT_IMMEDIATE = 1,

	/**
	 * Constant referring to state that is known by this compiler,
	 * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
	 */
	RC_CONSTANT_STATE = 2
};
/** Identifiers of the state known to this compiler (see RC_CONSTANT_STATE). */
enum {
	RC_STATE_SHADOW_AMBIENT		= 0,
	RC_STATE_R300_WINDOW_DIMENSION	= 1,
	RC_STATE_R300_TEXRECT_FACTOR	= 2,
	RC_STATE_R300_TEXSCALE_FACTOR	= 3,
	RC_STATE_R300_VIEWPORT_SCALE	= 4,
	RC_STATE_R300_VIEWPORT_OFFSET	= 5
};
/**
 * One entry of a constant list. Which member of \c u is meaningful is
 * selected by \c Type.
 */
struct rc_constant {
	unsigned Type:2; /**< RC_CONSTANT_xxx */
	unsigned Size:3; /**< number of components in use */

	union {
		unsigned External;	/**< for RC_CONSTANT_EXTERNAL */
		float Immediate[4];	/**< for RC_CONSTANT_IMMEDIATE */
		unsigned State[2];	/**< for RC_CONSTANT_STATE */
	} u;
};
/** Growable array of constants, managed by the rc_constants_* functions. */
struct rc_constant_list {
	struct rc_constant * Constants;	/**< heap storage, or NULL when empty */
	unsigned Count;			/**< number of entries in use */
	unsigned _Reserved;		/**< number of entries allocated */
};
/* Constant list management. */

/** Reset a list to the empty state. */
void rc_constants_init(struct rc_constant_list * c);
/** Copy src into dst; dst is assumed not to be initialized. */
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
/** Free the storage of a list and reset it to the empty state. */
void rc_constants_destroy(struct rc_constant_list * c);
/** Append a constant and return its index. */
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
/** Add a state constant, reusing a matching entry. Returns its index. */
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1);
/** Add a four-float immediate, reusing a matching entry. Returns its index. */
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
/** Add a scalar immediate; *swizzle receives the component selector. */
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
/** Print all immediate constants to stderr. */
void rc_constants_print(struct rc_constant_list * c);
/**
 * Compare functions.
 *
 * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
 * the correct GL compare function.
 */
typedef enum {
	RC_COMPARE_FUNC_NEVER		= 0,
	RC_COMPARE_FUNC_LESS		= 1,
	RC_COMPARE_FUNC_EQUAL		= 2,
	RC_COMPARE_FUNC_LEQUAL		= 3,
	RC_COMPARE_FUNC_GREATER		= 4,
	RC_COMPARE_FUNC_NOTEQUAL	= 5,
	RC_COMPARE_FUNC_GEQUAL		= 6,
	RC_COMPARE_FUNC_ALWAYS		= 7
} rc_compare_func;
/**
 * Coordinate wrapping modes.
 *
 * These are not quite the same as their GL counterparts yet.
 */
typedef enum {
	RC_WRAP_NONE			= 0,
	RC_WRAP_REPEAT			= 1,
	RC_WRAP_MIRRORED_REPEAT		= 2,
	RC_WRAP_MIRRORED_CLAMP		= 3
} rc_wrap_mode;
/**
 * Stores state that influences the compilation of a fragment program.
 *
 * There is one \c unit entry per texture unit (16 in total) plus
 * program-wide flags.
 */
struct r300_fragment_program_external_state {
	struct {
		/**
		 * This field contains swizzle for some lowering passes
		 * (shadow comparison, unorm->snorm conversion)
		 */
		unsigned texture_swizzle:12;

		/**
		 * If the sampler is used as a shadow sampler,
		 * this field specifies the compare function.
		 *
		 * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
		 * \sa rc_compare_func
		 */
		unsigned texture_compare_func : 3;

		/**
		 * No matter what the sampler type is,
		 * this field turns it into a shadow sampler.
		 */
		unsigned compare_mode_enabled : 1;

		/**
		 * If the sampler will receive non-normalized coords,
		 * this field is set. The scaling factor is given by
		 * RC_STATE_R300_TEXRECT_FACTOR.
		 */
		unsigned non_normalized_coords : 1;

		/**
		 * This field specifies wrapping modes for the sampler.
		 *
		 * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
		 * will be performed on the coordinates.
		 */
		unsigned wrap_mode : 3;

		/**
		 * The coords are scaled after applying the wrap mode emulation
		 * and right before texture fetch. The scaling factor is given by
		 * RC_STATE_R300_TEXSCALE_FACTOR.
		 */
		unsigned clamp_and_scale_before_fetch : 1;

		/**
		 * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
		 * in the shader.
		 */
		unsigned convert_unorm_to_snorm:1;
	} unit[16];

	unsigned alpha_to_one:1;
};
/** Instruction ranges and flags describing one node of an R300 program. */
struct r300_fragment_program_node {
	int tex_offset;	/**< first tex instruction */
	int tex_end;	/**< last tex instruction, relative to tex_offset */
	int alu_offset;	/**< first ALU instruction */
	int alu_end;	/**< last ALU instruction, relative to alu_offset */
	int flags;
};
/** |
* Stores an R300 fragment program in its compiled-to-hardware form. |
*/ |
struct r300_fragment_program_code { |
struct { |
unsigned int length; /**< total # of texture instructions used */ |
uint32_t inst[R400_PFS_MAX_TEX_INST]; |
} tex; |
struct { |
unsigned int length; /**< total # of ALU instructions used */ |
struct { |
uint32_t rgb_inst; |
uint32_t rgb_addr; |
uint32_t alpha_inst; |
uint32_t alpha_addr; |
uint32_t r400_ext_addr; |
} inst[R400_PFS_MAX_ALU_INST]; |
} alu; |
uint32_t config; /* US_CONFIG */ |
uint32_t pixsize; /* US_PIXSIZE */ |
uint32_t code_offset; /* US_CODE_OFFSET */ |
uint32_t r400_code_offset_ext; /* US_CODE_EXT */ |
uint32_t code_addr[4]; /* US_CODE_ADDR */ |
/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries |
* for r400 cards */ |
unsigned int r390_mode:1; |
}; |
struct r500_fragment_program_code { |
struct { |
uint32_t inst0; |
uint32_t inst1; |
uint32_t inst2; |
uint32_t inst3; |
uint32_t inst4; |
uint32_t inst5; |
} inst[R500_PFS_MAX_INST]; |
int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ |
int max_temp_idx; |
uint32_t us_fc_ctrl; |
uint32_t int_constants[32]; |
uint32_t int_constant_count; |
}; |
struct rX00_fragment_program_code { |
union { |
struct r300_fragment_program_code r300; |
struct r500_fragment_program_code r500; |
} code; |
unsigned writes_depth:1; |
struct rc_constant_list constants; |
unsigned *constants_remap_table; |
}; |
/* Vertex shader (VS) limits; each ALU instruction takes four dwords. */
#define R300_VS_MAX_ALU		256
#define R300_VS_MAX_ALU_DWORDS	(R300_VS_MAX_ALU * 4)
#define R500_VS_MAX_ALU		1024
#define R500_VS_MAX_ALU_DWORDS	(R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS	32

/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS	16
#define R300_VS_MAX_LOOP_DEPTH	1

#define VSF_MAX_INPUTS		32
#define VSF_MAX_OUTPUTS		32
struct r300_vertex_program_code { |
int length; |
union { |
uint32_t d[R500_VS_MAX_ALU_DWORDS]; |
float f[R500_VS_MAX_ALU_DWORDS]; |
} body; |
int pos_end; |
int num_temporaries; /* Number of temp vars used by program */ |
int inputs[VSF_MAX_INPUTS]; |
int outputs[VSF_MAX_OUTPUTS]; |
struct rc_constant_list constants; |
unsigned *constants_remap_table; |
uint32_t InputsRead; |
uint32_t OutputsWritten; |
unsigned int num_fc_ops; |
uint32_t fc_ops; |
union { |
uint32_t r300[R300_VS_MAX_FC_OPS]; |
struct { |
uint32_t lw; |
uint32_t uw; |
} r500[R300_VS_MAX_FC_OPS]; |
} fc_op_addrs; |
int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; |
}; |
#endif /* RADEON_CODE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler.c |
---|
0,0 → 1,504 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "radeon_compiler.h" |
#include <stdarg.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include "radeon_dataflow.h" |
#include "radeon_program.h" |
#include "radeon_program_pair.h" |
#include "radeon_regalloc.h" |
#include "radeon_compiler_util.h" |
void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs) |
{ |
memset(c, 0, sizeof(*c)); |
memory_pool_init(&c->Pool); |
c->Program.Instructions.Prev = &c->Program.Instructions; |
c->Program.Instructions.Next = &c->Program.Instructions; |
c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; |
c->regalloc_state = rs; |
} |
void rc_destroy(struct radeon_compiler * c) |
{ |
rc_constants_destroy(&c->Program.Constants); |
memory_pool_destroy(&c->Pool); |
free(c->ErrorMsg); |
} |
void rc_debug(struct radeon_compiler * c, const char * fmt, ...) |
{ |
va_list ap; |
if (!(c->Debug & RC_DBG_LOG)) |
return; |
va_start(ap, fmt); |
vfprintf(stderr, fmt, ap); |
va_end(ap); |
} |
void rc_error(struct radeon_compiler * c, const char * fmt, ...) |
{ |
va_list ap; |
c->Error = 1; |
if (!c->ErrorMsg) { |
/* Only remember the first error */ |
char buf[1024]; |
int written; |
va_start(ap, fmt); |
written = vsnprintf(buf, sizeof(buf), fmt, ap); |
va_end(ap); |
if (written < sizeof(buf)) { |
c->ErrorMsg = strdup(buf); |
} else { |
c->ErrorMsg = malloc(written + 1); |
va_start(ap, fmt); |
vsnprintf(c->ErrorMsg, written + 1, fmt, ap); |
va_end(ap); |
} |
} |
if (c->Debug & RC_DBG_LOG) { |
fprintf(stderr, "r300compiler error: "); |
va_start(ap, fmt); |
vfprintf(stderr, fmt, ap); |
va_end(ap); |
} |
} |
/**
 * Report a failed internal assertion as a compiler error ("ICE").
 *
 * \param file       source file of the failed assertion
 * \param line       source line of the failed assertion
 * \param assertion  text of the failed condition
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n",
		file, line, assertion);

	return 1;
}
/** |
* Recompute c->Program.InputsRead and c->Program.OutputsWritten |
* based on which inputs and outputs are actually referenced |
* in program instructions. |
*/ |
void rc_calculate_inputs_outputs(struct radeon_compiler * c) |
{ |
struct rc_instruction *inst; |
c->Program.InputsRead = 0; |
c->Program.OutputsWritten = 0; |
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
int i; |
for (i = 0; i < opcode->NumSrcRegs; ++i) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) |
c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; |
} |
if (opcode->HasDstReg) { |
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) |
c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; |
} |
} |
} |
/** |
* Rewrite the program such that everything that source the given input |
* register will source new_input instead. |
*/ |
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) |
{ |
struct rc_instruction * inst; |
c->Program.InputsRead &= ~(1 << input); |
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned i; |
for(i = 0; i < opcode->NumSrcRegs; ++i) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { |
inst->U.I.SrcReg[i].File = new_input.File; |
inst->U.I.SrcReg[i].Index = new_input.Index; |
inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); |
if (!inst->U.I.SrcReg[i].Abs) { |
inst->U.I.SrcReg[i].Negate ^= new_input.Negate; |
inst->U.I.SrcReg[i].Abs = new_input.Abs; |
} |
c->Program.InputsRead |= 1 << new_input.Index; |
} |
} |
} |
} |
/** |
* Rewrite the program such that everything that writes into the given |
* output register will instead write to new_output. The new_output |
* writemask is honoured. |
*/ |
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) |
{ |
struct rc_instruction * inst; |
c->Program.OutputsWritten &= ~(1 << output); |
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (opcode->HasDstReg) { |
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { |
inst->U.I.DstReg.Index = new_output; |
inst->U.I.DstReg.WriteMask &= writemask; |
c->Program.OutputsWritten |= 1 << new_output; |
} |
} |
} |
} |
/** |
* Rewrite the program such that a given output is duplicated. |
*/ |
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) |
{ |
unsigned tempreg = rc_find_free_temporary(c); |
struct rc_instruction * inst; |
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (opcode->HasDstReg) { |
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst->U.I.DstReg.Index = tempreg; |
} |
} |
} |
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.DstReg.File = RC_FILE_OUTPUT; |
inst->U.I.DstReg.Index = output; |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = tempreg; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; |
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.DstReg.File = RC_FILE_OUTPUT; |
inst->U.I.DstReg.Index = dup_output; |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = tempreg; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; |
c->Program.OutputsWritten |= 1 << dup_output; |
} |
/** |
* Introduce standard code fragment to deal with fragment.position. |
*/ |
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, |
int full_vtransform) |
{ |
unsigned tempregi = rc_find_free_temporary(c); |
struct rc_instruction * inst_rcp; |
struct rc_instruction * inst_mul; |
struct rc_instruction * inst_mad; |
struct rc_instruction * inst; |
c->Program.InputsRead &= ~(1 << wpos); |
c->Program.InputsRead |= 1 << new_input; |
/* perspective divide */ |
inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); |
inst_rcp->U.I.Opcode = RC_OPCODE_RCP; |
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_rcp->U.I.DstReg.Index = tempregi; |
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; |
inst_rcp->U.I.SrcReg[0].Index = new_input; |
inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; |
inst_mul = rc_insert_new_instruction(c, inst_rcp); |
inst_mul->U.I.Opcode = RC_OPCODE_MUL; |
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mul->U.I.DstReg.Index = tempregi; |
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; |
inst_mul->U.I.SrcReg[0].Index = new_input; |
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_mul->U.I.SrcReg[1].Index = tempregi; |
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; |
/* viewport transformation */ |
inst_mad = rc_insert_new_instruction(c, inst_mul); |
inst_mad->U.I.Opcode = RC_OPCODE_MAD; |
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mad->U.I.DstReg.Index = tempregi; |
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_mad->U.I.SrcReg[0].Index = tempregi; |
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; |
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; |
inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; |
inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; |
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; |
if (full_vtransform) { |
inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); |
inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); |
} else { |
inst_mad->U.I.SrcReg[1].Index = |
inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); |
} |
for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned i; |
for(i = 0; i < opcode->NumSrcRegs; i++) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && |
inst->U.I.SrcReg[i].Index == wpos) { |
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[i].Index = tempregi; |
} |
} |
} |
} |
/** |
* The FACE input in hardware contains 1 if it's a back face, 0 otherwise. |
* Gallium and OpenGL define it the other way around. |
* |
* So let's just negate FACE at the beginning of the shader and rewrite the rest |
* of the shader to read from the newly allocated temporary. |
*/ |
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) |
{ |
unsigned tempregi = rc_find_free_temporary(c); |
struct rc_instruction *inst_add; |
struct rc_instruction *inst; |
/* perspective divide */ |
inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); |
inst_add->U.I.Opcode = RC_OPCODE_ADD; |
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_add->U.I.DstReg.Index = tempregi; |
inst_add->U.I.DstReg.WriteMask = RC_MASK_X; |
inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; |
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; |
inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; |
inst_add->U.I.SrcReg[1].Index = face; |
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; |
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; |
for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned i; |
for(i = 0; i < opcode->NumSrcRegs; i++) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && |
inst->U.I.SrcReg[i].Index == face) { |
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[i].Index = tempregi; |
} |
} |
} |
} |
static void reg_count_callback(void * userdata, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask) |
{ |
struct rc_program_stats *s = userdata; |
if (file == RC_FILE_TEMPORARY) |
(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; |
if (file == RC_FILE_INLINE) |
s->num_inline_literals++; |
} |
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) |
{ |
struct rc_instruction * tmp; |
memset(s, 0, sizeof(*s)); |
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; |
tmp = tmp->Next){ |
const struct rc_opcode_info * info; |
rc_for_all_reads_mask(tmp, reg_count_callback, s); |
if (tmp->Type == RC_INSTRUCTION_NORMAL) { |
info = rc_get_opcode_info(tmp->U.I.Opcode); |
if (info->Opcode == RC_OPCODE_BEGIN_TEX) |
continue; |
if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) |
s->num_presub_ops++; |
} else { |
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) |
s->num_presub_ops++; |
if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) |
s->num_presub_ops++; |
/* Assuming alpha will never be a flow control or |
* a tex instruction. */ |
if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) |
s->num_alpha_insts++; |
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) |
s->num_rgb_insts++; |
if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && |
tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { |
s->num_omod_ops++; |
} |
if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && |
tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { |
s->num_omod_ops++; |
} |
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); |
} |
if (info->IsFlowControl) |
s->num_fc_insts++; |
if (info->HasTexture) |
s->num_tex_insts++; |
s->num_insts++; |
} |
/* Increment here because the reg_count_callback store the max |
* temporary reg index in s->nun_temp_regs. */ |
s->num_temp_regs++; |
} |
static void print_stats(struct radeon_compiler * c) |
{ |
struct rc_program_stats s; |
if (c->initial_num_insts <= 5) |
return; |
rc_get_stats(c, &s); |
switch (c->type) { |
case RC_VERTEX_PROGRAM: |
fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" |
"~%4u Instructions\n" |
"~%4u Flow Control Instructions\n" |
"~%4u Temporary Registers\n" |
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", |
s.num_insts, s.num_fc_insts, s.num_temp_regs); |
break; |
case RC_FRAGMENT_PROGRAM: |
fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" |
"~%4u Instructions\n" |
"~%4u Vector Instructions (RGB)\n" |
"~%4u Scalar Instructions (Alpha)\n" |
"~%4u Flow Control Instructions\n" |
"~%4u Texture Instructions\n" |
"~%4u Presub Operations\n" |
"~%4u OMOD Operations\n" |
"~%4u Temporary Registers\n" |
"~%4u Inline Literals\n" |
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", |
s.num_insts, s.num_rgb_insts, s.num_alpha_insts, |
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, |
s.num_omod_ops, s.num_temp_regs, s.num_inline_literals); |
break; |
default: |
assert(0); |
} |
} |
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { |
"Vertex Program", |
"Fragment Program" |
}; |
void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) |
{ |
for (unsigned i = 0; list[i].name; i++) { |
if (list[i].predicate) { |
list[i].run(c, list[i].user); |
if (c->Error) |
return; |
if ((c->Debug & RC_DBG_LOG) && list[i].dump) { |
fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); |
rc_print_program(&c->Program); |
} |
} |
} |
} |
/* Executes a list of compiler passes given in the parameter 'list'. */ |
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) |
{ |
struct rc_program_stats s; |
rc_get_stats(c, &s); |
c->initial_num_insts = s.num_insts; |
if (c->Debug & RC_DBG_LOG) { |
fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); |
rc_print_program(&c->Program); |
} |
rc_run_compiler_passes(c, list); |
if (c->Debug & RC_DBG_STATS) |
print_stats(c); |
} |
void rc_validate_final_shader(struct radeon_compiler *c, void *user) |
{ |
/* Check the number of constants. */ |
if (c->Program.Constants.Count > c->max_constants) { |
rc_error(c, "Too many constants. Max: %i, Got: %i\n", |
c->max_constants, c->Program.Constants.Count); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler.h |
---|
0,0 → 1,173 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef RADEON_COMPILER_H |
#define RADEON_COMPILER_H |
#include "main/compiler.h" |
#include "memory_pool.h" |
#include "radeon_code.h" |
#include "radeon_program.h" |
#include "radeon_emulate_loops.h" |
/* Flag bits stored in radeon_compiler::Debug. */
#define RC_DBG_LOG        (1 << 0)	/* dump the program while compiling */
#define RC_DBG_STATS      (1 << 1)	/* print statistics after compiling */

struct rc_swizzle_caps;

/* Kind of program a radeon_compiler instance is working on. */
enum rc_program_type {
	RC_VERTEX_PROGRAM,
	RC_FRAGMENT_PROGRAM,
	RC_NUM_PROGRAM_TYPES	/* number of program types, not a type itself */
};
struct radeon_compiler { |
struct memory_pool Pool; |
struct rc_program Program; |
const struct rc_regalloc_state *regalloc_state; |
enum rc_program_type type; |
unsigned Debug:2; |
unsigned Error:1; |
char * ErrorMsg; |
/* Hardware specification. */ |
unsigned is_r400:1; |
unsigned is_r500:1; |
unsigned has_half_swizzles:1; |
unsigned has_presub:1; |
unsigned has_omod:1; |
unsigned disable_optimizations:1; |
unsigned max_temp_regs; |
unsigned max_constants; |
int max_alu_insts; |
unsigned max_tex_insts; |
/* Whether to remove unused constants and empty holes in constant space. */ |
unsigned remove_unused_constants:1; |
/** |
* Variables used internally, not be touched by callers |
* of the compiler |
*/ |
/*@{*/ |
struct rc_swizzle_caps * SwizzleCaps; |
/*@}*/ |
struct emulate_loop_state loop_state; |
unsigned initial_num_insts; /* Number of instructions at start. */ |
}; |
/* Compiler lifetime. */
void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs);
void rc_destroy(struct radeon_compiler * c);

/* printf-style reporting helpers. */
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);

/* Backend of rc_assert(); receives the source location and assertion text. */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);

/**
 * Non-aborting assertion for use inside the compiler. The macro is meant to
 * be used as the condition of an if-statement.
 *
 * \p cond is evaluated first. When it does not hold, an internal compiler
 * error is flagged through rc_if_fail_helper() and the if-clause is run.
 *
 * Typical usage:
 *
 *  if (rc_assert(c, condition-that-must-be-true))
 *  	return;
 */
#define rc_assert(c, cond) \
	(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
/* Helpers that analyse or rewrite the inputs and outputs of a program. */
void rc_calculate_inputs_outputs(struct radeon_compiler * c);

void rc_move_input(struct radeon_compiler * c,
		   unsigned input,
		   struct rc_src_register new_input);

void rc_move_output(struct radeon_compiler * c,
		    unsigned output,
		    unsigned new_output,
		    unsigned writemask);

void rc_copy_output(struct radeon_compiler * c,
		    unsigned output,
		    unsigned dup_output);

void rc_transform_fragment_wpos(struct radeon_compiler * c,
				unsigned wpos,
				unsigned new_input,
				int full_vtransform);

/* Makes shader reads of the FACE input see the negated hardware value. */
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
struct r300_fragment_program_compiler { |
struct radeon_compiler Base; |
struct rX00_fragment_program_code *code; |
/* Optional transformations and features. */ |
struct r300_fragment_program_external_state state; |
/* Register corresponding to the depthbuffer. */ |
unsigned OutputDepth; |
/* Registers corresponding to the four colorbuffers. */ |
unsigned OutputColor[4]; |
void * UserData; |
void (*AllocateHwInputs)( |
struct r300_fragment_program_compiler * c, |
void (*allocate)(void * data, unsigned input, unsigned hwreg), |
void * mydata); |
}; |
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); |
struct r300_vertex_program_compiler { |
struct radeon_compiler Base; |
struct r300_vertex_program_code *code; |
uint32_t RequiredOutputs; |
void * UserData; |
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); |
}; |
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); |
void rc_vert_fc(struct radeon_compiler *compiler, void *user); |
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); |
/* One entry of a pass list; a list ends at the first entry whose name is NULL. */
struct radeon_compiler_pass {
	/* Name of the pass. */
	const char *name;

	/* Dump the program after the pass when RC_DBG_LOG is set? */
	int dump;

	/* Run this pass? */
	int predicate;

	/* The main entrypoint. */
	void (*run)(struct radeon_compiler *c, void *user);

	/* Optional parameter which is passed to the run function. */
	void *user;
};
/* Counters filled in by rc_get_stats(). */
struct rc_program_stats {
	unsigned int num_insts;			/* instructions counted */
	unsigned int num_fc_insts;		/* flow control instructions */
	unsigned int num_tex_insts;		/* texture instructions */
	unsigned int num_rgb_insts;		/* pair instructions with a non-NOP RGB half */
	unsigned int num_alpha_insts;		/* pair instructions with a non-NOP Alpha half */
	unsigned int num_presub_ops;		/* presubtract operations */
	unsigned int num_temp_regs;		/* highest temporary index read, plus one */
	unsigned int num_omod_ops;		/* output modifiers other than MUL_1/DISABLE */
	unsigned int num_inline_literals;	/* reads from the inline literal file */
};

void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);

/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);

/* Compiler pass that checks the number of constants against max_constants. */
void rc_validate_final_shader(struct radeon_compiler *c, void *user);
#endif /* RADEON_COMPILER_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler_util.c |
---|
0,0 → 1,753 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* \file |
*/ |
#include "radeon_compiler_util.h" |
#include "radeon_compiler.h" |
#include "radeon_dataflow.h" |
/** |
*/ |
unsigned int rc_swizzle_to_writemask(unsigned int swz) |
{ |
unsigned int mask = 0; |
unsigned int i; |
for(i = 0; i < 4; i++) { |
mask |= 1 << GET_SWZ(swz, i); |
} |
mask &= RC_MASK_XYZW; |
return mask; |
} |
/**
 * Look up channel \p idx of swizzle \p swz.
 *
 * An \p idx with bit 0x4 set is returned unchanged instead of being used as
 * a channel number.
 */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
	return (idx & 0x4) ? idx : GET_SWZ(swz, idx);
}
/** |
* The purpose of this function is to standardize the number channels used by |
* swizzles. All swizzles regardless of what instruction they are a part of |
* should have 4 channels initialized with values. |
* @param channels The number of channels in initial_value that have a |
* meaningful value. |
* @return An initialized swizzle that has all of the unused channels set to |
* RC_SWIZZLE_UNUSED. |
*/ |
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) |
{ |
unsigned int i; |
for (i = channels; i < 4; i++) { |
SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); |
} |
return initial_value; |
} |
/**
 * Build a swizzle whose four channels are get_swz(src, swz_x),
 * get_swz(src, swz_y), get_swz(src, swz_z) and get_swz(src, swz_w),
 * packed three bits per channel.
 */
unsigned int combine_swizzles4(unsigned int src,
		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
	const unsigned int x = get_swz(src, swz_x);
	const unsigned int y = get_swz(src, swz_y);
	const unsigned int z = get_swz(src, swz_z);
	const unsigned int w = get_swz(src, swz_w);

	return x | (y << 3) | (z << 6) | (w << 9);
}
unsigned int combine_swizzles(unsigned int src, unsigned int swz) |
{ |
unsigned int ret = 0; |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; |
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; |
return ret; |
} |
/** |
* @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W |
*/ |
rc_swizzle rc_mask_to_swizzle(unsigned int mask) |
{ |
switch (mask) { |
case RC_MASK_X: return RC_SWIZZLE_X; |
case RC_MASK_Y: return RC_SWIZZLE_Y; |
case RC_MASK_Z: return RC_SWIZZLE_Z; |
case RC_MASK_W: return RC_SWIZZLE_W; |
} |
return RC_SWIZZLE_UNUSED; |
} |
/**
 * Reorder mask bits according to swizzle: bit 'chan' of the result is the
 * mask bit selected by channel 'chan' of the swizzle. Channels whose
 * selector is 4 or larger contribute nothing.
 */
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
{
	unsigned reordered = 0;
	unsigned chan;

	for (chan = 0; chan < 4; ++chan) {
		const unsigned sel = GET_SWZ(swizzle, chan);

		if (sel >= 4)
			continue;

		reordered |= GET_BIT(mask, sel) << chan;
	}

	return reordered;
}
static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) |
{ |
if (info->HasTexture) { |
return 0; |
} |
switch (info->Opcode) { |
case RC_OPCODE_DP2: |
case RC_OPCODE_DP3: |
case RC_OPCODE_DP4: |
case RC_OPCODE_DDX: |
case RC_OPCODE_DDY: |
return 0; |
default: |
return 1; |
} |
} |
/** |
* @return A swizzle the results from converting old_swizzle using |
* conversion_swizzle |
*/ |
unsigned int rc_adjust_channels( |
unsigned int old_swizzle, |
unsigned int conversion_swizzle) |
{ |
unsigned int i; |
unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); |
for (i = 0; i < 4; i++) { |
unsigned int new_chan = get_swz(conversion_swizzle, i); |
if (new_chan == RC_SWIZZLE_UNUSED) { |
continue; |
} |
SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); |
} |
return new_swizzle; |
} |
static unsigned int rewrite_writemask( |
unsigned int old_mask, |
unsigned int conversion_swizzle) |
{ |
unsigned int new_mask = 0; |
unsigned int i; |
for (i = 0; i < 4; i++) { |
if (!GET_BIT(old_mask, i) |
|| GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { |
continue; |
} |
new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); |
} |
return new_mask; |
} |
/** |
* This function rewrites the writemask of sub and adjusts the swizzles |
* of all its source registers based on the conversion_swizzle. |
* conversion_swizzle represents a mapping of the old writemask to the |
* new writemask. For a detailed description of how conversion swizzles |
* work see rc_rewrite_swizzle(). |
*/ |
void rc_pair_rewrite_writemask( |
struct rc_pair_sub_instruction * sub, |
unsigned int conversion_swizzle) |
{ |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
unsigned int i; |
sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); |
if (!srcs_need_rewrite(info)) { |
return ; |
} |
for (i = 0; i < info->NumSrcRegs; i++) { |
sub->Arg[i].Swizzle = |
rc_adjust_channels(sub->Arg[i].Swizzle, |
conversion_swizzle); |
} |
} |
static void normal_rewrite_writemask_cb( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
unsigned int * conversion_swizzle = (unsigned int *)userdata; |
src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle); |
} |
/** |
* This function is the same as rc_pair_rewrite_writemask() except it |
* operates on normal instructions. |
*/ |
void rc_normal_rewrite_writemask( |
struct rc_instruction * inst, |
unsigned int conversion_swizzle) |
{ |
struct rc_sub_instruction * sub = &inst->U.I; |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
sub->DstReg.WriteMask = |
rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); |
if (info->HasTexture) { |
unsigned int i; |
assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); |
for (i = 0; i < 4; i++) { |
unsigned int swz = GET_SWZ(conversion_swizzle, i); |
if (swz > 3) |
continue; |
SET_SWZ(sub->TexSwizzle, swz, i); |
} |
} |
if (!srcs_need_rewrite(info)) { |
return; |
} |
rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, |
&conversion_swizzle); |
} |
/** |
* This function replaces each value 'swz' in swizzle with the value of |
* GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's |
* in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want |
* to change all the Y's in swizzle to X, then conversion_swizzle should be |
* _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then |
* conversion swizzle should be YX__ (0xfc1). |
* @param swizzle The swizzle to change |
* @param conversion_swizzle Describes the conversion to perform on the swizzle |
* @return A converted swizzle |
*/ |
unsigned int rc_rewrite_swizzle( |
unsigned int swizzle, |
unsigned int conversion_swizzle) |
{ |
unsigned int chan; |
unsigned int out_swizzle = swizzle; |
for (chan = 0; chan < 4; chan++) { |
unsigned int swz = GET_SWZ(swizzle, chan); |
unsigned int new_swz; |
if (swz > 3) { |
SET_SWZ(out_swizzle, chan, swz); |
} else { |
new_swz = GET_SWZ(conversion_swizzle, swz); |
if (new_swz != RC_SWIZZLE_UNUSED) { |
SET_SWZ(out_swizzle, chan, new_swz); |
} else { |
SET_SWZ(out_swizzle, chan, swz); |
} |
} |
} |
return out_swizzle; |
} |
/** |
* Left multiplication of a register with a swizzle |
*/ |
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) |
{ |
struct rc_src_register tmp = srcreg; |
int i; |
tmp.Swizzle = 0; |
tmp.Negate = 0; |
for(i = 0; i < 4; ++i) { |
rc_swizzle swz = GET_SWZ(swizzle, i); |
if (swz < 4) { |
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); |
tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; |
} else { |
tmp.Swizzle |= swz << (i*3); |
} |
} |
return tmp; |
} |
void reset_srcreg(struct rc_src_register* reg) |
{ |
memset(reg, 0, sizeof(struct rc_src_register)); |
reg->Swizzle = RC_SWIZZLE_XYZW; |
} |
unsigned int rc_src_reads_dst_mask( |
rc_register_file src_file, |
unsigned int src_idx, |
unsigned int src_swz, |
rc_register_file dst_file, |
unsigned int dst_idx, |
unsigned int dst_mask) |
{ |
if (src_file != dst_file || src_idx != dst_idx) { |
return RC_MASK_NONE; |
} |
return dst_mask & rc_swizzle_to_writemask(src_swz); |
} |
/** |
* @return A bit mask specifying whether this swizzle will select from an RGB |
* source, an Alpha source, or both. |
*/ |
unsigned int rc_source_type_swz(unsigned int swizzle) |
{ |
unsigned int chan; |
unsigned int swz = RC_SWIZZLE_UNUSED; |
unsigned int ret = RC_SOURCE_NONE; |
for(chan = 0; chan < 4; chan++) { |
swz = GET_SWZ(swizzle, chan); |
if (swz == RC_SWIZZLE_W) { |
ret |= RC_SOURCE_ALPHA; |
} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y |
|| swz == RC_SWIZZLE_Z) { |
ret |= RC_SOURCE_RGB; |
} |
} |
return ret; |
} |
unsigned int rc_source_type_mask(unsigned int mask) |
{ |
unsigned int ret = RC_SOURCE_NONE; |
if (mask & RC_MASK_XYZ) |
ret |= RC_SOURCE_RGB; |
if (mask & RC_MASK_W) |
ret |= RC_SOURCE_ALPHA; |
return ret; |
} |
struct src_select { |
rc_register_file File; |
int Index; |
unsigned int SrcType; |
}; |
struct can_use_presub_data { |
struct src_select Selects[5]; |
unsigned int SelectCount; |
const struct rc_src_register * ReplaceReg; |
unsigned int ReplaceRemoved; |
}; |
static void can_use_presub_data_add_select( |
struct can_use_presub_data * data, |
rc_register_file file, |
unsigned int index, |
unsigned int src_type) |
{ |
struct src_select * select; |
select = &data->Selects[data->SelectCount++]; |
select->File = file; |
select->Index = index; |
select->SrcType = src_type; |
} |
/** |
* This callback function counts the number of sources in inst that are |
* different from the sources in can_use_presub_data->RemoveSrcs. |
*/ |
static void can_use_presub_read_cb( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct can_use_presub_data * d = userdata; |
if (!d->ReplaceRemoved && src == d->ReplaceReg) { |
d->ReplaceRemoved = 1; |
return; |
} |
if (src->File == RC_FILE_NONE) |
return; |
can_use_presub_data_add_select(d, src->File, src->Index, |
rc_source_type_swz(src->Swizzle)); |
} |
unsigned int rc_inst_can_use_presub( |
struct rc_instruction * inst, |
rc_presubtract_op presub_op, |
unsigned int presub_writemask, |
const struct rc_src_register * replace_reg, |
const struct rc_src_register * presub_src0, |
const struct rc_src_register * presub_src1) |
{ |
struct can_use_presub_data d; |
unsigned int num_presub_srcs; |
unsigned int i; |
const struct rc_opcode_info * info = |
rc_get_opcode_info(inst->U.I.Opcode); |
int rgb_count = 0, alpha_count = 0; |
unsigned int src_type0, src_type1; |
if (presub_op == RC_PRESUB_NONE) { |
return 1; |
} |
if (info->HasTexture) { |
return 0; |
} |
/* We can't use more than one presubtract value in an |
* instruction, unless the two prsubtract operations |
* are the same and read from the same registers. |
* XXX For now we will limit instructions to only one presubtract |
* value.*/ |
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { |
return 0; |
} |
memset(&d, 0, sizeof(d)); |
d.ReplaceReg = replace_reg; |
rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); |
num_presub_srcs = rc_presubtract_src_reg_count(presub_op); |
src_type0 = rc_source_type_swz(presub_src0->Swizzle); |
can_use_presub_data_add_select(&d, |
presub_src0->File, |
presub_src0->Index, |
src_type0); |
if (num_presub_srcs > 1) { |
src_type1 = rc_source_type_swz(presub_src1->Swizzle); |
can_use_presub_data_add_select(&d, |
presub_src1->File, |
presub_src1->Index, |
src_type1); |
/* Even if both of the presub sources read from the same |
* register, we still need to use 2 different source selects |
* for them, so we need to increment the count to compensate. |
*/ |
if (presub_src0->File == presub_src1->File |
&& presub_src0->Index == presub_src1->Index) { |
if (src_type0 & src_type1 & RC_SOURCE_RGB) { |
rgb_count++; |
} |
if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { |
alpha_count++; |
} |
} |
} |
/* Count the number of source selects for Alpha and RGB. If we |
* encounter two of the same source selects then we can ignore the |
* first one. */ |
for (i = 0; i < d.SelectCount; i++) { |
unsigned int j; |
unsigned int src_type = d.Selects[i].SrcType; |
for (j = i + 1; j < d.SelectCount; j++) { |
if (d.Selects[i].File == d.Selects[j].File |
&& d.Selects[i].Index == d.Selects[j].Index) { |
src_type &= ~d.Selects[j].SrcType; |
} |
} |
if (src_type & RC_SOURCE_RGB) { |
rgb_count++; |
} |
if (src_type & RC_SOURCE_ALPHA) { |
alpha_count++; |
} |
} |
if (rgb_count > 3 || alpha_count > 3) { |
return 0; |
} |
return 1; |
} |
struct max_data { |
unsigned int Max; |
unsigned int HasFileType; |
rc_register_file File; |
}; |
static void max_callback( |
void * userdata, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct max_data * d = (struct max_data*)userdata; |
if (file == d->File && (!d->HasFileType || index > d->Max)) { |
d->Max = index; |
d->HasFileType = 1; |
} |
} |
/** |
* @return The maximum index of the specified register file used by the |
* program. |
*/ |
int rc_get_max_index( |
struct radeon_compiler * c, |
rc_register_file file) |
{ |
struct max_data data; |
struct rc_instruction * inst; |
data.Max = 0; |
data.HasFileType = 0; |
data.File = file; |
for (inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
rc_for_all_reads_mask(inst, max_callback, &data); |
rc_for_all_writes_mask(inst, max_callback, &data); |
} |
if (!data.HasFileType) { |
return -1; |
} else { |
return data.Max; |
} |
} |
static unsigned int get_source_readmask( |
struct rc_pair_sub_instruction * sub, |
unsigned int source, |
unsigned int src_type) |
{ |
unsigned int i; |
unsigned int readmask = 0; |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
for (i = 0; i < info->NumSrcRegs; i++) { |
if (sub->Arg[i].Source != source |
|| src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { |
continue; |
} |
readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); |
} |
return readmask; |
} |
/** |
* This function attempts to remove a source from a pair instructions. |
* @param inst |
* @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd |
* @param source The index of the source to remove |
* @param new_readmask A mask representing the components that are read by |
* the source that is intended to replace the one you are removing. If you |
* want to remove a source only and not replace it, this parameter should be |
* zero. |
* @return 1 if the source was successfully removed, 0 if it was not |
*/ |
unsigned int rc_pair_remove_src( |
struct rc_instruction * inst, |
unsigned int src_type, |
unsigned int source, |
unsigned int new_readmask) |
{ |
unsigned int readmask = 0; |
readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); |
readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); |
if ((new_readmask & readmask) != readmask) |
return 0; |
if (src_type & RC_SOURCE_RGB) { |
memset(&inst->U.P.RGB.Src[source], 0, |
sizeof(struct rc_pair_instruction_source)); |
} |
if (src_type & RC_SOURCE_ALPHA) { |
memset(&inst->U.P.Alpha.Src[source], 0, |
sizeof(struct rc_pair_instruction_source)); |
} |
return 1; |
} |
/** |
* @return RC_OPCODE_NOOP if inst is not a flow control instruction. |
* @return The opcode of inst if it is a flow control instruction. |
*/ |
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) |
{ |
const struct rc_opcode_info * info; |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
info = rc_get_opcode_info(inst->U.I.Opcode); |
} else { |
info = rc_get_opcode_info(inst->U.P.RGB.Opcode); |
/*A flow control instruction shouldn't have an alpha |
* instruction.*/ |
assert(!info->IsFlowControl || |
inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); |
} |
if (info->IsFlowControl) |
return info->Opcode; |
else |
return RC_OPCODE_NOP; |
} |
/** |
* @return The BGNLOOP instruction that starts the loop ended by endloop. |
*/ |
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) |
{ |
unsigned int endloop_count = 0; |
struct rc_instruction * inst; |
for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { |
rc_opcode op = rc_get_flow_control_inst(inst); |
if (op == RC_OPCODE_ENDLOOP) { |
endloop_count++; |
} else if (op == RC_OPCODE_BGNLOOP) { |
if (endloop_count == 0) { |
return inst; |
} else { |
endloop_count--; |
} |
} |
} |
return NULL; |
} |
/** |
* @return The ENDLOOP instruction that ends the loop started by bgnloop. |
*/ |
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) |
{ |
unsigned int bgnloop_count = 0; |
struct rc_instruction * inst; |
for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { |
rc_opcode op = rc_get_flow_control_inst(inst); |
if (op == RC_OPCODE_BGNLOOP) { |
bgnloop_count++; |
} else if (op == RC_OPCODE_ENDLOOP) { |
if (bgnloop_count == 0) { |
return inst; |
} else { |
bgnloop_count--; |
} |
} |
} |
return NULL; |
} |
/** |
* @return A conversion swizzle for converting from old_mask->new_mask |
*/ |
unsigned int rc_make_conversion_swizzle( |
unsigned int old_mask, |
unsigned int new_mask) |
{ |
unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); |
unsigned int old_idx; |
unsigned int new_idx = 0; |
for (old_idx = 0; old_idx < 4; old_idx++) { |
if (!GET_BIT(old_mask, old_idx)) |
continue; |
for ( ; new_idx < 4; new_idx++) { |
if (GET_BIT(new_mask, new_idx)) { |
SET_SWZ(conversion_swizzle, old_idx, new_idx); |
new_idx++; |
break; |
} |
} |
} |
return conversion_swizzle; |
} |
/** |
* @return 1 if the register contains an immediate value, 0 otherwise. |
*/ |
unsigned int rc_src_reg_is_immediate( |
struct radeon_compiler * c, |
unsigned int file, |
unsigned int index) |
{ |
return file == RC_FILE_CONSTANT && |
c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE; |
} |
/** |
* @return The immediate value in the specified register. |
*/ |
float rc_get_constant_value( |
struct radeon_compiler * c, |
unsigned int index, |
unsigned int swizzle, |
unsigned int negate, |
unsigned int chan) |
{ |
float base = 1.0f; |
int swz = GET_SWZ(swizzle, chan); |
if(swz >= 4 || index >= c->Program.Constants.Count ){ |
rc_error(c, "get_constant_value: Can't find a value.\n"); |
return 0.0f; |
} |
if(GET_BIT(negate, chan)){ |
base = -1.0f; |
} |
return base * |
c->Program.Constants.Constants[index].u.Immediate[swz]; |
} |
/** |
* This function returns the component value (RC_SWIZZLE_*) of the first used |
* channel in the swizzle. This is only useful for scalar instructions that are |
* known to use only one channel of the swizzle. |
*/ |
unsigned int rc_get_scalar_src_swz(unsigned int swizzle) |
{ |
unsigned int swz, chan; |
for (chan = 0; chan < 4; chan++) { |
swz = GET_SWZ(swizzle, chan); |
if (swz != RC_SWIZZLE_UNUSED) { |
break; |
} |
} |
assert(swz != RC_SWIZZLE_UNUSED); |
return swz; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler_util.h |
---|
0,0 → 1,130 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/* Declarations of swizzle, writemask and instruction-source helper routines.
 * The implementations are not part of this header; radeon_dataflow.c is one
 * of the users. */
/* NOTE(review): this include sits outside the include guard below, and the
 * guard macro is named RADEON_PROGRAM_UTIL_H although the file is
 * radeon_compiler_util.h - confirm whether either is intentional. */
#include "radeon_program_constants.h" |
#ifndef RADEON_PROGRAM_UTIL_H |
#define RADEON_PROGRAM_UTIL_H |
#include "radeon_opcodes.h" |
struct radeon_compiler; |
struct rc_instruction; |
struct rc_pair_instruction; |
struct rc_pair_sub_instruction; |
struct rc_src_register; |
/* radeon_dataflow.c uses the result as the set of channels a swizzle reads. */
unsigned int rc_swizzle_to_writemask(unsigned int swz); |
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); |
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); |
unsigned int combine_swizzles4(unsigned int src, |
rc_swizzle swz_x, rc_swizzle swz_y, |
rc_swizzle swz_z, rc_swizzle swz_w); |
unsigned int combine_swizzles(unsigned int src, unsigned int swz); |
rc_swizzle rc_mask_to_swizzle(unsigned int mask); |
unsigned swizzle_mask(unsigned swizzle, unsigned mask); |
unsigned int rc_adjust_channels( |
unsigned int old_swizzle, |
unsigned int conversion_swizzle); |
void rc_pair_rewrite_writemask( |
struct rc_pair_sub_instruction * sub, |
unsigned int conversion_swizzle); |
void rc_normal_rewrite_writemask( |
struct rc_instruction * inst, |
unsigned int conversion_swizzle); |
unsigned int rc_rewrite_swizzle( |
unsigned int swizzle, |
unsigned int new_mask); |
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); |
void reset_srcreg(struct rc_src_register* reg); |
/* Callers in radeon_dataflow.c treat an RC_MASK_NONE result as "this source
 * does not read the given destination"; any other value is used as the mask
 * of shared channels. */
unsigned int rc_src_reads_dst_mask( |
rc_register_file src_file, |
unsigned int src_idx, |
unsigned int src_swz, |
rc_register_file dst_file, |
unsigned int dst_idx, |
unsigned int dst_mask); |
/* radeon_dataflow.c compares the result with RC_SOURCE_NONE and tests it
 * against the RC_SOURCE_RGB bit. */
unsigned int rc_source_type_swz(unsigned int swizzle); |
unsigned int rc_source_type_mask(unsigned int mask); |
unsigned int rc_inst_can_use_presub( |
struct rc_instruction * inst, |
rc_presubtract_op presub_op, |
unsigned int presub_writemask, |
const struct rc_src_register * replace_reg, |
const struct rc_src_register * presub_src0, |
const struct rc_src_register * presub_src1); |
int rc_get_max_index( |
struct radeon_compiler * c, |
rc_register_file file); |
unsigned int rc_pair_remove_src( |
struct rc_instruction * inst, |
unsigned int src_type, |
unsigned int source, |
unsigned int new_readmask); |
/* radeon_dataflow.c switches on the result against RC_OPCODE_BGNLOOP,
 * RC_OPCODE_ENDLOOP, RC_OPCODE_IF, RC_OPCODE_ELSE and RC_OPCODE_ENDIF. */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); |
/* radeon_dataflow.c treats a NULL return as "no matching instruction found". */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); |
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); |
unsigned int rc_make_conversion_swizzle( |
unsigned int old_mask, |
unsigned int new_mask); |
unsigned int rc_src_reg_is_immediate( |
struct radeon_compiler * c, |
unsigned int file, |
unsigned int index); |
float rc_get_constant_value( |
struct radeon_compiler * c, |
unsigned int index, |
unsigned int swizzle, |
unsigned int negate, |
unsigned int chan); |
unsigned int rc_get_scalar_src_swz(unsigned int swizzle); |
#endif /* RADEON_PROGRAM_UTIL_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow.c |
---|
0,0 → 1,892 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_dataflow.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_program.h" |
/* Adapter state used by rc_for_all_reads_mask(): it lets the per-source
 * callback reads_normal_callback() forward to a mask-based callback. */
struct read_write_mask_data { |
/* Opaque pointer passed through unchanged as the first argument of Cb. */
void * UserData; |
/* Mask-based callback invoked by reads_normal_callback(). */
rc_read_write_mask_fn Cb; |
}; |
static void reads_normal_callback( |
void * userdata, |
struct rc_instruction * fullinst, |
struct rc_src_register * src) |
{ |
struct read_write_mask_data * cb_data = userdata; |
unsigned int refmask = 0; |
unsigned int chan; |
for(chan = 0; chan < 4; chan++) { |
refmask |= 1 << GET_SWZ(src->Swizzle, chan); |
} |
refmask &= RC_MASK_XYZW; |
if (refmask) { |
cb_data->Cb(cb_data->UserData, fullinst, src->File, |
src->Index, refmask); |
} |
if (refmask && src->RelAddr) { |
cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, |
RC_MASK_X); |
} |
} |
static void pair_get_src_refmasks(unsigned int * refmasks, |
struct rc_pair_instruction * inst, |
unsigned int swz, unsigned int src) |
{ |
if (swz >= 4) |
return; |
if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { |
if(src == RC_PAIR_PRESUB_SRC) { |
unsigned int i; |
int srcp_regs = |
rc_presubtract_src_reg_count( |
inst->RGB.Src[src].Index); |
for(i = 0; i < srcp_regs; i++) { |
refmasks[i] |= 1 << swz; |
} |
} |
else { |
refmasks[src] |= 1 << swz; |
} |
} |
if (swz == RC_SWIZZLE_W) { |
if (src == RC_PAIR_PRESUB_SRC) { |
unsigned int i; |
int srcp_regs = rc_presubtract_src_reg_count( |
inst->Alpha.Src[src].Index); |
for(i = 0; i < srcp_regs; i++) { |
refmasks[i] |= 1 << swz; |
} |
} |
else { |
refmasks[src] |= 1 << swz; |
} |
} |
} |
/**
 * Report the register reads of a pair instruction as channel masks.
 *
 * The first three swizzle channels of each of the three RGB and Alpha
 * arguments are folded into one mask per source slot.  Then, for each slot,
 * \p cb is called for the RGB source with the XYZ part of the mask and for
 * the Alpha source with RC_MASK_W, provided the slot is marked Used and the
 * respective part of the mask is non-empty.
 */
static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	unsigned int used[3] = { 0, 0, 0 };
	unsigned int a, c, slot;

	for (a = 0; a < 3; a++) {
		for (c = 0; c < 3; c++) {
			const unsigned int rgb_swz =
				GET_SWZ(pair->RGB.Arg[a].Swizzle, c);
			const unsigned int alpha_swz =
				GET_SWZ(pair->Alpha.Arg[a].Swizzle, c);

			pair_get_src_refmasks(used, pair, rgb_swz,
					      pair->RGB.Arg[a].Source);
			pair_get_src_refmasks(used, pair, alpha_swz,
					      pair->Alpha.Arg[a].Source);
		}
	}

	for (slot = 0; slot < 3; slot++) {
		const unsigned int rgb_part = used[slot] & RC_MASK_XYZ;
		const unsigned int alpha_part = used[slot] & RC_MASK_W;

		if (pair->RGB.Src[slot].Used && rgb_part) {
			cb(userdata, fullinst, pair->RGB.Src[slot].File,
			   pair->RGB.Src[slot].Index, rgb_part);
		}

		if (pair->Alpha.Src[slot].Used && alpha_part) {
			cb(userdata, fullinst, pair->Alpha.Src[slot].File,
			   pair->Alpha.Src[slot].Index, RC_MASK_W);
		}
	}
}
static void pair_sub_for_all_args( |
struct rc_instruction * fullinst, |
struct rc_pair_sub_instruction * sub, |
rc_pair_read_arg_fn cb, |
void * userdata) |
{ |
int i; |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
for(i = 0; i < info->NumSrcRegs; i++) { |
unsigned int src_type; |
src_type = rc_source_type_swz(sub->Arg[i].Swizzle); |
if (src_type == RC_SOURCE_NONE) |
continue; |
if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { |
unsigned int presub_type; |
unsigned int presub_src_count; |
struct rc_pair_instruction_source * src_array; |
unsigned int j; |
if (src_type & RC_SOURCE_RGB) { |
presub_type = fullinst-> |
U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; |
src_array = fullinst->U.P.RGB.Src; |
} else { |
presub_type = fullinst-> |
U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; |
src_array = fullinst->U.P.Alpha.Src; |
} |
presub_src_count |
= rc_presubtract_src_reg_count(presub_type); |
for(j = 0; j < presub_src_count; j++) { |
cb(userdata, fullinst, &sub->Arg[i], |
&src_array[j]); |
} |
} else { |
struct rc_pair_instruction_source * src = |
rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); |
if (src) { |
cb(userdata, fullinst, &sub->Arg[i], src); |
} |
} |
} |
} |
/* This function calls the callback function (cb) for each source used by |
* the instruction. |
* */ |
void rc_for_all_reads_src( |
struct rc_instruction * inst, |
rc_read_src_fn cb, |
void * userdata) |
{ |
const struct rc_opcode_info * opcode = |
rc_get_opcode_info(inst->U.I.Opcode); |
/* This function only works with normal instructions. */ |
if (inst->Type != RC_INSTRUCTION_NORMAL) { |
assert(0); |
return; |
} |
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { |
if (inst->U.I.SrcReg[src].File == RC_FILE_NONE) |
continue; |
if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { |
unsigned int i; |
unsigned int srcp_regs = rc_presubtract_src_reg_count( |
inst->U.I.PreSub.Opcode); |
for( i = 0; i < srcp_regs; i++) { |
cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); |
} |
} else { |
cb(userdata, inst, &inst->U.I.SrcReg[src]); |
} |
} |
} |
/** |
* This function calls the callback function (cb) for each arg of the RGB and |
* alpha components. |
*/ |
void rc_pair_for_all_reads_arg( |
struct rc_instruction * inst, |
rc_pair_read_arg_fn cb, |
void * userdata) |
{ |
/* This function only works with pair instructions. */ |
if (inst->Type != RC_INSTRUCTION_PAIR) { |
assert(0); |
return; |
} |
pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); |
pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); |
} |
/** |
* Calls a callback function for all register reads. |
* |
* This is conservative, i.e. if the same register is referenced multiple times, |
* the callback may also be called multiple times. |
* Also, the writemask of the instruction is not taken into account. |
*/ |
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) |
{ |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
struct read_write_mask_data cb_data; |
cb_data.UserData = userdata; |
cb_data.Cb = cb; |
rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); |
} else { |
reads_pair(inst, cb, userdata); |
} |
} |
/**
 * Report the writes of a normal instruction: the destination register with
 * its writemask (only if the opcode has a destination and the mask is
 * non-zero), then the ALU result special register, channel X, if the
 * instruction has WriteALUResult set.
 */
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_sub_instruction * sub = &fullinst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);

	if (info->HasDstReg) {
		if (sub->DstReg.WriteMask) {
			cb(userdata, fullinst, sub->DstReg.File,
			   sub->DstReg.Index, sub->DstReg.WriteMask);
		}
	}

	if (sub->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
		   RC_MASK_X);
	}
}
/**
 * Report the writes of a pair instruction.  Both halves are reported as
 * writes to RC_FILE_TEMPORARY: the RGB half with its own writemask, the
 * Alpha half always as RC_MASK_W.  A set WriteALUResult is reported last as
 * a write to the ALU result special register, channel X.
 */
static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;

	if (pair->RGB.WriteMask) {
		cb(userdata, fullinst, RC_FILE_TEMPORARY,
		   pair->RGB.DestIndex, pair->RGB.WriteMask);
	}

	if (pair->Alpha.WriteMask) {
		cb(userdata, fullinst, RC_FILE_TEMPORARY,
		   pair->Alpha.DestIndex, RC_MASK_W);
	}

	if (pair->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
		   RC_MASK_X);
	}
}
/** |
* Calls a callback function for all register writes in the instruction, |
* reporting writemasks to the callback function. |
* |
* \warning Does not report output registers for paired instructions! |
*/ |
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) |
{ |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
writes_normal(inst, cb, userdata); |
} else { |
writes_pair(inst, cb, userdata); |
} |
} |
/* Adapter state used by rc_for_all_reads_chan() / rc_for_all_writes_chan():
 * lets mask_to_chan_cb() turn one mask report into per-channel reports. */
struct mask_to_chan_data { |
/* Opaque pointer passed through unchanged as the first argument of Fn. */
void * UserData; |
/* Per-channel callback invoked once for each bit set in a reported mask. */
rc_read_write_chan_fn Fn; |
}; |
static void mask_to_chan_cb(void * data, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask) |
{ |
struct mask_to_chan_data * d = data; |
for(unsigned int chan = 0; chan < 4; ++chan) { |
if (GET_BIT(mask, chan)) |
d->Fn(d->UserData, inst, file, index, chan); |
} |
} |
/** |
* Calls a callback function for all sourced register channels. |
* |
* This is conservative, i.e. channels may be called multiple times, |
* and the writemask of the instruction is not taken into account. |
*/ |
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) |
{ |
struct mask_to_chan_data d; |
d.UserData = userdata; |
d.Fn = cb; |
rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); |
} |
/** |
* Calls a callback function for all written register channels. |
* |
* \warning Does not report output registers for paired instructions! |
*/ |
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) |
{ |
struct mask_to_chan_data d; |
d.UserData = userdata; |
d.Fn = cb; |
rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); |
} |
static void remap_normal_instruction(struct rc_instruction * fullinst, |
rc_remap_register_fn cb, void * userdata) |
{ |
struct rc_sub_instruction * inst = &fullinst->U.I; |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); |
unsigned int remapped_presub = 0; |
if (opcode->HasDstReg) { |
rc_register_file file = inst->DstReg.File; |
unsigned int index = inst->DstReg.Index; |
cb(userdata, fullinst, &file, &index); |
inst->DstReg.File = file; |
inst->DstReg.Index = index; |
} |
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { |
rc_register_file file = inst->SrcReg[src].File; |
unsigned int index = inst->SrcReg[src].Index; |
if (file == RC_FILE_PRESUB) { |
unsigned int i; |
unsigned int srcp_srcs = rc_presubtract_src_reg_count( |
inst->PreSub.Opcode); |
/* Make sure we only remap presubtract sources once in |
* case more than one source register reads the |
* presubtract result. */ |
if (remapped_presub) |
continue; |
for(i = 0; i < srcp_srcs; i++) { |
file = inst->PreSub.SrcReg[i].File; |
index = inst->PreSub.SrcReg[i].Index; |
cb(userdata, fullinst, &file, &index); |
inst->PreSub.SrcReg[i].File = file; |
inst->PreSub.SrcReg[i].Index = index; |
} |
remapped_presub = 1; |
} |
else { |
cb(userdata, fullinst, &file, &index); |
inst->SrcReg[src].File = file; |
inst->SrcReg[src].Index = index; |
} |
} |
} |
static void remap_pair_instruction(struct rc_instruction * fullinst, |
rc_remap_register_fn cb, void * userdata) |
{ |
struct rc_pair_instruction * inst = &fullinst->U.P; |
if (inst->RGB.WriteMask) { |
rc_register_file file = RC_FILE_TEMPORARY; |
unsigned int index = inst->RGB.DestIndex; |
cb(userdata, fullinst, &file, &index); |
inst->RGB.DestIndex = index; |
} |
if (inst->Alpha.WriteMask) { |
rc_register_file file = RC_FILE_TEMPORARY; |
unsigned int index = inst->Alpha.DestIndex; |
cb(userdata, fullinst, &file, &index); |
inst->Alpha.DestIndex = index; |
} |
for(unsigned int src = 0; src < 3; ++src) { |
if (inst->RGB.Src[src].Used) { |
rc_register_file file = inst->RGB.Src[src].File; |
unsigned int index = inst->RGB.Src[src].Index; |
cb(userdata, fullinst, &file, &index); |
inst->RGB.Src[src].File = file; |
inst->RGB.Src[src].Index = index; |
} |
if (inst->Alpha.Src[src].Used) { |
rc_register_file file = inst->Alpha.Src[src].File; |
unsigned int index = inst->Alpha.Src[src].Index; |
cb(userdata, fullinst, &file, &index); |
inst->Alpha.Src[src].File = file; |
inst->Alpha.Src[src].Index = index; |
} |
} |
} |
/** |
* Remap all register accesses according to the given function. |
* That is, call the function \p cb for each referenced register (both read and written) |
* and update the given instruction \p inst accordingly |
* if it modifies its \ref pfile and \ref pindex contents. |
*/ |
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) |
{ |
if (inst->Type == RC_INSTRUCTION_NORMAL) |
remap_normal_instruction(inst, cb, userdata); |
else |
remap_pair_instruction(inst, cb, userdata); |
} |
/* Per-nesting-level bookkeeping for get_readers_for_single_write(). One
 * entry is filled by push_branch_mask() and consumed by pop_branch_mask(). */
struct branch_write_mask { |
/* AliveWriteMask as it was when the block was entered (set on push). */
unsigned int IfWriteMask:4; |
/* AliveWriteMask as it was when the ELSE of this block was reached. */
unsigned int ElseWriteMask:4; |
/* Set when an ELSE has been seen for this level. */
unsigned int HasElse:1; |
}; |
/* Holds either a normal-instruction or a pair-instruction read callback.
 * NOTE(review): nothing in the visible part of this file references this
 * union - confirm whether it is still needed. */
union get_readers_read_cb { |
rc_read_src_fn I; |
rc_pair_read_arg_fn P; |
}; |
/* Working state of one rc_get_readers() / rc_get_readers_sub() run. It is
 * passed as userdata to the internal get_readers_* callbacks. */
struct get_readers_callback_data { |
struct radeon_compiler * C; |
/* Caller-visible result; also what the user callbacks receive as userdata. */
struct rc_reader_data * ReaderData; |
/* Optional user callbacks (may be NULL; each is checked before use). */
rc_read_src_fn ReadNormalCB; |
rc_pair_read_arg_fn ReadPairCB; |
rc_read_write_mask_fn WriteCB; |
/* Destination register and mask of the write currently being followed. */
rc_register_file DstFile; |
unsigned int DstIndex; |
unsigned int DstMask; |
/* Channels of DstMask not yet overwritten on the path being scanned. */
unsigned int AliveWriteMask; |
/* For convenience, this is indexed starting at 1 */ |
struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; |
}; |
static struct rc_reader * add_reader( |
struct memory_pool * pool, |
struct rc_reader_data * data, |
struct rc_instruction * inst, |
unsigned int mask) |
{ |
struct rc_reader * new; |
memory_pool_array_reserve(pool, struct rc_reader, data->Readers, |
data->ReaderCount, data->ReadersReserved, 1); |
new = &data->Readers[data->ReaderCount++]; |
new->Inst = inst; |
new->WriteMask = mask; |
return new; |
} |
static void add_reader_normal( |
struct memory_pool * pool, |
struct rc_reader_data * data, |
struct rc_instruction * inst, |
unsigned int mask, |
struct rc_src_register * src) |
{ |
struct rc_reader * new = add_reader(pool, data, inst, mask); |
new->U.I.Src = src; |
} |
static void add_reader_pair( |
struct memory_pool * pool, |
struct rc_reader_data * data, |
struct rc_instruction * inst, |
unsigned int mask, |
struct rc_pair_instruction_arg * arg, |
struct rc_pair_instruction_source * src) |
{ |
struct rc_reader * new = add_reader(pool, data, inst, mask); |
new->U.P.Src = src; |
new->U.P.Arg = arg; |
} |
static unsigned int get_readers_read_callback( |
struct get_readers_callback_data * cb_data, |
unsigned int has_rel_addr, |
rc_register_file file, |
unsigned int index, |
unsigned int swizzle) |
{ |
unsigned int shared_mask, read_mask; |
if (has_rel_addr) { |
cb_data->ReaderData->Abort = 1; |
return RC_MASK_NONE; |
} |
shared_mask = rc_src_reads_dst_mask(file, index, swizzle, |
cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); |
if (shared_mask == RC_MASK_NONE) |
return shared_mask; |
/* If we make it this far, it means that this source reads from the |
* same register written to by d->ReaderData->Writer. */ |
read_mask = rc_swizzle_to_writemask(swizzle); |
if (cb_data->ReaderData->AbortOnRead & read_mask) { |
cb_data->ReaderData->Abort = 1; |
return shared_mask; |
} |
if (cb_data->ReaderData->LoopDepth > 0) { |
cb_data->ReaderData->AbortOnWrite |= |
(read_mask & cb_data->AliveWriteMask); |
} |
/* XXX The behavior in this case should be configurable. */ |
if ((read_mask & cb_data->AliveWriteMask) != read_mask) { |
cb_data->ReaderData->Abort = 1; |
return shared_mask; |
} |
return shared_mask; |
} |
static void get_readers_pair_read_callback( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_pair_instruction_arg * arg, |
struct rc_pair_instruction_source * src) |
{ |
unsigned int shared_mask; |
struct get_readers_callback_data * d = userdata; |
shared_mask = get_readers_read_callback(d, |
0 /*Pair Instructions don't use RelAddr*/, |
src->File, src->Index, arg->Swizzle); |
if (shared_mask == RC_MASK_NONE) |
return; |
if (d->ReadPairCB) |
d->ReadPairCB(d->ReaderData, inst, arg, src); |
if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) |
return; |
add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); |
} |
/** |
* This function is used by rc_get_readers_normal() to determine whether inst |
* is a reader of userdata->ReaderData->Writer |
*/ |
static void get_readers_normal_read_callback( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct get_readers_callback_data * d = userdata; |
unsigned int shared_mask; |
shared_mask = get_readers_read_callback(d, |
src->RelAddr, src->File, src->Index, src->Swizzle); |
if (shared_mask == RC_MASK_NONE) |
return; |
/* The callback function could potentially clear d->ReaderData->Abort, |
* so we need to call it before we return. */ |
if (d->ReadNormalCB) |
d->ReadNormalCB(d->ReaderData, inst, src); |
if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) |
return; |
add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); |
} |
/** |
* This function is used by rc_get_readers_normal() to determine when |
* userdata->ReaderData->Writer is dead (i. e. All compontents of its |
* destination register have been overwritten by other instructions). |
*/ |
static void get_readers_write_callback( |
void *userdata, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct get_readers_callback_data * d = userdata; |
if (index == d->DstIndex && file == d->DstFile) { |
unsigned int shared_mask = mask & d->DstMask; |
d->ReaderData->AbortOnRead &= ~shared_mask; |
d->AliveWriteMask &= ~shared_mask; |
if (d->ReaderData->AbortOnWrite & shared_mask) { |
d->ReaderData->Abort = 1; |
} |
} |
if(d->WriteCB) |
d->WriteCB(d->ReaderData, inst, file, index, mask); |
} |
static void push_branch_mask( |
struct get_readers_callback_data * d, |
unsigned int * branch_depth) |
{ |
(*branch_depth)++; |
if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { |
d->ReaderData->Abort = 1; |
return; |
} |
d->BranchMasks[*branch_depth].IfWriteMask = |
d->AliveWriteMask; |
} |
static void pop_branch_mask( |
struct get_readers_callback_data * d, |
unsigned int * branch_depth) |
{ |
struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; |
if (masks->HasElse) { |
/* Abort on read for components that were written in the IF |
* block. */ |
d->ReaderData->AbortOnRead |= |
masks->IfWriteMask & ~masks->ElseWriteMask; |
/* Abort on read for components that were written in the ELSE |
* block. */ |
d->ReaderData->AbortOnRead |= |
masks->ElseWriteMask & ~d->AliveWriteMask; |
d->AliveWriteMask = masks->IfWriteMask |
^ ((masks->IfWriteMask ^ masks->ElseWriteMask) |
& (masks->IfWriteMask ^ d->AliveWriteMask)); |
} else { |
d->ReaderData->AbortOnRead |= |
masks->IfWriteMask & ~d->AliveWriteMask; |
d->AliveWriteMask = masks->IfWriteMask; |
} |
memset(masks, 0, sizeof(struct branch_write_mask)); |
(*branch_depth)--; |
} |
static void get_readers_for_single_write( |
void * userdata, |
struct rc_instruction * writer, |
rc_register_file dst_file, |
unsigned int dst_index, |
unsigned int dst_mask) |
{ |
struct rc_instruction * tmp; |
unsigned int branch_depth = 0; |
struct rc_instruction * endloop = NULL; |
unsigned int abort_on_read_at_endloop = 0; |
struct get_readers_callback_data * d = userdata; |
d->ReaderData->Writer = writer; |
d->ReaderData->AbortOnRead = 0; |
d->ReaderData->AbortOnWrite = 0; |
d->ReaderData->LoopDepth = 0; |
d->ReaderData->InElse = 0; |
d->DstFile = dst_file; |
d->DstIndex = dst_index; |
d->DstMask = dst_mask; |
d->AliveWriteMask = dst_mask; |
memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); |
if (!dst_mask) |
return; |
for(tmp = writer->Next; tmp != &d->C->Program.Instructions; |
tmp = tmp->Next){ |
rc_opcode opcode = rc_get_flow_control_inst(tmp); |
switch(opcode) { |
case RC_OPCODE_BGNLOOP: |
d->ReaderData->LoopDepth++; |
push_branch_mask(d, &branch_depth); |
break; |
case RC_OPCODE_ENDLOOP: |
if (d->ReaderData->LoopDepth > 0) { |
d->ReaderData->LoopDepth--; |
if (d->ReaderData->LoopDepth == 0) { |
d->ReaderData->AbortOnWrite = 0; |
} |
pop_branch_mask(d, &branch_depth); |
} else { |
/* Here we have reached an ENDLOOP without |
* seeing its BGNLOOP. These means that |
* the writer was written inside of a loop, |
* so it could have readers that are above it |
* (i.e. they have a lower IP). To find these |
* readers we jump to the BGNLOOP instruction |
* and check each instruction until we get |
* back to the writer. |
*/ |
endloop = tmp; |
tmp = rc_match_endloop(tmp); |
if (!tmp) { |
rc_error(d->C, "Failed to match endloop.\n"); |
d->ReaderData->Abort = 1; |
return; |
} |
abort_on_read_at_endloop = d->ReaderData->AbortOnRead; |
d->ReaderData->AbortOnRead |= d->AliveWriteMask; |
continue; |
} |
break; |
case RC_OPCODE_IF: |
push_branch_mask(d, &branch_depth); |
break; |
case RC_OPCODE_ELSE: |
if (branch_depth == 0) { |
d->ReaderData->InElse = 1; |
} else { |
unsigned int temp_mask = d->AliveWriteMask; |
d->AliveWriteMask = |
d->BranchMasks[branch_depth].IfWriteMask; |
d->BranchMasks[branch_depth].ElseWriteMask = |
temp_mask; |
d->BranchMasks[branch_depth].HasElse = 1; |
} |
break; |
case RC_OPCODE_ENDIF: |
if (branch_depth == 0) { |
d->ReaderData->AbortOnRead = d->AliveWriteMask; |
d->ReaderData->InElse = 0; |
} |
else { |
pop_branch_mask(d, &branch_depth); |
} |
break; |
default: |
break; |
} |
if (d->ReaderData->InElse) |
continue; |
if (tmp->Type == RC_INSTRUCTION_NORMAL) { |
rc_for_all_reads_src(tmp, |
get_readers_normal_read_callback, d); |
} else { |
rc_pair_for_all_reads_arg(tmp, |
get_readers_pair_read_callback, d); |
} |
/* This can happen when we jump from an ENDLOOP to BGNLOOP */ |
if (tmp == writer) { |
tmp = endloop; |
endloop = NULL; |
d->ReaderData->AbortOnRead = abort_on_read_at_endloop; |
continue; |
} |
rc_for_all_writes_mask(tmp, get_readers_write_callback, d); |
if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) |
return; |
if (branch_depth == 0 && !d->AliveWriteMask) |
return; |
} |
} |
static void init_get_readers_callback_data( |
struct get_readers_callback_data * d, |
struct rc_reader_data * reader_data, |
struct radeon_compiler * c, |
rc_read_src_fn read_normal_cb, |
rc_pair_read_arg_fn read_pair_cb, |
rc_read_write_mask_fn write_cb) |
{ |
reader_data->Abort = 0; |
reader_data->ReaderCount = 0; |
reader_data->ReadersReserved = 0; |
reader_data->Readers = NULL; |
d->C = c; |
d->ReaderData = reader_data; |
d->ReadNormalCB = read_normal_cb; |
d->ReadPairCB = read_pair_cb; |
d->WriteCB = write_cb; |
} |
/** |
* This function will create a list of readers via the rc_reader_data struct. |
* This function will abort (set the flag data->Abort) and return if it |
* encounters an instruction that reads from @param writer and also a different |
* instruction. Here are some examples: |
* |
* writer = instruction 0; |
* 0 MOV TEMP[0].xy, TEMP[1].xy |
* 1 MOV TEMP[0].zw, TEMP[2].xy |
* 2 MOV TEMP[3], TEMP[0] |
* The Abort flag will be set on instruction 2, because it reads values written |
* by instructions 0 and 1. |
* |
* writer = instruction 1; |
* 0 IF TEMP[0].x |
* 1 MOV TEMP[1], TEMP[2] |
* 2 ELSE |
* 3 MOV TEMP[1], TEMP[2] |
* 4 ENDIF |
* 5 MOV TEMP[3], TEMP[1] |
* The Abort flag will be set on instruction 5, because it could read from the |
* value written by either instruction 1 or 3, depending on the jump decision |
* made at instruction 0. |
* |
* writer = instruction 0; |
* 0 MOV TEMP[0], TEMP[1] |
* 2 BGNLOOP |
* 3 ADD TEMP[0], TEMP[0], none.1 |
* 4 ENDLOOP |
* The Abort flag will be set on instruction 3, because in the first iteration |
* of the loop it reads the value written by instruction 0 and in all other |
* iterations it reads the value written by instruction 3. |
* |
* @param read_cb This function will be called for for every instruction that |
* has been determined to be a reader of writer. |
* @param write_cb This function will be called for every instruction after |
* writer. |
*/ |
void rc_get_readers( |
struct radeon_compiler * c, |
struct rc_instruction * writer, |
struct rc_reader_data * data, |
rc_read_src_fn read_normal_cb, |
rc_pair_read_arg_fn read_pair_cb, |
rc_read_write_mask_fn write_cb) |
{ |
struct get_readers_callback_data d; |
init_get_readers_callback_data(&d, data, c, read_normal_cb, |
read_pair_cb, write_cb); |
rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); |
} |
void rc_get_readers_sub( |
struct radeon_compiler * c, |
struct rc_instruction * writer, |
struct rc_pair_sub_instruction * sub_writer, |
struct rc_reader_data * data, |
rc_read_src_fn read_normal_cb, |
rc_pair_read_arg_fn read_pair_cb, |
rc_read_write_mask_fn write_cb) |
{ |
struct get_readers_callback_data d; |
init_get_readers_callback_data(&d, data, c, read_normal_cb, |
read_pair_cb, write_cb); |
if (sub_writer->WriteMask) { |
get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, |
sub_writer->DestIndex, sub_writer->WriteMask); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow.h |
---|
0,0 → 1,135 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_DATAFLOW_H |
#define RADEON_DATAFLOW_H |
#include "radeon_program_constants.h" |
struct radeon_compiler; |
struct rc_instruction; |
struct rc_swizzle_caps; |
struct rc_src_register; |
struct rc_pair_instruction_arg; |
struct rc_pair_instruction_source; |
struct rc_pair_sub_instruction; |
struct rc_compiler; |
/** |
* Help analyze and modify the register accesses of instructions. |
*/ |
/*@{*/ |
/* Per-channel callback: chan is a channel number in the range 0..3. */
typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int chan); |
/* Call cb for every register channel read by inst. A channel may be
 * reported more than once; the writemask of inst is not considered. */
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); |
/* Call cb for every register channel written by inst. Output registers of
 * pair instructions are not reported. */
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); |
/* Mask callback: mask is a bitmask of channels of register file[index]. */
typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask); |
/* Call cb for every register read by inst, with the mask of channels read.
 * A register may be reported more than once. */
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); |
/* Call cb for every register written by inst, with its writemask. Output
 * registers of pair instructions are not reported. */
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); |
/* Source callback for normal instructions. */
typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, |
struct rc_src_register * src); |
/* Call cb for each source register of a normal instruction; a presubtract
 * source is replaced by the inputs of the presubtract operation. Asserts
 * and returns if inst is not a normal instruction. */
void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, |
void * userdata); |
/* Argument callback for pair instructions. */
typedef void (*rc_pair_read_arg_fn)(void * userdata, |
struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, |
struct rc_pair_instruction_source * src); |
/* Call cb for each argument of the RGB half, then of the Alpha half, of a
 * pair instruction. Asserts and returns if inst is not a pair instruction. */
void rc_pair_for_all_reads_arg(struct rc_instruction * inst, |
rc_pair_read_arg_fn cb, void * userdata); |
/* Remap callback: may overwrite *pfile and *pindex; the new values are
 * stored back into the instruction. */
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, |
rc_register_file * pfile, unsigned int * pindex); |
/* Call cb for each register referenced (read or written) by inst and apply
 * the changes it makes. */
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); |
/*@}*/ |
/* One reader found by rc_get_readers() / rc_get_readers_sub(). */
struct rc_reader { |
/* The instruction that reads the value. */
struct rc_instruction * Inst; |
/* Channels shared between the read and the followed write. */
unsigned int WriteMask; |
/* U.I is filled for normal instructions, U.P for pair instructions. */
union { |
struct { |
struct rc_src_register * Src; |
} I; |
struct { |
struct rc_pair_instruction_arg * Arg; |
struct rc_pair_instruction_source * Src; |
} P; |
} U; |
}; |
/* Result and scan state of rc_get_readers() / rc_get_readers_sub(). */
struct rc_reader_data { |
/* Set when the reader list cannot be trusted; cleared at query start. */
unsigned int Abort; |
/* Channels for which any further read sets Abort. */
unsigned int AbortOnRead; |
/* Channels for which any further write sets Abort. */
unsigned int AbortOnWrite; |
/* Number of loops entered since the writer and not yet left. */
unsigned int LoopDepth; |
/* Non-zero while skipping the ELSE part of a branch whose IF part
 * contains the writer. */
unsigned int InElse; |
struct rc_instruction * Writer; |
/* Readers[0 .. ReaderCount-1] are valid; ReadersReserved is the
 * bookkeeping field passed to memory_pool_array_reserve(). */
unsigned int ReaderCount; |
unsigned int ReadersReserved; |
struct rc_reader * Readers; |
/* If this flag is enabled, rc_get_readers will exit as soon as possible |
* after the Abort flag is set.*/ |
unsigned int ExitOnAbort; |
/* Not touched by the reader-collection code in radeon_dataflow.c;
 * presumably free for the caller's callbacks - confirm. */
void * CbData; |
}; |
/* Collect the readers of every register write of writer into data. The
 * three callbacks are optional and receive data as their userdata. */
void rc_get_readers( |
struct radeon_compiler * c, |
struct rc_instruction * writer, |
struct rc_reader_data * data, |
rc_read_src_fn read_normal_cb, |
rc_pair_read_arg_fn read_pair_cb, |
rc_read_write_mask_fn write_cb); |
/* Same as rc_get_readers(), but only for the temporary written by one half
 * (sub_writer) of a pair instruction. */
void rc_get_readers_sub( |
struct radeon_compiler * c, |
struct rc_instruction * writer, |
struct rc_pair_sub_instruction * sub_writer, |
struct rc_reader_data * data, |
rc_read_src_fn read_normal_cb, |
rc_pair_read_arg_fn read_pair_cb, |
rc_read_write_mask_fn write_cb); |
/** |
* Compiler passes based on dataflow analysis. |
*/ |
/*@{*/ |
typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, |
void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); |
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); |
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); |
/*@}*/ |
void rc_optimize(struct radeon_compiler * c, void *user); |
void rc_inline_literals(struct radeon_compiler *c, void *user); |
#endif /* RADEON_DATAFLOW_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c |
---|
0,0 → 1,359 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_dataflow.h" |
#include "radeon_compiler.h" |
/* Snapshot of which register components are currently marked as used.
 * Each byte is a component mask that mark_used() ORs bits into and
 * update_instruction() clears bits from when a write satisfies them. */
struct updatemask_state { |
/* Indexed by register index; bounds-checked in get_used_ptr(). */
unsigned char Output[RC_REGISTER_MAX_INDEX]; |
unsigned char Temporary[RC_REGISTER_MAX_INDEX]; |
/* Single mask for the address register file (index is ignored). */
unsigned char Address; |
unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; |
}; |
/* Per-instruction result of the backward scan, indexed by instruction IP. */
struct instruction_state { |
/* Destination components found to be used by later instructions. */
unsigned char WriteMask:4; |
/* Set when the instruction's ALU result write was found to be used. */
unsigned char WriteALUResult:1; |
/* Per-source component masks already accounted for, so repeated visits
 * only propagate newly required components. */
unsigned char SrcReg[3]; |
}; |
/* One entry of the loop stack: the use-mask snapshots recorded at each BRK
 * seen since the entry was pushed (growable array: storage, count,
 * reserved capacity). */
struct loopinfo { |
struct updatemask_state * Breaks; |
unsigned int BreakCount; |
unsigned int BreaksReserved; |
}; |
/* One entry of the branch stack, pushed when ENDIF is met in the backward
 * scan and popped at the matching IF. */
struct branchinfo { |
/* Set once an ELSE has been seen for this IF/ENDIF pair. */
unsigned int HaveElse:1; |
/* Use masks as they were at ENDIF (captured in push_branch). */
struct updatemask_state StoreEndif; |
/* Use masks as they were when the ELSE was reached (scanning backwards). */
struct updatemask_state StoreElse; |
}; |
/* Complete working state of the dead-code elimination pass. */
struct deadcode_state { |
struct radeon_compiler * C; |
/* Array with one instruction_state per instruction, indexed by IP. */
struct instruction_state * Instructions; |
/* Current use masks while scanning the program backwards. */
struct updatemask_state R; |
/* Growable stack of open IF/ENDIF regions (storage, size, capacity). */
struct branchinfo * BranchStack; |
unsigned int BranchStackSize; |
unsigned int BranchStackReserved; |
/* Growable stack of loops (storage, size, capacity). */
struct loopinfo * LoopStack; |
unsigned int LoopStackSize; |
unsigned int LoopStackReserved; |
}; |
static void or_updatemasks( |
struct updatemask_state * dst, |
struct updatemask_state * a, |
struct updatemask_state * b) |
{ |
for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { |
dst->Output[i] = a->Output[i] | b->Output[i]; |
dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; |
} |
for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) |
dst->Special[i] = a->Special[i] | b->Special[i]; |
dst->Address = a->Address | b->Address; |
} |
static void push_break(struct deadcode_state *s) |
{ |
struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; |
memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, |
loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); |
memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); |
} |
static void push_loop(struct deadcode_state * s) |
{ |
memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, |
s->LoopStackSize, s->LoopStackReserved, 1); |
memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); |
} |
static void push_branch(struct deadcode_state * s) |
{ |
struct branchinfo * branch; |
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, |
s->BranchStackSize, s->BranchStackReserved, 1); |
branch = &s->BranchStack[s->BranchStackSize++]; |
branch->HaveElse = 0; |
memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); |
} |
/**
 * Look up the use-mask byte that tracks register (file, index) in s->R.
 *
 * Returns 0 for register files that are not tracked, and also (after
 * reporting through rc_error()) when index is out of range for the file.
 * The index is ignored for RC_FILE_ADDRESS.
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return 0;
		}
		return (file == RC_FILE_OUTPUT) ? &s->R.Output[index]
						: &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return 0;
		}
		return &s->R.Special[index];

	default:
		return 0;
	}
}
/**
 * Mark the components in mask of register (file, index) as used.
 * Untracked or out-of-range registers are silently skipped here
 * (get_used_ptr() has already reported range errors).
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * slot = get_used_ptr(s, file, index);

	if (!slot)
		return;

	*slot |= mask;
}
static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
struct instruction_state * insts = &s->Instructions[inst->IP]; |
unsigned int usedmask = 0; |
unsigned int srcmasks[3]; |
if (opcode->HasDstReg) { |
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); |
if (pused) { |
usedmask = *pused & inst->U.I.DstReg.WriteMask; |
*pused &= ~usedmask; |
} |
} |
insts->WriteMask |= usedmask; |
if (inst->U.I.WriteALUResult) { |
unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); |
if (pused && *pused) { |
if (inst->U.I.WriteALUResult == RC_ALURESULT_X) |
usedmask |= RC_MASK_X; |
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) |
usedmask |= RC_MASK_W; |
*pused = 0; |
insts->WriteALUResult = 1; |
} |
} |
rc_compute_sources_for_writemask(inst, usedmask, srcmasks); |
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { |
unsigned int refmask = 0; |
unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; |
insts->SrcReg[src] |= newsrcmask; |
for(unsigned int chan = 0; chan < 4; ++chan) { |
if (GET_BIT(newsrcmask, chan)) |
refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); |
} |
/* get rid of spurious bits from ZERO, ONE, etc. swizzles */ |
refmask &= RC_MASK_XYZW; |
if (!refmask) |
continue; |
mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); |
if (inst->U.I.SrcReg[src].RelAddr) |
mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); |
} |
} |
static void mark_output_use(void * data, unsigned int index, unsigned int mask) |
{ |
struct deadcode_state * s = data; |
mark_used(s, RC_FILE_OUTPUT, index, mask); |
} |
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) |
{ |
struct deadcode_state s; |
unsigned int nr_instructions; |
rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; |
unsigned int ip; |
memset(&s, 0, sizeof(s)); |
s.C = c; |
nr_instructions = rc_recompute_ips(c); |
s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); |
memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); |
dce(c, &s, &mark_output_use); |
for(struct rc_instruction * inst = c->Program.Instructions.Prev; |
inst != &c->Program.Instructions; |
inst = inst->Prev) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
switch(opcode->Opcode){ |
/* Mark all sources in the loop body as used before doing |
* normal deadcode analysis. This is probably not optimal. |
*/ |
case RC_OPCODE_ENDLOOP: |
{ |
int endloops = 1; |
struct rc_instruction *ptr; |
for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ |
opcode = rc_get_opcode_info(ptr->U.I.Opcode); |
if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ |
endloops--; |
continue; |
} |
if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ |
endloops++; |
continue; |
} |
if(opcode->HasDstReg){ |
int src = 0; |
unsigned int srcmasks[3]; |
rc_compute_sources_for_writemask(ptr, |
ptr->U.I.DstReg.WriteMask, srcmasks); |
for(src=0; src < opcode->NumSrcRegs; src++){ |
mark_used(&s, |
ptr->U.I.SrcReg[src].File, |
ptr->U.I.SrcReg[src].Index, |
srcmasks[src]); |
} |
} |
} |
push_loop(&s); |
break; |
} |
case RC_OPCODE_BRK: |
push_break(&s); |
break; |
case RC_OPCODE_BGNLOOP: |
{ |
unsigned int i; |
struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; |
for(i = 0; i < loop->BreakCount; i++) { |
or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); |
} |
break; |
} |
case RC_OPCODE_CONT: |
break; |
case RC_OPCODE_ENDIF: |
push_branch(&s); |
break; |
default: |
if (opcode->IsFlowControl && s.BranchStackSize) { |
struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; |
if (opcode->Opcode == RC_OPCODE_IF) { |
or_updatemasks(&s.R, |
&s.R, |
branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); |
s.BranchStackSize--; |
} else if (opcode->Opcode == RC_OPCODE_ELSE) { |
if (branch->HaveElse) { |
rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); |
} else { |
memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); |
memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); |
branch->HaveElse = 1; |
} |
} else { |
rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); |
} |
} |
} |
update_instruction(&s, inst); |
} |
ip = 0; |
for(struct rc_instruction * inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next, ++ip) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
int dead = 1; |
unsigned int srcmasks[3]; |
unsigned int usemask; |
if (!opcode->HasDstReg) { |
dead = 0; |
} else { |
inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; |
if (s.Instructions[ip].WriteMask) |
dead = 0; |
if (s.Instructions[ip].WriteALUResult) |
dead = 0; |
else |
inst->U.I.WriteALUResult = RC_ALURESULT_NONE; |
} |
if (dead) { |
struct rc_instruction * todelete = inst; |
inst = inst->Prev; |
rc_remove_instruction(todelete); |
continue; |
} |
usemask = s.Instructions[ip].WriteMask; |
if (inst->U.I.WriteALUResult == RC_ALURESULT_X) |
usemask |= RC_MASK_X; |
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) |
usemask |= RC_MASK_W; |
rc_compute_sources_for_writemask(inst, usemask, srcmasks); |
for(unsigned int src = 0; src < 3; ++src) { |
for(unsigned int chan = 0; chan < 4; ++chan) { |
if (!GET_BIT(srcmasks[src], chan)) |
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); |
} |
} |
} |
rc_calculate_inputs_outputs(c); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c |
---|
0,0 → 1,448 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* Copyright 2012 Advanced Micro Devices, Inc. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Nicolai Haehnle |
* Tom Stellard <thomas.stellard@amd.com> |
*/ |
#include "radeon_dataflow.h" |
#include "radeon_code.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_swizzle.h" |
static void rewrite_source(struct radeon_compiler * c, |
struct rc_instruction * inst, unsigned src) |
{ |
struct rc_swizzle_split split; |
unsigned int tempreg = rc_find_free_temporary(c); |
unsigned int usemask; |
usemask = 0; |
for(unsigned int chan = 0; chan < 4; ++chan) { |
if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) |
usemask |= 1 << chan; |
} |
c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); |
for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { |
struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); |
unsigned int phase_refmask; |
unsigned int masked_negate; |
mov->U.I.Opcode = RC_OPCODE_MOV; |
mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
mov->U.I.DstReg.Index = tempreg; |
mov->U.I.DstReg.WriteMask = split.Phase[phase]; |
mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; |
mov->U.I.PreSub = inst->U.I.PreSub; |
phase_refmask = 0; |
for(unsigned int chan = 0; chan < 4; ++chan) { |
if (!GET_BIT(split.Phase[phase], chan)) |
SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); |
else |
phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); |
} |
phase_refmask &= RC_MASK_XYZW; |
masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; |
if (masked_negate == 0) |
mov->U.I.SrcReg[0].Negate = 0; |
else if (masked_negate == split.Phase[phase]) |
mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; |
} |
inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[src].Index = tempreg; |
inst->U.I.SrcReg[src].Swizzle = 0; |
inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; |
inst->U.I.SrcReg[src].Abs = 0; |
for(unsigned int chan = 0; chan < 4; ++chan) { |
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, |
GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); |
} |
} |
/** |
* This function will attempt to rewrite non-native swizzles that read from |
* immediate registers by rearranging the immediates to allow the |
* instruction to use native swizzles. |
 *
 * On success the register is rewritten in place: a new immediate vec4 is
 * added to the program constants, reg->Index/File point at it, reg->Swizzle
 * holds the new swizzle and reg->Negate is cleared (signs are folded into
 * the immediate values).
 *
 * Returns 0 without modifying reg when the register is not an immediate
 * and at least one channel reads a real component (X/Y/Z/W); returns 1
 * after the rewrite otherwise.
*/ |
static unsigned try_rewrite_constant(struct radeon_compiler *c, |
struct rc_src_register *reg) |
{ |
unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz; |
unsigned all_inline = 0; |
float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f}; |
if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) { |
/* The register does not contain immediates, but if all |
* the swizzles are inline constants, we can still rewrite |
* it. */ |
new_swizzle = RC_SWIZZLE_XYZW; |
for (chan = 0 ; chan < 4; chan++) { |
/* Note: this inner swz shadows the function-level swz declared above. */
unsigned swz = GET_SWZ(reg->Swizzle, chan); |
if (swz <= RC_SWIZZLE_W) { |
return 0; |
} |
if (swz == RC_SWIZZLE_UNUSED) { |
SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED); |
} |
} |
all_inline = 1; |
} else { |
new_swizzle = reg->Swizzle; |
} |
swz = RC_SWIZZLE_UNUSED; |
found_swizzle = 1; |
/* Check if all channels have the same swizzle. If they do we can skip |
* the search for a native swizzle. We only need to check the first |
* three channels, because any swizzle is legal in the fourth channel. |
*/ |
for (chan = 0; chan < 3; chan++) { |
unsigned chan_swz = GET_SWZ(reg->Swizzle, chan); |
if (chan_swz == RC_SWIZZLE_UNUSED) { |
continue; |
} |
if (swz == RC_SWIZZLE_UNUSED) { |
swz = chan_swz; |
} else if (swz != chan_swz) { |
found_swizzle = 0; |
break; |
} |
} |
/* Find a legal swizzle */ |
/* This loop attempts to find a native swizzle where all the |
* channels are different. */ |
/* Each iteration changes one channel of new_swizzle and re-examines it
 * until one of the combinations named in the comments below is reached. */
while (!found_swizzle && !all_inline) { |
swz0 = GET_SWZ(new_swizzle, 0); |
swz1 = GET_SWZ(new_swizzle, 1); |
swz2 = GET_SWZ(new_swizzle, 2); |
/* Swizzle .W. is never legal. */ |
if (swz1 == RC_SWIZZLE_W || |
swz1 == RC_SWIZZLE_UNUSED || |
swz1 == RC_SWIZZLE_ZERO || |
swz1 == RC_SWIZZLE_HALF || |
swz1 == RC_SWIZZLE_ONE) { |
/* We chose Z, because there are two non-repeating |
* swizzle combinations of the form .Z. There are |
* only one combination each for .X. and .Y. */ |
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); |
continue; |
} |
if (swz2 == RC_SWIZZLE_UNUSED) { |
/* We choose Y, because there are two non-repeating |
* swizzle combinations of the form ..Y */ |
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); |
continue; |
} |
switch (swz0) { |
/* X.. */ |
case RC_SWIZZLE_X: |
/* Legal swizzles that start with X: XYZ, XXX */ |
switch (swz1) { |
/* XX. */ |
case RC_SWIZZLE_X: |
/* The new swizzle will be: |
* ZXY (XX. => ZX. => ZXY) */ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); |
break; |
/* XY. */ |
case RC_SWIZZLE_Y: |
/* The new swizzle is XYZ */ |
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z); |
found_swizzle = 1; |
break; |
/* XZ. */ |
case RC_SWIZZLE_Z: |
/* XZZ */ |
if (swz2 == RC_SWIZZLE_Z) { |
/* The new swizzle is XYZ */ |
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y); |
found_swizzle = 1; |
} else { /* XZ[^Z] */ |
/* The new swizzle will be: |
* YZX (XZ. => YZ. => YZX) */ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y); |
} |
break; |
/* XW. Should have already been handled. */ |
case RC_SWIZZLE_W: |
assert(0); |
break; |
} |
break; |
/* Y.. */ |
case RC_SWIZZLE_Y: |
/* Legal swizzles that start with Y: YYY, YZX */ |
switch (swz1) { |
/* YY. */ |
case RC_SWIZZLE_Y: |
/* The new swizzle will be: |
* XYZ (YY. => XY. => XYZ) */ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); |
break; |
/* YZ. */ |
case RC_SWIZZLE_Z: |
/* The new swizzle is YZX */ |
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X); |
found_swizzle = 1; |
break; |
/* YX. */ |
case RC_SWIZZLE_X: |
/* YXX */ |
if (swz2 == RC_SWIZZLE_X) { |
/*The new swizzle is YZX */ |
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); |
found_swizzle = 1; |
} else { /* YX[^X] */ |
/* The new swizzle will be: |
* ZXY (YX. => ZX. -> ZXY) */ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); |
} |
break; |
/* YW. Should have already been handled. */ |
case RC_SWIZZLE_W: |
assert(0); |
break; |
} |
break; |
/* Z.. */ |
case RC_SWIZZLE_Z: |
/* Legal swizzles that start with Z: ZZZ, ZXY */ |
switch (swz1) { |
/* ZZ. */ |
case RC_SWIZZLE_Z: |
/* The new swizzle will be: |
* WZY (ZZ. => WZ. => WZY) */ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W); |
break; |
/* ZX. */ |
case RC_SWIZZLE_X: |
/* The new swizzle is ZXY */ |
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); |
found_swizzle = 1; |
break; |
/* ZY. */ |
case RC_SWIZZLE_Y: |
/* ZYY */ |
if (swz2 == RC_SWIZZLE_Y) { |
/* The new swizzle is ZXY */ |
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X); |
found_swizzle = 1; |
} else { /* ZY[^Y] */ |
/* The new swizzle will be: |
* XYZ (ZY. => XY. => XYZ) */ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); |
} |
break; |
/* ZW. Should have already been handled. */ |
case RC_SWIZZLE_W: |
assert(0); |
break; |
} |
break; |
/* W.. */ |
case RC_SWIZZLE_W: |
/* Legal swizzles that start with W: WWW, WZY */ |
switch (swz1) { |
/* WW. Should have already been handled. */ |
case RC_SWIZZLE_W: |
assert(0); |
break; |
/* WZ. */ |
case RC_SWIZZLE_Z: |
/* The new swizzle will be WZY */ |
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); |
found_swizzle = 1; |
break; |
/* WX. */ |
case RC_SWIZZLE_X: |
/* WY. */ |
case RC_SWIZZLE_Y: |
/* W[XY]Y */ |
if (swz2 == RC_SWIZZLE_Y) { |
/* The new swizzle will be WZY */ |
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); |
found_swizzle = 1; |
} else { /* W[XY][^Y] */ |
/* The new swizzle will be: |
* ZXY (WX. => XX. => ZX. => ZXY) or |
* XYZ (WY. => XY. => XYZ) |
*/ |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); |
} |
break; |
} |
break; |
/* U.. 0.. 1.. H..*/ |
case RC_SWIZZLE_UNUSED: |
case RC_SWIZZLE_ZERO: |
case RC_SWIZZLE_ONE: |
case RC_SWIZZLE_HALF: |
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); |
break; |
} |
} |
/* Handle the swizzle in the w channel. */ |
swz3 = GET_SWZ(reg->Swizzle, 3); |
/* We can skip this if the swizzle in channel w is an inline constant. */ |
if (swz3 <= RC_SWIZZLE_W) { |
for (chan = 0; chan < 3; chan++) { |
unsigned old_swz = GET_SWZ(reg->Swizzle, chan); |
unsigned new_swz = GET_SWZ(new_swizzle, chan); |
/* If the swizzle in the w channel is the same as the |
* swizzle in any other channels, we need to rewrite it. |
* For example: |
* reg->Swizzle == XWZW |
* new_swizzle == XYZX |
* Since the swizzle in the y channel is being |
* rewritten from W -> Y we need to change the swizzle |
* in the w channel from W -> Y as well. |
*/ |
if (old_swz == swz3) { |
SET_SWZ(new_swizzle, 3, |
GET_SWZ(new_swizzle, chan)); |
break; |
} |
/* The swizzle in channel w will be overwritten by one |
* of the new swizzles. */ |
if (new_swz == swz3) { |
/* Find an unused swizzle */ |
unsigned i; |
unsigned used = 0; |
for (i = 0; i < 3; i++) { |
used |= 1 << GET_SWZ(new_swizzle, i); |
} |
/* NOTE(review): there is no break after SET_SWZ below, so when several
 * components are free the highest-numbered one ends up in channel w. */
for (i = 0; i < 4; i++) { |
if (used & (1 << i)) { |
continue; |
} |
SET_SWZ(new_swizzle, 3, i); |
} |
} |
} |
} |
/* Build the new immediate: for every used channel, store the value the old
 * swizzle selected into the component the new swizzle reads, and switch
 * the register's swizzle for that channel over to the new component. */
for (chan = 0; chan < 4; chan++) { |
unsigned old_swz = GET_SWZ(reg->Swizzle, chan); |
unsigned new_swz = GET_SWZ(new_swizzle, chan); |
if (old_swz == RC_SWIZZLE_UNUSED) { |
continue; |
} |
/* We don't need to change the swizzle in channel w if it is |
* an inline constant. These are always legal in the w channel. |
* |
* Swizzles with a value > RC_SWIZZLE_W are inline constants. |
*/ |
if (chan == 3 && old_swz > RC_SWIZZLE_W) { |
continue; |
} |
assert(new_swz <= RC_SWIZZLE_W); |
switch (old_swz) { |
case RC_SWIZZLE_ZERO: |
imms[new_swz] = 0.0f; |
break; |
case RC_SWIZZLE_HALF: |
if (reg->Negate & (1 << chan)) { |
imms[new_swz] = -0.5f; |
} else { |
imms[new_swz] = 0.5f; |
} |
break; |
case RC_SWIZZLE_ONE: |
if (reg->Negate & (1 << chan)) { |
imms[new_swz] = -1.0f; |
} else { |
imms[new_swz] = 1.0f; |
} |
break; |
default: |
imms[new_swz] = rc_get_constant_value(c, reg->Index, |
reg->Swizzle, reg->Negate, chan); |
} |
SET_SWZ(reg->Swizzle, chan, new_swz); |
} |
reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, |
imms); |
/* We need to set the register file to CONSTANT in case we are |
* converting a non-constant register with constant swizzles (e.g. |
* ONE, ZERO, HALF). |
*/ |
reg->File = RC_FILE_CONSTANT; |
/* Negation was folded into the immediate values above. */
reg->Negate = 0; |
return 1; |
} |
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) |
{ |
struct rc_instruction * inst; |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
const struct rc_opcode_info * opcode = |
rc_get_opcode_info(inst->U.I.Opcode); |
unsigned int src; |
for(src = 0; src < opcode->NumSrcRegs; ++src) { |
struct rc_src_register *reg = &inst->U.I.SrcReg[src]; |
if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { |
continue; |
} |
if (!c->is_r500 && |
c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && |
try_rewrite_constant(c, reg)) { |
continue; |
} |
rewrite_source(c, inst, src); |
} |
} |
if (c->Debug & RC_DBG_LOG) |
rc_constants_print(&c->Program.Constants); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c |
---|
0,0 → 1,342 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "radeon_emulate_branches.h" |
#include <stdio.h> |
#include "radeon_compiler.h" |
#include "radeon_dataflow.h" |
/* Compile-time switch for this file's debug output; DBG(...) prints its
 * printf-style arguments to stderr only when VERBOSE is non-zero. */
#define VERBOSE 0 |
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) |
/* Proxy assignment for one temporary register. */
struct proxy_info { |
/* Set by scan_write() once a proxy temporary has been allocated. */
unsigned int Proxied:1; |
/* Index of the proxy temporary (valid only when Proxied is set). */
unsigned int Index:RC_REGISTER_INDEX_BITS; |
}; |
/* Proxy table for one branch arm, indexed by original temporary index. */
struct register_proxies { |
struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; |
}; |
/* Bookkeeping for one open IF: the IF instruction (set in handle_if) and
 * its ELSE instruction (set in handle_else; 0 when there is none). */
struct branch_info { |
struct rc_instruction * If; |
struct rc_instruction * Else; |
}; |
/* State of the branch emulation pass: the compiler plus a growable stack
 * of currently open branches (storage, used count, reserved capacity). */
struct emulate_branch_state { |
struct radeon_compiler * C; |
struct branch_info * Branches; |
unsigned int BranchCount; |
unsigned int BranchReserved; |
}; |
static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) |
{ |
struct branch_info * branch; |
struct rc_instruction * inst_mov; |
memory_pool_array_reserve(&s->C->Pool, struct branch_info, |
s->Branches, s->BranchCount, s->BranchReserved, 1); |
DBG("%s\n", __FUNCTION__); |
branch = &s->Branches[s->BranchCount++]; |
memset(branch, 0, sizeof(struct branch_info)); |
branch->If = inst; |
/* Make a safety copy of the decision register, because we will need |
* it at ENDIF time and it might be overwritten in both branches. */ |
inst_mov = rc_insert_new_instruction(s->C, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; |
inst->U.I.SrcReg[0].Swizzle = 0; |
inst->U.I.SrcReg[0].Abs = 0; |
inst->U.I.SrcReg[0].Negate = 0; |
} |
static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) |
{ |
struct branch_info * branch; |
if (!s->BranchCount) { |
rc_error(s->C, "Encountered ELSE outside of branches"); |
return; |
} |
DBG("%s\n", __FUNCTION__); |
branch = &s->Branches[s->BranchCount - 1]; |
branch->Else = inst; |
} |
/* userdata bundle handed to scan_write() and remap_proxy_function():
 * the pass state plus the proxy table of the branch arm being processed. */
struct state_and_proxies { |
struct emulate_branch_state * S; |
struct register_proxies * Proxies; |
}; |
static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, |
rc_register_file file, unsigned int index) |
{ |
if (file == RC_FILE_TEMPORARY) { |
return &sap->Proxies->Temporary[index]; |
} else { |
return 0; |
} |
} |
static void scan_write(void * userdata, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int comp) |
{ |
struct state_and_proxies * sap = userdata; |
struct proxy_info * proxy = get_proxy_info(sap, file, index); |
if (proxy && !proxy->Proxied) { |
proxy->Proxied = 1; |
proxy->Index = rc_find_free_temporary(sap->S->C); |
} |
} |
static void remap_proxy_function(void * userdata, struct rc_instruction * inst, |
rc_register_file * pfile, unsigned int * pindex) |
{ |
struct state_and_proxies * sap = userdata; |
struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); |
if (proxy && proxy->Proxied) { |
*pfile = RC_FILE_TEMPORARY; |
*pindex = proxy->Index; |
} |
} |
/** |
* Redirect all writes in the instruction range [begin, end) to proxy |
* temporary registers. |
*/ |
static void allocate_and_insert_proxies(struct emulate_branch_state * s, |
struct register_proxies * proxies, |
struct rc_instruction * begin, |
struct rc_instruction * end) |
{ |
struct state_and_proxies sap; |
sap.S = s; |
sap.Proxies = proxies; |
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { |
rc_for_all_writes_mask(inst, scan_write, &sap); |
rc_remap_registers(inst, remap_proxy_function, &sap); |
} |
for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { |
if (proxies->Temporary[index].Proxied) { |
struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_mov->U.I.SrcReg[0].Index = index; |
} |
} |
} |
static void inject_cmp(struct emulate_branch_state * s, |
struct rc_instruction * inst_if, |
struct rc_instruction * inst_endif, |
rc_register_file file, unsigned int index, |
struct proxy_info ifproxy, |
struct proxy_info elseproxy) |
{ |
struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); |
inst_cmp->U.I.Opcode = RC_OPCODE_CMP; |
inst_cmp->U.I.DstReg.File = file; |
inst_cmp->U.I.DstReg.Index = index; |
inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; |
inst_cmp->U.I.SrcReg[0].Abs = 1; |
inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; |
inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; |
inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; |
inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; |
} |
static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) |
{ |
struct branch_info * branch; |
struct register_proxies IfProxies; |
struct register_proxies ElseProxies; |
if (!s->BranchCount) { |
rc_error(s->C, "Encountered ENDIF outside of branches"); |
return; |
} |
DBG("%s\n", __FUNCTION__); |
branch = &s->Branches[s->BranchCount - 1]; |
memset(&IfProxies, 0, sizeof(IfProxies)); |
memset(&ElseProxies, 0, sizeof(ElseProxies)); |
allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); |
if (branch->Else) |
allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); |
/* Insert the CMP instructions at the end. */ |
for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { |
if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { |
inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, |
IfProxies.Temporary[index], ElseProxies.Temporary[index]); |
} |
} |
/* Remove all traces of the branch instructions */ |
rc_remove_instruction(branch->If); |
if (branch->Else) |
rc_remove_instruction(branch->Else); |
rc_remove_instruction(inst); |
s->BranchCount--; |
if (VERBOSE) { |
DBG("Program after ENDIF handling:\n"); |
rc_print_program(&s->C->Program); |
} |
} |
/* userdata for remap_output_function(): references to output register
 * `Output` are redirected to temporary register `Temporary`. */
struct remap_output_data { |
unsigned int Output:RC_REGISTER_INDEX_BITS; |
unsigned int Temporary:RC_REGISTER_INDEX_BITS; |
}; |
static void remap_output_function(void * userdata, struct rc_instruction * inst, |
rc_register_file * pfile, unsigned int * pindex) |
{ |
struct remap_output_data * data = userdata; |
if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { |
*pfile = RC_FILE_TEMPORARY; |
*pindex = data->Temporary; |
} |
} |
/** |
* Output registers cannot be read from and so cannot be dealt with like |
* temporary registers. |
* |
* We do the simplest thing: If an output registers is written within |
* a branch, then *all* writes to this register are proxied to a |
* temporary register, and a final MOV is appended to the end of |
* the program. |
*/ |
static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) |
{ |
const struct rc_opcode_info * opcode; |
if (!s->BranchCount) |
return; |
opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (!opcode->HasDstReg) |
return; |
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { |
struct remap_output_data remap; |
struct rc_instruction * inst_mov; |
remap.Output = inst->U.I.DstReg.Index; |
remap.Temporary = rc_find_free_temporary(s->C); |
for(struct rc_instruction * inst = s->C->Program.Instructions.Next; |
inst != &s->C->Program.Instructions; |
inst = inst->Next) { |
rc_remap_registers(inst, &remap_output_function, &remap); |
} |
inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; |
inst_mov->U.I.DstReg.Index = remap.Output; |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_mov->U.I.SrcReg[0].Index = remap.Temporary; |
} |
} |
/** |
* Remove branch instructions; instead, execute both branches |
* on different register sets and choose between their results |
* using CMP instructions in place of the original ENDIF. |
*/ |
void rc_emulate_branches(struct radeon_compiler *c, void *user) |
{ |
struct emulate_branch_state s; |
struct rc_instruction * ptr; |
memset(&s, 0, sizeof(s)); |
s.C = c; |
/* Untypical loop because we may remove the current instruction */ |
ptr = c->Program.Instructions.Next; |
while(ptr != &c->Program.Instructions) { |
struct rc_instruction * inst = ptr; |
ptr = ptr->Next; |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
switch(inst->U.I.Opcode) { |
case RC_OPCODE_IF: |
handle_if(&s, inst); |
break; |
case RC_OPCODE_ELSE: |
handle_else(&s, inst); |
break; |
case RC_OPCODE_ENDIF: |
handle_endif(&s, inst); |
break; |
default: |
fix_output_writes(&s, inst); |
break; |
} |
} else { |
rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h |
---|
0,0 → 1,30 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef RADEON_EMULATE_BRANCHES_H
#define RADEON_EMULATE_BRANCHES_H

struct radeon_compiler;

/**
 * Compiler pass that removes branch instructions from the program of
 * \p c by emulating them.
 *
 * \param c     compiler whose program is rewritten
 * \param user  opaque pass argument
 */
void rc_emulate_branches(struct radeon_compiler *c, void *user);

#endif /* RADEON_EMULATE_BRANCHES_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c |
---|
0,0 → 1,521 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* \file |
*/ |
#include "radeon_emulate_loops.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
/* Set to a non-zero value to trace the loop analysis on stderr. */
#define VERBOSE 0

/* printf-style debug output; the call is skipped while VERBOSE is 0. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while(0)
/**
 * State for update_const_value(): tracks the constant most recently
 * moved into one particular source register.
 */
struct const_value {
	struct radeon_compiler * C;  /* compiler used for constant lookups */
	struct rc_src_register * Src; /* register whose value is being tracked */
	float Value;                 /* constant found; valid only if HasValue */
	int HasValue;                /* non-zero once a constant has been found */
};
struct count_inst { |
struct radeon_compiler * C; |
int Index; |
rc_swizzle Swz; |
float Amount; |
int Unknown; |
unsigned BranchDepth; |
}; |
static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, |
struct loop_info * loop) |
{ |
unsigned int total_i = rc_recompute_ips(c); |
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; |
/* +1 because the program already has one iteration of the loop. */ |
return 1 + ((c->max_alu_insts - total_i) / loop_i); |
} |
static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, |
unsigned int iterations) |
{ |
unsigned int i; |
struct rc_instruction * ptr; |
struct rc_instruction * first = loop->BeginLoop->Next; |
struct rc_instruction * last = loop->EndLoop->Prev; |
struct rc_instruction * append_to = last; |
rc_remove_instruction(loop->BeginLoop); |
rc_remove_instruction(loop->EndLoop); |
for( i = 1; i < iterations; i++){ |
for(ptr = first; ptr != last->Next; ptr = ptr->Next){ |
struct rc_instruction *new = rc_alloc_instruction(c); |
memcpy(new, ptr, sizeof(struct rc_instruction)); |
rc_insert_instruction(append_to, new); |
append_to = new; |
} |
} |
} |
static void update_const_value(void * data, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask) |
{ |
struct const_value * value = data; |
if(value->Src->File != file || |
value->Src->Index != index || |
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ |
return; |
} |
switch(inst->U.I.Opcode){ |
case RC_OPCODE_MOV: |
if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File, |
inst->U.I.SrcReg[0].Index)){ |
return; |
} |
value->HasValue = 1; |
value->Value = |
rc_get_constant_value(value->C, |
inst->U.I.SrcReg[0].Index, |
inst->U.I.SrcReg[0].Swizzle, |
inst->U.I.SrcReg[0].Negate, 0); |
break; |
} |
} |
static void get_incr_amount(void * data, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask) |
{ |
struct count_inst * count_inst = data; |
int amnt_src_index; |
const struct rc_opcode_info * opcode; |
float amount; |
if(file != RC_FILE_TEMPORARY || |
count_inst->Index != index || |
(1 << GET_SWZ(count_inst->Swz,0) != mask)){ |
return; |
} |
/* XXX: Give up if the counter is modified within an IF block. We |
* could handle this case with better analysis. */ |
if (count_inst->BranchDepth > 0) { |
count_inst->Unknown = 1; |
return; |
} |
/* Find the index of the counter register. */ |
opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if(opcode->NumSrcRegs != 2){ |
count_inst->Unknown = 1; |
return; |
} |
if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && |
inst->U.I.SrcReg[0].Index == count_inst->Index && |
inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ |
amnt_src_index = 1; |
} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && |
inst->U.I.SrcReg[1].Index == count_inst->Index && |
inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ |
amnt_src_index = 0; |
} |
else{ |
count_inst->Unknown = 1; |
return; |
} |
if(rc_src_reg_is_immediate(count_inst->C, |
inst->U.I.SrcReg[amnt_src_index].File, |
inst->U.I.SrcReg[amnt_src_index].Index)){ |
amount = rc_get_constant_value(count_inst->C, |
inst->U.I.SrcReg[amnt_src_index].Index, |
inst->U.I.SrcReg[amnt_src_index].Swizzle, |
inst->U.I.SrcReg[amnt_src_index].Negate, 0); |
} |
else{ |
count_inst->Unknown = 1 ; |
return; |
} |
switch(inst->U.I.Opcode){ |
case RC_OPCODE_ADD: |
count_inst->Amount += amount; |
break; |
case RC_OPCODE_SUB: |
if(amnt_src_index == 0){ |
count_inst->Unknown = 0; |
return; |
} |
count_inst->Amount -= amount; |
break; |
default: |
count_inst->Unknown = 1; |
return; |
} |
} |
/** |
* If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless |
* of how many iterations they have. |
*/ |
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) |
{ |
int end_loops; |
int iterations; |
struct count_inst count_inst; |
float limit_value; |
struct rc_src_register * counter; |
struct rc_src_register * limit; |
struct const_value counter_value; |
struct rc_instruction * inst; |
/* Find the counter and the upper limit */ |
if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File, |
loop->Cond->U.I.SrcReg[0].Index)){ |
limit = &loop->Cond->U.I.SrcReg[0]; |
counter = &loop->Cond->U.I.SrcReg[1]; |
} |
else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File, |
loop->Cond->U.I.SrcReg[1].Index)){ |
limit = &loop->Cond->U.I.SrcReg[1]; |
counter = &loop->Cond->U.I.SrcReg[0]; |
} |
else{ |
DBG("No constant limit.\n"); |
return 0; |
} |
/* Find the initial value of the counter */ |
counter_value.Src = counter; |
counter_value.Value = 0.0f; |
counter_value.HasValue = 0; |
counter_value.C = c; |
for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; |
inst = inst->Next){ |
rc_for_all_writes_mask(inst, update_const_value, &counter_value); |
} |
if(!counter_value.HasValue){ |
DBG("Initial counter value cannot be determined.\n"); |
return 0; |
} |
DBG("Initial counter value is %f\n", counter_value.Value); |
/* Determine how the counter is modified each loop */ |
count_inst.C = c; |
count_inst.Index = counter->Index; |
count_inst.Swz = counter->Swizzle; |
count_inst.Amount = 0.0f; |
count_inst.Unknown = 0; |
count_inst.BranchDepth = 0; |
end_loops = 1; |
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ |
switch(inst->U.I.Opcode){ |
/* XXX In the future we might want to try to unroll nested |
* loops here.*/ |
case RC_OPCODE_BGNLOOP: |
end_loops++; |
break; |
case RC_OPCODE_ENDLOOP: |
loop->EndLoop = inst; |
end_loops--; |
break; |
case RC_OPCODE_BRK: |
/* Don't unroll loops if it has a BRK instruction |
* other one used when testing the main conditional |
* of the loop. */ |
/* Make sure we haven't entered a nested loops. */ |
if(inst != loop->Brk && end_loops == 1) { |
return 0; |
} |
break; |
case RC_OPCODE_IF: |
count_inst.BranchDepth++; |
break; |
case RC_OPCODE_ENDIF: |
count_inst.BranchDepth--; |
break; |
default: |
rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); |
if(count_inst.Unknown){ |
return 0; |
} |
break; |
} |
} |
/* Infinite loop */ |
if(count_inst.Amount == 0.0f){ |
return 0; |
} |
DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); |
/* Calculate the number of iterations of this loop. Keeping this |
* simple, since we only support increment and decrement loops. |
*/ |
limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle, |
limit->Negate, 0); |
DBG("Limit is %f.\n", limit_value); |
/* The iteration calculations are opposite of what you would expect. |
* In a normal loop, if the condition is met, then loop continues, but |
* with our loops, if the condition is met, the is exited. */ |
switch(loop->Cond->U.I.Opcode){ |
case RC_OPCODE_SGE: |
case RC_OPCODE_SLE: |
iterations = (int) ceilf((limit_value - counter_value.Value) / |
count_inst.Amount); |
break; |
case RC_OPCODE_SGT: |
case RC_OPCODE_SLT: |
iterations = (int) floorf((limit_value - counter_value.Value) / |
count_inst.Amount) + 1; |
break; |
default: |
return 0; |
} |
if (c->max_alu_insts > 0 |
&& iterations > loop_max_possible_iterations(c, loop)) { |
return 0; |
} |
DBG("Loop will have %d iterations.\n", iterations); |
/* Prepare loop for unrolling */ |
rc_remove_instruction(loop->Cond); |
rc_remove_instruction(loop->If); |
rc_remove_instruction(loop->Brk); |
rc_remove_instruction(loop->EndIf); |
unroll_loop(c, loop, iterations); |
loop->EndLoop = NULL; |
return 1; |
} |
/** |
* @param c |
* @param loop |
* @param inst A pointer to a BGNLOOP instruction. |
* @return 1 if all of the members of loop where set. |
* @return 0 if there was an error and some members of loop are still NULL. |
*/ |
static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, |
struct rc_instruction * inst) |
{ |
struct rc_instruction * ptr; |
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ |
rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); |
return 0; |
} |
memset(loop, 0, sizeof(struct loop_info)); |
loop->BeginLoop = inst; |
for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { |
if (ptr == &c->Program.Instructions) { |
rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", |
__FUNCTION__); |
return 0; |
} |
switch(ptr->U.I.Opcode){ |
case RC_OPCODE_BGNLOOP: |
{ |
/* Nested loop, skip ahead to the end. */ |
unsigned int loop_depth = 1; |
for(ptr = ptr->Next; ptr != &c->Program.Instructions; |
ptr = ptr->Next){ |
if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { |
loop_depth++; |
} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { |
if (!--loop_depth) { |
break; |
} |
} |
} |
if (ptr == &c->Program.Instructions) { |
rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", |
__FUNCTION__); |
return 0; |
} |
break; |
} |
case RC_OPCODE_BRK: |
if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF |
|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF |
|| loop->Brk){ |
continue; |
} |
loop->Brk = ptr; |
loop->If = ptr->Prev; |
loop->EndIf = ptr->Next; |
switch(loop->If->Prev->U.I.Opcode){ |
case RC_OPCODE_SLT: |
case RC_OPCODE_SGE: |
case RC_OPCODE_SGT: |
case RC_OPCODE_SLE: |
case RC_OPCODE_SEQ: |
case RC_OPCODE_SNE: |
break; |
default: |
return 0; |
} |
loop->Cond = loop->If->Prev; |
break; |
case RC_OPCODE_ENDLOOP: |
loop->EndLoop = ptr; |
break; |
} |
} |
if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf |
&& loop->Cond && loop->EndLoop) { |
return 1; |
} |
return 0; |
} |
/** |
* This function prepares a loop to be unrolled by converting it into an if |
* statement. Here is an outline of the conversion process: |
* BGNLOOP; -> BGNLOOP; |
* <Additional conditional code> -> <Additional conditional code> |
* SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; |
* IF temp[0]; -> IF temp[0]; |
* BRK; -> |
* ENDIF; -> <Loop Body> |
* <Loop Body> -> ENDIF; |
* ENDLOOP; -> ENDLOOP |
* |
* @param inst A pointer to a BGNLOOP instruction. |
* @return 1 for success, 0 for failure |
*/ |
static int transform_loop(struct emulate_loop_state * s, |
struct rc_instruction * inst) |
{ |
struct loop_info * loop; |
memory_pool_array_reserve(&s->C->Pool, struct loop_info, |
s->Loops, s->LoopCount, s->LoopReserved, 1); |
loop = &s->Loops[s->LoopCount++]; |
if (!build_loop_info(s->C, loop, inst)) { |
rc_error(s->C, "Failed to build loop info\n"); |
return 0; |
} |
if(try_unroll_loop(s->C, loop)){ |
return 1; |
} |
/* Reverse the conditional instruction */ |
switch(loop->Cond->U.I.Opcode){ |
case RC_OPCODE_SGE: |
loop->Cond->U.I.Opcode = RC_OPCODE_SLT; |
break; |
case RC_OPCODE_SLT: |
loop->Cond->U.I.Opcode = RC_OPCODE_SGE; |
break; |
case RC_OPCODE_SLE: |
loop->Cond->U.I.Opcode = RC_OPCODE_SGT; |
break; |
case RC_OPCODE_SGT: |
loop->Cond->U.I.Opcode = RC_OPCODE_SLE; |
break; |
case RC_OPCODE_SEQ: |
loop->Cond->U.I.Opcode = RC_OPCODE_SNE; |
break; |
case RC_OPCODE_SNE: |
loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; |
break; |
default: |
rc_error(s->C, "loop->Cond is not a conditional.\n"); |
return 0; |
} |
/* Prepare the loop to be emulated */ |
rc_remove_instruction(loop->Brk); |
rc_remove_instruction(loop->EndIf); |
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); |
return 1; |
} |
void rc_transform_loops(struct radeon_compiler *c, void *user) |
{ |
struct emulate_loop_state * s = &c->loop_state; |
struct rc_instruction * ptr; |
memset(s, 0, sizeof(struct emulate_loop_state)); |
s->C = c; |
for(ptr = s->C->Program.Instructions.Next; |
ptr != &s->C->Program.Instructions; ptr = ptr->Next) { |
if(ptr->Type == RC_INSTRUCTION_NORMAL && |
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ |
if (!transform_loop(s, ptr)) |
return; |
} |
} |
} |
void rc_unroll_loops(struct radeon_compiler *c, void *user) |
{ |
struct rc_instruction * inst; |
struct loop_info loop; |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; inst = inst->Next) { |
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { |
if (build_loop_info(c, &loop, inst)) { |
try_unroll_loop(c, &loop); |
} |
} |
} |
} |
void rc_emulate_loops(struct radeon_compiler *c, void *user) |
{ |
struct emulate_loop_state * s = &c->loop_state; |
int i; |
/* Iterate backwards of the list of loops so that loops that nested |
* loops are unrolled first. |
*/ |
for( i = s->LoopCount - 1; i >= 0; i-- ){ |
unsigned int iterations; |
if(!s->Loops[i].EndLoop){ |
continue; |
} |
iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); |
unroll_loop(s->C, &s->Loops[i], iterations); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h |
---|
0,0 → 1,57 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_EMULATE_LOOPS_H
#define RADEON_EMULATE_LOOPS_H

/* NOTE(review): not referenced by the loop passes in
 * radeon_emulate_loops.c; confirm other users before changing it. */
#define MAX_ITERATIONS 8

struct radeon_compiler;

/**
 * The instructions that delimit a loop and its exit test.
 */
struct loop_info {
	struct rc_instruction * BeginLoop; /* the BGNLOOP instruction */
	struct rc_instruction * Cond;      /* comparison that feeds If */
	struct rc_instruction * If;        /* IF directly before Brk */
	struct rc_instruction * Brk;       /* BRK that exits the loop */
	struct rc_instruction * EndIf;     /* ENDIF directly after Brk */
	struct rc_instruction * EndLoop;   /* matching ENDLOOP; NULL once unrolled */
};

/**
 * Loops collected by rc_transform_loops() for rc_emulate_loops().
 */
struct emulate_loop_state {
	struct radeon_compiler * C;  /* owning compiler */
	struct loop_info * Loops;    /* array of recorded loops */
	unsigned int LoopCount;      /* number of entries used in Loops */
	unsigned int LoopReserved;   /* presumably the allocated capacity of Loops */
};

/* Compiler passes; each takes the compiler and an opaque pass argument. */
void rc_transform_loops(struct radeon_compiler *c, void *user);

void rc_unroll_loops(struct radeon_compiler * c, void *user);

void rc_emulate_loops(struct radeon_compiler * c, void *user);

#endif /* RADEON_EMULATE_LOOPS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_inline_literals.c |
---|
0,0 → 1,164 |
/* |
* Copyright 2012 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_program.h" |
#include "radeon_program_constants.h" |
#include <stdio.h> |
/* Set to a non-zero value to trace the conversion on stderr. */
#define VERBOSE 0

#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)

/* IEEE-754:
 * 22:0 mantissa
 * 30:23 exponent
 * 31 sign
 *
 * R300:
 * 0:2 mantissa
 * 3:6 exponent (bias 7)
 */

/**
 * Convert an IEEE-754 single precision value to the 7-bit R300 inline
 * float format described above.
 *
 * The conversion only succeeds if it is exact: the unbiased exponent
 * must lie in [-7, 8] and only the top three mantissa bits may be set.
 * The sign is not stored in the result; it is reported through the
 * return value instead.
 *
 * \param f               value to convert
 * \param r300_float_out  receives the 7-bit encoding of |f|; written
 *                        only on success
 * \return 0 if \p f cannot be represented, 1 if it can and its sign bit
 *         is clear, -1 if it can and its sign bit is set
 */
static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
{
	/* Read the bit pattern through a union: dereferencing a float
	 * object through an unsigned pointer breaks the aliasing rules.
	 * Assumes float and unsigned are both 32 bits wide. */
	union {
		float f;
		unsigned u;
	} pun;
	unsigned float_bits;
	/* XXX: Handle big-endian */
	unsigned mantissa;
	unsigned biased_exponent;
	unsigned negate;
	int exponent;
	/* Mantissa bits that cannot be stored: everything below bit 20. */
	unsigned mantissa_mask = 0xff8fffff;
	unsigned r300_exponent, r300_mantissa;

	pun.f = f;
	float_bits = pun.u;

	mantissa = float_bits & 0x007fffff;
	biased_exponent = (float_bits & 0x7f800000) >> 23;
	negate = !!(float_bits & 0x80000000);
	exponent = biased_exponent - 127;

	DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
	DBG("Raw exponent = %d\n", exponent);

	if (exponent < -7 || exponent > 8) {
		DBG("Failed exponent out of range\n\n");
		return 0;
	}

	if (mantissa & mantissa_mask) {
		DBG("Failed mantisa has too many bits:\n"
			"manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
			mantissa, mantissa_mask,
			mantissa & mantissa_mask);
		return 0;
	}

	r300_exponent = exponent + 7;
	r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
	*r300_float_out = r300_mantissa | (r300_exponent << 3);

	DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);

	if (negate)
		return -1;
	else
		return 1;
}
void rc_inline_literals(struct radeon_compiler *c, void *user) |
{ |
struct rc_instruction * inst; |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
const struct rc_opcode_info * info = |
rc_get_opcode_info(inst->U.I.Opcode); |
unsigned src_idx; |
struct rc_constant * constant; |
float float_value; |
unsigned char r300_float = 0; |
int ret; |
/* XXX: Handle presub */ |
/* We aren't using rc_for_all_reads_src here, because presub |
* sources need to be handled differently. */ |
for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) { |
unsigned new_swizzle; |
unsigned use_literal = 0; |
unsigned negate_mask = 0; |
unsigned swz, chan; |
struct rc_src_register * src_reg = |
&inst->U.I.SrcReg[src_idx]; |
swz = RC_SWIZZLE_UNUSED; |
if (src_reg->File != RC_FILE_CONSTANT) { |
continue; |
} |
constant = |
&c->Program.Constants.Constants[src_reg->Index]; |
if (constant->Type != RC_CONSTANT_IMMEDIATE) { |
continue; |
} |
new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); |
for (chan = 0; chan < 4; chan++) { |
unsigned char r300_float_tmp; |
swz = GET_SWZ(src_reg->Swizzle, chan); |
if (swz == RC_SWIZZLE_UNUSED) { |
continue; |
} |
float_value = constant->u.Immediate[swz]; |
ret = ieee_754_to_r300_float(float_value, |
&r300_float_tmp); |
if (!ret || (use_literal && |
r300_float != r300_float_tmp)) { |
use_literal = 0; |
break; |
} |
if (ret == -1 && src_reg->Abs) { |
use_literal = 0; |
break; |
} |
if (!use_literal) { |
r300_float = r300_float_tmp; |
use_literal = 1; |
} |
/* Use RC_SWIZZLE_W for the inline constant, so |
* it will become one of the alpha sources. */ |
SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W); |
if (ret == -1) { |
negate_mask |= (1 << chan); |
} |
} |
if (!use_literal) { |
continue; |
} |
src_reg->File = RC_FILE_INLINE; |
src_reg->Index = r300_float; |
src_reg->Swizzle = new_swizzle; |
src_reg->Negate = src_reg->Negate ^ negate_mask; |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_list.c |
---|
0,0 → 1,90 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_list.h" |
#include <stdlib.h> |
#include <stdio.h> |
#include "memory_pool.h" |
struct rc_list * rc_list(struct memory_pool * pool, void * item) |
{ |
struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); |
new->Item = item; |
new->Next = NULL; |
new->Prev = NULL; |
return new; |
} |
void rc_list_add(struct rc_list ** list, struct rc_list * new_value) |
{ |
struct rc_list * temp; |
if (*list == NULL) { |
*list = new_value; |
return; |
} |
for (temp = *list; temp->Next; temp = temp->Next); |
temp->Next = new_value; |
new_value->Prev = temp; |
} |
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) |
{ |
if (*list == rm_value) { |
*list = rm_value->Next; |
return; |
} |
rm_value->Prev->Next = rm_value->Next; |
if (rm_value->Next) { |
rm_value->Next->Prev = rm_value->Prev; |
} |
} |
unsigned int rc_list_count(struct rc_list * list) |
{ |
unsigned int count = 0; |
while (list) { |
count++; |
list = list->Next; |
} |
return count; |
} |
void rc_list_print(struct rc_list * list) |
{ |
while(list) { |
fprintf(stderr, "%p->", list->Item); |
list = list->Next; |
} |
fprintf(stderr, "\n"); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_list.h |
---|
0,0 → 1,46 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_LIST_H
#define RADEON_LIST_H

struct memory_pool;

/**
 * Node of a doubly linked list. A list is referred to by a pointer to
 * its first node; an empty list is a NULL pointer.
 */
struct rc_list {
	void * Item;            /* payload pointer */
	struct rc_list * Prev;  /* previous node, NULL when freshly created */
	struct rc_list * Next;  /* next node, NULL at the end of the list */
};

/* Allocate an unlinked node holding item. */
struct rc_list * rc_list(struct memory_pool * pool, void * item);

/* Append new_value to the end of *list. */
void rc_list_add(struct rc_list ** list, struct rc_list * new_value);

/* Unlink rm_value from *list. */
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);

/* Number of nodes in list. */
unsigned int rc_list_count(struct rc_list * list);

/* Print the Item pointers of list to stderr. */
void rc_list_print(struct rc_list * list);

#endif /* RADEON_LIST_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_opcodes.c |
---|
0,0 → 1,632 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_opcodes.h" |
#include "radeon_program.h" |
#include "radeon_program_constants.h" |
struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { |
{ |
.Opcode = RC_OPCODE_NOP, |
.Name = "NOP" |
}, |
{ |
.Opcode = RC_OPCODE_ILLEGAL_OPCODE, |
.Name = "ILLEGAL OPCODE" |
}, |
{ |
.Opcode = RC_OPCODE_ABS, |
.Name = "ABS", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_ADD, |
.Name = "ADD", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_ARL, |
.Name = "ARL", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_CEIL, |
.Name = "CEIL", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_CLAMP, |
.Name = "CLAMP", |
.NumSrcRegs = 3, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_CMP, |
.Name = "CMP", |
.NumSrcRegs = 3, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_CND, |
.Name = "CND", |
.NumSrcRegs = 3, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_COS, |
.Name = "COS", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DDX, |
.Name = "DDX", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DDY, |
.Name = "DDY", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DP2, |
.Name = "DP2", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DP3, |
.Name = "DP3", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DP4, |
.Name = "DP4", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DPH, |
.Name = "DPH", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_DST, |
.Name = "DST", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_EX2, |
.Name = "EX2", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_EXP, |
.Name = "EXP", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_FLR, |
.Name = "FLR", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_FRC, |
.Name = "FRC", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_KIL, |
.Name = "KIL", |
.NumSrcRegs = 1 |
}, |
{ |
.Opcode = RC_OPCODE_LG2, |
.Name = "LG2", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_LIT, |
.Name = "LIT", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_LOG, |
.Name = "LOG", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_LRP, |
.Name = "LRP", |
.NumSrcRegs = 3, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_MAD, |
.Name = "MAD", |
.NumSrcRegs = 3, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_MAX, |
.Name = "MAX", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_MIN, |
.Name = "MIN", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_MOV, |
.Name = "MOV", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_MUL, |
.Name = "MUL", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_POW, |
.Name = "POW", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_RCP, |
.Name = "RCP", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_ROUND, |
.Name = "ROUND", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_RSQ, |
.Name = "RSQ", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SCS, |
.Name = "SCS", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SEQ, |
.Name = "SEQ", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SFL, |
.Name = "SFL", |
.NumSrcRegs = 0, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SGE, |
.Name = "SGE", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SGT, |
.Name = "SGT", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SIN, |
.Name = "SIN", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsStandardScalar = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SLE, |
.Name = "SLE", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SLT, |
.Name = "SLT", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SNE, |
.Name = "SNE", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SSG, |
.Name = "SSG", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SUB, |
.Name = "SUB", |
.NumSrcRegs = 2, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_SWZ, |
.Name = "SWZ", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_TRUNC, |
.Name = "TRUNC", |
.NumSrcRegs = 1, |
.HasDstReg = 1, |
.IsComponentwise = 1 |
}, |
{ |
.Opcode = RC_OPCODE_XPD, |
.Name = "XPD", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_TEX, |
.Name = "TEX", |
.HasTexture = 1, |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_TXB, |
.Name = "TXB", |
.HasTexture = 1, |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_TXD, |
.Name = "TXD", |
.HasTexture = 1, |
.NumSrcRegs = 3, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_TXL, |
.Name = "TXL", |
.HasTexture = 1, |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_TXP, |
.Name = "TXP", |
.HasTexture = 1, |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_IF, |
.Name = "IF", |
.IsFlowControl = 1, |
.NumSrcRegs = 1 |
}, |
{ |
.Opcode = RC_OPCODE_ELSE, |
.Name = "ELSE", |
.IsFlowControl = 1, |
.NumSrcRegs = 0 |
}, |
{ |
.Opcode = RC_OPCODE_ENDIF, |
.Name = "ENDIF", |
.IsFlowControl = 1, |
.NumSrcRegs = 0 |
}, |
{ |
.Opcode = RC_OPCODE_BGNLOOP, |
.Name = "BGNLOOP", |
.IsFlowControl = 1, |
.NumSrcRegs = 0 |
}, |
{ |
.Opcode = RC_OPCODE_BRK, |
.Name = "BRK", |
.IsFlowControl = 1, |
.NumSrcRegs = 0 |
}, |
{ |
.Opcode = RC_OPCODE_ENDLOOP, |
.Name = "ENDLOOP", |
.IsFlowControl = 1, |
.NumSrcRegs = 0, |
}, |
{ |
.Opcode = RC_OPCODE_CONT, |
.Name = "CONT", |
.IsFlowControl = 1, |
.NumSrcRegs = 0 |
}, |
{ |
.Opcode = RC_OPCODE_REPL_ALPHA, |
.Name = "REPL_ALPHA", |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_OPCODE_BEGIN_TEX, |
.Name = "BEGIN_TEX" |
}, |
{ |
.Opcode = RC_OPCODE_KILP, |
.Name = "KILP", |
}, |
{ |
.Opcode = RC_ME_PRED_SEQ, |
.Name = "ME_PRED_SEQ", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SGT, |
.Name = "ME_PRED_SGT", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SGE, |
.Name = "ME_PRED_SGE", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SNEQ, |
.Name = "ME_PRED_SNEQ", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SET_CLR, |
.Name = "ME_PRED_SET_CLEAR", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SET_INV, |
.Name = "ME_PRED_SET_INV", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SET_POP, |
.Name = "ME_PRED_SET_POP", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_ME_PRED_SET_RESTORE, |
.Name = "ME_PRED_SET_RESTORE", |
.NumSrcRegs = 1, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_VE_PRED_SEQ_PUSH, |
.Name = "VE_PRED_SEQ_PUSH", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_VE_PRED_SGT_PUSH, |
.Name = "VE_PRED_SGT_PUSH", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_VE_PRED_SGE_PUSH, |
.Name = "VE_PRED_SGE_PUSH", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
}, |
{ |
.Opcode = RC_VE_PRED_SNEQ_PUSH, |
.Name = "VE_PRED_SNEQ_PUSH", |
.NumSrcRegs = 2, |
.HasDstReg = 1 |
} |
}; |
void rc_compute_sources_for_writemask( |
const struct rc_instruction *inst, |
unsigned int writemask, |
unsigned int *srcmasks) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
srcmasks[0] = 0; |
srcmasks[1] = 0; |
srcmasks[2] = 0; |
if (opcode->Opcode == RC_OPCODE_KIL) |
srcmasks[0] |= RC_MASK_XYZW; |
else if (opcode->Opcode == RC_OPCODE_IF) |
srcmasks[0] |= RC_MASK_X; |
if (!writemask) |
return; |
if (opcode->IsComponentwise) { |
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) |
srcmasks[src] |= writemask; |
} else if (opcode->IsStandardScalar) { |
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) |
srcmasks[src] |= writemask; |
} else { |
switch(opcode->Opcode) { |
case RC_OPCODE_ARL: |
srcmasks[0] |= RC_MASK_X; |
break; |
case RC_OPCODE_DP2: |
srcmasks[0] |= RC_MASK_XY; |
srcmasks[1] |= RC_MASK_XY; |
break; |
case RC_OPCODE_DP3: |
case RC_OPCODE_XPD: |
srcmasks[0] |= RC_MASK_XYZ; |
srcmasks[1] |= RC_MASK_XYZ; |
break; |
case RC_OPCODE_DP4: |
srcmasks[0] |= RC_MASK_XYZW; |
srcmasks[1] |= RC_MASK_XYZW; |
break; |
case RC_OPCODE_DPH: |
srcmasks[0] |= RC_MASK_XYZ; |
srcmasks[1] |= RC_MASK_XYZW; |
break; |
case RC_OPCODE_TXB: |
case RC_OPCODE_TXP: |
case RC_OPCODE_TXL: |
srcmasks[0] |= RC_MASK_W; |
/* Fall through */ |
case RC_OPCODE_TEX: |
switch (inst->U.I.TexSrcTarget) { |
case RC_TEXTURE_1D: |
srcmasks[0] |= RC_MASK_X; |
break; |
case RC_TEXTURE_2D: |
case RC_TEXTURE_RECT: |
case RC_TEXTURE_1D_ARRAY: |
srcmasks[0] |= RC_MASK_XY; |
break; |
case RC_TEXTURE_3D: |
case RC_TEXTURE_CUBE: |
case RC_TEXTURE_2D_ARRAY: |
srcmasks[0] |= RC_MASK_XYZ; |
break; |
} |
break; |
case RC_OPCODE_TXD: |
switch (inst->U.I.TexSrcTarget) { |
case RC_TEXTURE_1D_ARRAY: |
srcmasks[0] |= RC_MASK_Y; |
/* Fall through. */ |
case RC_TEXTURE_1D: |
srcmasks[0] |= RC_MASK_X; |
srcmasks[1] |= RC_MASK_X; |
srcmasks[2] |= RC_MASK_X; |
break; |
case RC_TEXTURE_2D_ARRAY: |
srcmasks[0] |= RC_MASK_Z; |
/* Fall through. */ |
case RC_TEXTURE_2D: |
case RC_TEXTURE_RECT: |
srcmasks[0] |= RC_MASK_XY; |
srcmasks[1] |= RC_MASK_XY; |
srcmasks[2] |= RC_MASK_XY; |
break; |
case RC_TEXTURE_3D: |
case RC_TEXTURE_CUBE: |
srcmasks[0] |= RC_MASK_XYZ; |
srcmasks[1] |= RC_MASK_XYZ; |
srcmasks[2] |= RC_MASK_XYZ; |
break; |
} |
break; |
case RC_OPCODE_DST: |
srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; |
srcmasks[1] |= RC_MASK_Y | RC_MASK_W; |
break; |
case RC_OPCODE_EXP: |
case RC_OPCODE_LOG: |
srcmasks[0] |= RC_MASK_XY; |
break; |
case RC_OPCODE_LIT: |
srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; |
break; |
default: |
break; |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_opcodes.h |
---|
0,0 → 1,284 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_OPCODES_H |
#define RADEON_OPCODES_H |
#include <assert.h> |
/** |
* Opcodes understood by the Radeon compiler. |
*/ |
/* NOTE: rc_get_opcode_info() indexes rc_opcodes[] by these values and asserts
 * that the entry found there has the same Opcode, so the order of the
 * enumerators must match the order of the rc_opcodes[] initializer. */
typedef enum { |
RC_OPCODE_NOP = 0, |
RC_OPCODE_ILLEGAL_OPCODE, |
/** vec4 instruction: dst.c = abs(src0.c); */ |
RC_OPCODE_ABS, |
/** vec4 instruction: dst.c = src0.c + src1.c; */ |
RC_OPCODE_ADD, |
/** special instruction: load address register |
* dst.x = floor(src.x), where dst must be an address register */ |
RC_OPCODE_ARL, |
/** vec4 instruction: dst.c = ceil(src0.c) */ |
RC_OPCODE_CEIL, |
/** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ |
RC_OPCODE_CLAMP, |
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ |
RC_OPCODE_CMP, |
/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */ |
RC_OPCODE_CND, |
/** scalar instruction: dst = cos(src0.x) */ |
RC_OPCODE_COS, |
/** special instruction: take vec4 partial derivative in X direction |
* dst.c = d src0.c / dx */ |
RC_OPCODE_DDX, |
/** special instruction: take vec4 partial derivative in Y direction |
* dst.c = d src0.c / dy */ |
RC_OPCODE_DDY, |
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ |
RC_OPCODE_DP2, |
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ |
RC_OPCODE_DP3, |
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ |
RC_OPCODE_DP4, |
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ |
RC_OPCODE_DPH, |
/** special instruction, see ARB_fragment_program */ |
RC_OPCODE_DST, |
/** scalar instruction: dst = 2**src0.x */ |
RC_OPCODE_EX2, |
/** special instruction, see ARB_vertex_program */ |
RC_OPCODE_EXP, |
/** vec4 instruction: dst.c = floor(src0.c) */ |
RC_OPCODE_FLR, |
/** vec4 instruction: dst.c = src0.c - floor(src0.c) */ |
RC_OPCODE_FRC, |
/** special instruction: stop execution if any component of src0 is negative */ |
RC_OPCODE_KIL, |
/** scalar instruction: dst = log_2(src0.x) */ |
RC_OPCODE_LG2, |
/** special instruction, see ARB_vertex_program */ |
RC_OPCODE_LIT, |
/** special instruction, see ARB_vertex_program */ |
RC_OPCODE_LOG, |
/** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ |
RC_OPCODE_LRP, |
/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ |
RC_OPCODE_MAD, |
/** vec4 instruction: dst.c = max(src0.c, src1.c) */ |
RC_OPCODE_MAX, |
/** vec4 instruction: dst.c = min(src0.c, src1.c) */ |
RC_OPCODE_MIN, |
/** vec4 instruction: dst.c = src0.c */ |
RC_OPCODE_MOV, |
/** vec4 instruction: dst.c = src0.c*src1.c */ |
RC_OPCODE_MUL, |
/** scalar instruction: dst = src0.x ** src1.x */ |
RC_OPCODE_POW, |
/** scalar instruction: dst = 1 / src0.x */ |
RC_OPCODE_RCP, |
/** vec4 instruction: dst.c = floor(src0.c + 0.5) */ |
RC_OPCODE_ROUND, |
/** scalar instruction: dst = 1 / sqrt(src0.x) */ |
RC_OPCODE_RSQ, |
/** special instruction, see ARB_fragment_program */ |
RC_OPCODE_SCS, |
/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ |
RC_OPCODE_SEQ, |
/** vec4 instruction: dst.c = 0.0 */ |
RC_OPCODE_SFL, |
/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ |
RC_OPCODE_SGE, |
/** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ |
RC_OPCODE_SGT, |
/** scalar instruction: dst = sin(src0.x) */ |
RC_OPCODE_SIN, |
/** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ |
RC_OPCODE_SLE, |
/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ |
RC_OPCODE_SLT, |
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ |
RC_OPCODE_SNE, |
/** vec4 instruction: dst.c = (src0.c < 0) ? -1 : ((src0.c > 0) ? 1 : 0) */ |
RC_OPCODE_SSG, |
/** vec4 instruction: dst.c = src0.c - src1.c */ |
RC_OPCODE_SUB, |
/** vec4 instruction: dst.c = src0.c */ |
RC_OPCODE_SWZ, |
/** vec4 instruction: dst.c = (abs(src0.c) - fract(abs(src0.c))) * sgn(src0.c) */ |
RC_OPCODE_TRUNC, |
/** special instruction, see ARB_fragment_program */ |
RC_OPCODE_XPD, |
/** texture instruction with one source operand (the coordinates) */
RC_OPCODE_TEX, |
/** texture instruction with one source operand; also reads src0.w
 * (presumably the LOD bias -- confirm against the TGSI/ARB definition) */
RC_OPCODE_TXB, |
/** texture instruction with three source operands (presumably the
 * coordinates plus explicit X/Y derivatives -- confirm) */
RC_OPCODE_TXD, |
/** texture instruction with one source operand; also reads src0.w
 * (presumably the explicit LOD -- confirm) */
RC_OPCODE_TXL, |
/** texture instruction with one source operand; also reads src0.w
 * (presumably the projective divisor -- confirm) */
RC_OPCODE_TXP, |
/** branch instruction: |
* If src0.x != 0.0, continue with the next instruction; |
* otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. |
*/ |
RC_OPCODE_IF, |
/** branch instruction: jump to matching RC_OPCODE_ENDIF */ |
RC_OPCODE_ELSE, |
/** branch instruction: has no effect */ |
RC_OPCODE_ENDIF, |
/* Loop flow-control instructions; none of them takes a source operand. */
RC_OPCODE_BGNLOOP, |
RC_OPCODE_BRK, |
RC_OPCODE_ENDLOOP, |
RC_OPCODE_CONT, |
/** special instruction, used in R300-R500 fragment program pair instructions |
* indicates that the result of the alpha operation shall be replicated |
* across all other channels */ |
RC_OPCODE_REPL_ALPHA, |
/** special instruction, used in R300-R500 fragment programs |
* to indicate the start of a block of texture instructions that |
* can run simultaneously. */ |
RC_OPCODE_BEGIN_TEX, |
/** Stop execution of the shader (GLSL discard) */ |
RC_OPCODE_KILP, |
/* Vertex shader CF Instructions */ |
/* The RC_ME_* opcodes take one source operand, the RC_VE_* opcodes take two;
 * all of them write a destination register. */
RC_ME_PRED_SEQ, |
RC_ME_PRED_SGT, |
RC_ME_PRED_SGE, |
RC_ME_PRED_SNEQ, |
RC_ME_PRED_SET_CLR, |
RC_ME_PRED_SET_INV, |
RC_ME_PRED_SET_POP, |
RC_ME_PRED_SET_RESTORE, |
RC_VE_PRED_SEQ_PUSH, |
RC_VE_PRED_SGT_PUSH, |
RC_VE_PRED_SGE_PUSH, |
RC_VE_PRED_SNEQ_PUSH, |
/** number of opcodes; not an opcode itself, used as the size of rc_opcodes[] */
MAX_RC_OPCODE |
} rc_opcode; |
/** Static description of one opcode; rc_opcodes[] holds one entry per rc_opcode. */
struct rc_opcode_info { |
/** opcode described by this entry; rc_get_opcode_info() asserts that it
 * equals the entry's index in rc_opcodes[] */
rc_opcode Opcode; |
/** mnemonic of the opcode as a C string */
const char * Name; |
/** true if the instruction reads from a texture. |
* |
* \note This is false for the KIL instruction, even though KIL is |
* a texture instruction from a hardware point of view. */ |
unsigned int HasTexture:1; |
/** number of source operands; a 2-bit field, so at most 3 */
unsigned int NumSrcRegs:2; |
/** true if the instruction writes a destination register */
unsigned int HasDstReg:1; |
/** true if this instruction affects control flow */ |
unsigned int IsFlowControl:1; |
/** true if this is a vector instruction that operates on components in parallel |
* without any cross-component interaction */ |
unsigned int IsComponentwise:1; |
/** true if this instruction sources only its operands X components |
* to compute one result which is smeared across all output channels */ |
unsigned int IsStandardScalar:1; |
}; |
/** Opcode description table, indexed by rc_opcode value (see rc_get_opcode_info()). */
extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; |
/**
 * Look up the static description of \p opcode in rc_opcodes[].
 *
 * In builds with assertions enabled this checks that \p opcode is within
 * the table and that the table entry at that position really describes
 * \p opcode (i.e. that the table and the enum are in the same order).
 */
static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
{
	const struct rc_opcode_info * info;

	assert((unsigned int)opcode < MAX_RC_OPCODE);
	info = &rc_opcodes[opcode];
	assert(info->Opcode == opcode);

	return info;
}
struct rc_instruction; |
/* Compute, for each source operand of inst, the mask of channels that are
 * read when the destination channels in writemask are needed.
 * srcmasks must have room for three entries; all three are always written. */
void rc_compute_sources_for_writemask( |
const struct rc_instruction *inst, |
unsigned int writemask, |
unsigned int *srcmasks); |
#endif /* RADEON_OPCODES_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_optimize.c |
---|
0,0 → 1,908 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_dataflow.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_list.h" |
#include "radeon_swizzle.h" |
#include "radeon_variable.h" |
/* Context handed from is_src_clobbered_scan_write() to
 * src_clobbered_reads_cb(): describes one register write that is being
 * checked against the sources of the instruction in ReaderData->Writer. */
struct src_clobbered_reads_cb_data { |
/* register file of the write being checked */
rc_register_file File; |
/* register index of the write being checked */
unsigned int Index; |
/* write mask of the write being checked */
unsigned int Mask; |
/* reader data whose AbortOnRead field is set when a clobber is found */
struct rc_reader_data * ReaderData; |
}; |
/* Callback used by presub_helper() to rewrite one reader of an ADD.
 * It is called as fn(inst_add, reader_inst, src_index), where src_index is
 * the index of the reader's source operand that reads the ADD's result. */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *, |
struct rc_instruction *, |
unsigned int); |
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) |
{ |
struct rc_src_register combine; |
combine.File = inner.File; |
combine.Index = inner.Index; |
combine.RelAddr = inner.RelAddr; |
if (outer.Abs) { |
combine.Abs = 1; |
combine.Negate = outer.Negate; |
} else { |
combine.Abs = inner.Abs; |
combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); |
combine.Negate ^= outer.Negate; |
} |
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); |
return combine; |
} |
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
rc_register_file file = src->File; |
struct rc_reader_data * reader_data = data; |
if(!rc_inst_can_use_presub(inst, |
reader_data->Writer->U.I.PreSub.Opcode, |
rc_swizzle_to_writemask(src->Swizzle), |
src, |
&reader_data->Writer->U.I.PreSub.SrcReg[0], |
&reader_data->Writer->U.I.PreSub.SrcReg[1])) { |
reader_data->Abort = 1; |
return; |
} |
/* XXX This could probably be handled better. */ |
if (file == RC_FILE_ADDRESS) { |
reader_data->Abort = 1; |
return; |
} |
/* These instructions cannot read from the constants file. |
* see radeonTransformTEX() |
*/ |
if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && |
reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && |
(inst->U.I.Opcode == RC_OPCODE_TEX || |
inst->U.I.Opcode == RC_OPCODE_TXB || |
inst->U.I.Opcode == RC_OPCODE_TXP || |
inst->U.I.Opcode == RC_OPCODE_TXD || |
inst->U.I.Opcode == RC_OPCODE_TXL || |
inst->U.I.Opcode == RC_OPCODE_KIL)){ |
reader_data->Abort = 1; |
return; |
} |
} |
static void src_clobbered_reads_cb( |
void * data, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct src_clobbered_reads_cb_data * sc_data = data; |
if (src->File == sc_data->File |
&& src->Index == sc_data->Index |
&& (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { |
sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; |
} |
if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { |
sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; |
} |
} |
static void is_src_clobbered_scan_write( |
void * data, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct src_clobbered_reads_cb_data sc_data; |
struct rc_reader_data * reader_data = data; |
sc_data.File = file; |
sc_data.Index = index; |
sc_data.Mask = mask; |
sc_data.ReaderData = reader_data; |
rc_for_all_reads_src(reader_data->Writer, |
src_clobbered_reads_cb, &sc_data); |
} |
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) |
{ |
struct rc_reader_data reader_data; |
unsigned int i; |
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || |
inst_mov->U.I.WriteALUResult) |
return; |
/* Get a list of all the readers of this MOV instruction. */ |
reader_data.ExitOnAbort = 1; |
rc_get_readers(c, inst_mov, &reader_data, |
copy_propagate_scan_read, NULL, |
is_src_clobbered_scan_write); |
if (reader_data.Abort || reader_data.ReaderCount == 0) |
return; |
/* We can propagate SaturateMode if all the readers are MOV instructions |
* without a presubtract operation, source negation and absolute. |
* In that case, we just move SaturateMode to all readers. */ |
if (inst_mov->U.I.SaturateMode) { |
for (i = 0; i < reader_data.ReaderCount; i++) { |
struct rc_instruction * inst = reader_data.Readers[i].Inst; |
if (inst->U.I.Opcode != RC_OPCODE_MOV || |
inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || |
inst->U.I.SrcReg[0].Abs || |
inst->U.I.SrcReg[0].Negate) { |
return; |
} |
} |
} |
/* Propagate the MOV instruction. */ |
for (i = 0; i < reader_data.ReaderCount; i++) { |
struct rc_instruction * inst = reader_data.Readers[i].Inst; |
*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); |
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) |
inst->U.I.PreSub = inst_mov->U.I.PreSub; |
if (!inst->U.I.SaturateMode) |
inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; |
} |
/* Finally, remove the original MOV instruction */ |
rc_remove_instruction(inst_mov); |
} |
/** |
* Check if a source register is actually always the same |
* swizzle constant. |
*/ |
static int is_src_uniform_constant(struct rc_src_register src, |
rc_swizzle * pswz, unsigned int * pnegate) |
{ |
int have_used = 0; |
if (src.File != RC_FILE_NONE) { |
*pswz = 0; |
return 0; |
} |
for(unsigned int chan = 0; chan < 4; ++chan) { |
unsigned int swz = GET_SWZ(src.Swizzle, chan); |
if (swz < 4) { |
*pswz = 0; |
return 0; |
} |
if (swz == RC_SWIZZLE_UNUSED) |
continue; |
if (!have_used) { |
*pswz = swz; |
*pnegate = GET_BIT(src.Negate, chan); |
have_used = 1; |
} else { |
if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { |
*pswz = 0; |
return 0; |
} |
} |
} |
return 1; |
} |
static void constant_folding_mad(struct rc_instruction * inst) |
{ |
rc_swizzle swz = 0; |
unsigned int negate= 0; |
if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MUL; |
return; |
} |
} |
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ONE) { |
inst->U.I.Opcode = RC_OPCODE_ADD; |
if (negate) |
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; |
return; |
} else if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; |
return; |
} |
} |
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ONE) { |
inst->U.I.Opcode = RC_OPCODE_ADD; |
if (negate) |
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; |
return; |
} else if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; |
return; |
} |
} |
} |
static void constant_folding_mul(struct rc_instruction * inst) |
{ |
rc_swizzle swz = 0; |
unsigned int negate = 0; |
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ONE) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; |
if (negate) |
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
return; |
} else if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; |
return; |
} |
} |
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ONE) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
if (negate) |
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
return; |
} else if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; |
return; |
} |
} |
} |
static void constant_folding_add(struct rc_instruction * inst) |
{ |
rc_swizzle swz = 0; |
unsigned int negate = 0; |
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; |
return; |
} |
} |
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { |
if (swz == RC_SWIZZLE_ZERO) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
return; |
} |
} |
} |
/** |
* Replace 0.0, 1.0 and 0.5 immediate constants by their |
* respective swizzles. Simplify instructions like ADD dst, src, 0; |
*/ |
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned int i; |
/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ |
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { |
struct rc_constant * constant; |
struct rc_src_register newsrc; |
int have_real_reference; |
unsigned int chan; |
/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ |
for (chan = 0; chan < 4; ++chan) |
if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) |
break; |
if (chan == 4) { |
inst->U.I.SrcReg[src].File = RC_FILE_NONE; |
continue; |
} |
/* Convert immediates to swizzles. */ |
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || |
inst->U.I.SrcReg[src].RelAddr || |
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) |
continue; |
constant = |
&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; |
if (constant->Type != RC_CONSTANT_IMMEDIATE) |
continue; |
newsrc = inst->U.I.SrcReg[src]; |
have_real_reference = 0; |
for (chan = 0; chan < 4; ++chan) { |
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); |
unsigned int newswz; |
float imm; |
float baseimm; |
if (swz >= 4) |
continue; |
imm = constant->u.Immediate[swz]; |
baseimm = imm; |
if (imm < 0.0) |
baseimm = -baseimm; |
if (baseimm == 0.0) { |
newswz = RC_SWIZZLE_ZERO; |
} else if (baseimm == 1.0) { |
newswz = RC_SWIZZLE_ONE; |
} else if (baseimm == 0.5 && c->has_half_swizzles) { |
newswz = RC_SWIZZLE_HALF; |
} else { |
have_real_reference = 1; |
continue; |
} |
SET_SWZ(newsrc.Swizzle, chan, newswz); |
if (imm < 0.0 && !newsrc.Abs) |
newsrc.Negate ^= 1 << chan; |
} |
if (!have_real_reference) { |
newsrc.File = RC_FILE_NONE; |
newsrc.Index = 0; |
} |
/* don't make the swizzle worse */ |
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && |
c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) |
continue; |
inst->U.I.SrcReg[src] = newsrc; |
} |
/* Simplify instructions based on constants */ |
if (inst->U.I.Opcode == RC_OPCODE_MAD) |
constant_folding_mad(inst); |
/* note: MAD can simplify to MUL or ADD */ |
if (inst->U.I.Opcode == RC_OPCODE_MUL) |
constant_folding_mul(inst); |
else if (inst->U.I.Opcode == RC_OPCODE_ADD) |
constant_folding_add(inst); |
/* In case this instruction has been converted, make sure all of the |
* registers that are no longer used are empty. */ |
opcode = rc_get_opcode_info(inst->U.I.Opcode); |
for(i = opcode->NumSrcRegs; i < 3; i++) { |
memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); |
} |
} |
/** |
* If src and dst use the same register, this function returns a writemask that |
* indicates wich components are read by src. Otherwise zero is returned. |
*/ |
static unsigned int src_reads_dst_mask(struct rc_src_register src, |
struct rc_dst_register dst) |
{ |
if (dst.File != src.File || dst.Index != src.Index) { |
return 0; |
} |
return rc_swizzle_to_writemask(src.Swizzle); |
} |
/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) |
* in any of its channels. Return 0 otherwise. */ |
static int src_has_const_swz(struct rc_src_register src) { |
int chan; |
for(chan = 0; chan < 4; chan++) { |
unsigned int swz = GET_SWZ(src.Swizzle, chan); |
if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF |
|| swz == RC_SWIZZLE_ONE) { |
return 1; |
} |
} |
return 0; |
} |
static void presub_scan_read( |
void * data, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct rc_reader_data * reader_data = data; |
rc_presubtract_op * presub_opcode = reader_data->CbData; |
if (!rc_inst_can_use_presub(inst, *presub_opcode, |
reader_data->Writer->U.I.DstReg.WriteMask, |
src, |
&reader_data->Writer->U.I.SrcReg[0], |
&reader_data->Writer->U.I.SrcReg[1])) { |
reader_data->Abort = 1; |
return; |
} |
} |
static int presub_helper( |
struct radeon_compiler * c, |
struct rc_instruction * inst_add, |
rc_presubtract_op presub_opcode, |
rc_presub_replace_fn presub_replace) |
{ |
struct rc_reader_data reader_data; |
unsigned int i; |
rc_presubtract_op cb_op = presub_opcode; |
reader_data.CbData = &cb_op; |
reader_data.ExitOnAbort = 1; |
rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, |
is_src_clobbered_scan_write); |
if (reader_data.Abort || reader_data.ReaderCount == 0) |
return 0; |
for(i = 0; i < reader_data.ReaderCount; i++) { |
unsigned int src_index; |
struct rc_reader reader = reader_data.Readers[i]; |
const struct rc_opcode_info * info = |
rc_get_opcode_info(reader.Inst->U.I.Opcode); |
for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { |
if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) |
presub_replace(inst_add, reader.Inst, src_index); |
} |
} |
return 1; |
} |
/* This function assumes that inst_add->U.I.SrcReg[0] and |
* inst_add->U.I.SrcReg[1] aren't both negative. */ |
static void presub_replace_add( |
struct rc_instruction * inst_add, |
struct rc_instruction * inst_reader, |
unsigned int src_index) |
{ |
rc_presubtract_op presub_opcode; |
if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) |
presub_opcode = RC_PRESUB_SUB; |
else |
presub_opcode = RC_PRESUB_ADD; |
if (inst_add->U.I.SrcReg[1].Negate) { |
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; |
inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; |
} else { |
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; |
inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; |
} |
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; |
inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; |
inst_reader->U.I.PreSub.Opcode = presub_opcode; |
inst_reader->U.I.SrcReg[src_index] = |
chain_srcregs(inst_reader->U.I.SrcReg[src_index], |
inst_reader->U.I.PreSub.SrcReg[0]); |
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; |
inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; |
} |
static int is_presub_candidate( |
struct radeon_compiler * c, |
struct rc_instruction * inst) |
{ |
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned int i; |
unsigned int is_constant[2] = {0, 0}; |
assert(inst->U.I.Opcode == RC_OPCODE_ADD); |
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE |
|| inst->U.I.SaturateMode |
|| inst->U.I.WriteALUResult |
|| inst->U.I.Omod) { |
return 0; |
} |
/* If both sources use a constant swizzle, then we can't convert it to |
* a presubtract operation. In fact for the ADD and SUB presubtract |
* operations neither source can contain a constant swizzle. This |
* specific case is checked in peephole_add_presub_add() when |
* we make sure the swizzles for both sources are equal, so we |
* don't need to worry about it here. */ |
for (i = 0; i < 2; i++) { |
int chan; |
for (chan = 0; chan < 4; chan++) { |
rc_swizzle swz = |
get_swz(inst->U.I.SrcReg[i].Swizzle, chan); |
if (swz == RC_SWIZZLE_ONE |
|| swz == RC_SWIZZLE_ZERO |
|| swz == RC_SWIZZLE_HALF) { |
is_constant[i] = 1; |
} |
} |
} |
if (is_constant[0] && is_constant[1]) |
return 0; |
for(i = 0; i < info->NumSrcRegs; i++) { |
struct rc_src_register src = inst->U.I.SrcReg[i]; |
if (src_reads_dst_mask(src, inst->U.I.DstReg)) |
return 0; |
src.File = RC_FILE_PRESUB; |
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) |
return 0; |
} |
return 1; |
} |
static int peephole_add_presub_add( |
struct radeon_compiler * c, |
struct rc_instruction * inst_add) |
{ |
unsigned dstmask = inst_add->U.I.DstReg.WriteMask; |
unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; |
unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; |
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) |
return 0; |
/* src0 and src1 can't have absolute values */ |
if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) |
return 0; |
/* presub_replace_add() assumes only one is negative */ |
if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) |
return 0; |
/* if src0 is negative, at least all bits of dstmask have to be set */ |
if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) |
return 0; |
/* if src1 is negative, at least all bits of dstmask have to be set */ |
if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) |
return 0; |
if (!is_presub_candidate(c, inst_add)) |
return 0; |
if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { |
rc_remove_instruction(inst_add); |
return 1; |
} |
return 0; |
} |
static void presub_replace_inv( |
struct rc_instruction * inst_add, |
struct rc_instruction * inst_reader, |
unsigned int src_index) |
{ |
/* We must be careful not to modify inst_add, since it |
* is possible it will remain part of the program.*/ |
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; |
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; |
inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; |
inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], |
inst_reader->U.I.PreSub.SrcReg[0]); |
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; |
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; |
} |
/** |
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] |
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source |
* of the add instruction must have the constatnt 1 swizzle. This function |
* does not check const registers to see if their value is 1.0, so it should |
* be called after the constant_folding optimization. |
* @return |
* 0 if the ADD instruction is still part of the program. |
* 1 if the ADD instruction is no longer part of the program. |
*/ |
static int peephole_add_presub_inv( |
struct radeon_compiler * c, |
struct rc_instruction * inst_add) |
{ |
unsigned int i, swz; |
if (!is_presub_candidate(c, inst_add)) |
return 0; |
/* Check if src0 is 1. */ |
/* XXX It would be nice to use is_src_uniform_constant here, but that |
* function only works if the register's file is RC_FILE_NONE */ |
for(i = 0; i < 4; i++ ) { |
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); |
if(((1 << i) & inst_add->U.I.DstReg.WriteMask) |
&& swz != RC_SWIZZLE_ONE) { |
return 0; |
} |
} |
/* Check src1. */ |
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != |
inst_add->U.I.DstReg.WriteMask |
|| inst_add->U.I.SrcReg[1].Abs |
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY |
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) |
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) { |
return 0; |
} |
if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { |
rc_remove_instruction(inst_add); |
return 1; |
} |
return 0; |
} |
/* State shared with omod_filter_reader_cb() and omod_filter_writer_cb()
 * while peephole_mul_omod() scans instructions. */
struct peephole_mul_cb_data { |
/* Destination register that every visited read and write is compared with. */
struct rc_dst_register * Writer; |
/* Set to 1 by the callbacks once a visited instruction reads or writes
 * the register described by Writer. */
unsigned int Clobbered; |
}; |
static void omod_filter_reader_cb( |
void * userdata, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct peephole_mul_cb_data * d = userdata; |
if (rc_src_reads_dst_mask(file, mask, index, |
d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { |
d->Clobbered = 1; |
} |
} |
static void omod_filter_writer_cb( |
void * userdata, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct peephole_mul_cb_data * d = userdata; |
if (file == d->Writer->File && index == d->Writer->Index && |
(mask & d->Writer->WriteMask)) { |
d->Clobbered = 1; |
} |
} |
/**
 * Try to fold a MUL by an immediate constant into the Omod field of the
 * instruction(s) that write the MUL's temporary source.
 *
 * The MUL must have exactly one temporary source and one constant source.
 * The constant must be an immediate equal to 2, 4, 8, 1/2, 1/4 or 1/8.
 * On success the writers are redirected to the MUL's destination, take
 * over its saturate mode, and the MUL is removed.
 *
 * @param c        The compiler.
 * @param inst_mul The MUL instruction to examine.
 * @param var_list Variable list handed to
 *                 rc_variable_list_get_writers_one_reader().
 * @return 1 if inst_mul was removed from the program, 0 otherwise.
 */
static int peephole_mul_omod( |
struct radeon_compiler * c, |
struct rc_instruction * inst_mul, |
struct rc_list * var_list) |
{ |
unsigned int chan = 0, swz, i; |
int const_index = -1; |
int temp_index = -1; |
float const_value; |
rc_omod_op omod_op = RC_OMOD_DISABLE; |
struct rc_list * writer_list; |
struct rc_variable * var; |
struct peephole_mul_cb_data cb_data; |
unsigned writemask_sum; |
/* Classify the two sources: exactly one must be a temporary and the
 * other a constant.  Any other combination aborts the optimization. */
for (i = 0; i < 2; i++) { |
unsigned int j; |
if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT |
&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { |
return 0; |
} |
if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { |
if (temp_index != -1) { |
/* The instruction has two temp sources */ |
return 0; |
} else { |
temp_index = i; |
continue; |
} |
} |
/* If we get this far Src[i] must be a constant src */ |
if (inst_mul->U.I.SrcReg[i].Negate) { |
return 0; |
} |
/* The constant src needs to read from the same swizzle */ |
swz = RC_SWIZZLE_UNUSED; |
chan = 0; |
/* Remember the first used channel in chan; it is the channel whose
 * constant value is looked up below. */
for (j = 0; j < 4; j++) { |
unsigned int j_swz = |
GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); |
if (j_swz == RC_SWIZZLE_UNUSED) { |
continue; |
} |
if (swz == RC_SWIZZLE_UNUSED) { |
swz = j_swz; |
chan = j; |
} else if (j_swz != swz) { |
return 0; |
} |
} |
if (const_index != -1) { |
/* The instruction has two constant sources */ |
return 0; |
} else { |
const_index = i; |
} |
} |
/* Only immediate constants have a value that is known here. */
if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, |
inst_mul->U.I.SrcReg[const_index].Index)) { |
return 0; |
} |
const_value = rc_get_constant_value(c, |
inst_mul->U.I.SrcReg[const_index].Index, |
inst_mul->U.I.SrcReg[const_index].Swizzle, |
inst_mul->U.I.SrcReg[const_index].Negate, |
chan); |
/* Map the constant onto an Omod operation; any other value aborts. */
if (const_value == 2.0f) { |
omod_op = RC_OMOD_MUL_2; |
} else if (const_value == 4.0f) { |
omod_op = RC_OMOD_MUL_4; |
} else if (const_value == 8.0f) { |
omod_op = RC_OMOD_MUL_8; |
} else if (const_value == (1.0f / 2.0f)) { |
omod_op = RC_OMOD_DIV_2; |
} else if (const_value == (1.0f / 4.0f)) { |
omod_op = RC_OMOD_DIV_4; |
} else if (const_value == (1.0f / 8.0f)) { |
omod_op = RC_OMOD_DIV_8; |
} else { |
return 0; |
} |
writer_list = rc_variable_list_get_writers_one_reader(var_list, |
RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); |
if (!writer_list) { |
return 0; |
} |
cb_data.Clobbered = 0; |
cb_data.Writer = &inst_mul->U.I.DstReg; |
/* Validate every writer: it must not be a texture instruction, must not
 * saturate, and no instruction between it and the MUL may read or write
 * the MUL's destination register. */
for (var = writer_list->Item; var; var = var->Friend) { |
struct rc_instruction * inst; |
const struct rc_opcode_info * info = rc_get_opcode_info( |
var->Inst->U.I.Opcode); |
if (info->HasTexture) { |
return 0; |
} |
if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { |
return 0; |
} |
/* Walk backwards from the MUL to the writer. */
for (inst = inst_mul->Prev; inst != var->Inst; |
inst = inst->Prev) { |
rc_for_all_reads_mask(inst, omod_filter_reader_cb, |
&cb_data); |
rc_for_all_writes_mask(inst, omod_filter_writer_cb, |
&cb_data); |
if (cb_data.Clobbered) { |
break; |
} |
} |
} |
if (cb_data.Clobbered) { |
return 0; |
} |
/* Rewrite the instructions */ |
writemask_sum = rc_variable_writemask_sum(writer_list->Item); |
for (var = writer_list->Item; var; var = var->Friend) { |
struct rc_variable * writer = var; |
unsigned conversion_swizzle = rc_make_conversion_swizzle( |
writemask_sum, |
inst_mul->U.I.DstReg.WriteMask); |
writer->Inst->U.I.Omod = omod_op; |
writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; |
writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; |
rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); |
writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; |
} |
/* The writers now produce the scaled value directly. */
rc_remove_instruction(inst_mul); |
return 1; |
} |
/** |
* @return |
* 0 if inst is still part of the program. |
* 1 if inst is no longer part of the program. |
*/ |
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) |
{ |
switch(inst->U.I.Opcode){ |
case RC_OPCODE_ADD: |
if (c->has_presub) { |
if(peephole_add_presub_inv(c, inst)) |
return 1; |
if(peephole_add_presub_add(c, inst)) |
return 1; |
} |
break; |
default: |
break; |
} |
return 0; |
} |
void rc_optimize(struct radeon_compiler * c, void *user) |
{ |
struct rc_instruction * inst = c->Program.Instructions.Next; |
struct rc_list * var_list; |
while(inst != &c->Program.Instructions) { |
struct rc_instruction * cur = inst; |
inst = inst->Next; |
constant_folding(c, cur); |
if(peephole(c, cur)) |
continue; |
if (cur->U.I.Opcode == RC_OPCODE_MOV) { |
copy_propagate(c, cur); |
/* cur may no longer be part of the program */ |
} |
} |
if (!c->has_omod) { |
return; |
} |
inst = c->Program.Instructions.Next; |
while(inst != &c->Program.Instructions) { |
struct rc_instruction * cur = inst; |
inst = inst->Next; |
if (cur->U.I.Opcode == RC_OPCODE_MUL) { |
var_list = rc_get_variables(c); |
peephole_mul_omod(c, cur, var_list); |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c |
---|
0,0 → 1,88 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_opcodes.h" |
#include "radeon_program_pair.h" |
static void mark_used_presub(struct rc_pair_sub_instruction * sub) |
{ |
if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { |
unsigned int presub_reg_count = rc_presubtract_src_reg_count( |
sub->Src[RC_PAIR_PRESUB_SRC].Index); |
unsigned int i; |
for (i = 0; i < presub_reg_count; i++) { |
sub->Src[i].Used = 1; |
} |
} |
} |
static void mark_used( |
struct rc_instruction * inst, |
struct rc_pair_sub_instruction * sub) |
{ |
unsigned int i; |
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); |
for (i = 0; i < info->NumSrcRegs; i++) { |
unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); |
if (src_type & RC_SOURCE_RGB) { |
inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; |
} |
if (src_type & RC_SOURCE_ALPHA) { |
inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; |
} |
} |
} |
/** |
* This pass finds sources that are not used by their instruction and marks |
* them as unused. |
*/ |
void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) |
{ |
struct rc_instruction * inst; |
for (inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
unsigned int i; |
if (inst->Type == RC_INSTRUCTION_NORMAL) |
continue; |
/* Mark all sources as unused */ |
for (i = 0; i < 4; i++) { |
inst->U.P.RGB.Src[i].Used = 0; |
inst->U.P.Alpha.Src[i].Used = 0; |
} |
mark_used(inst, &inst->U.P.RGB); |
mark_used(inst, &inst->U.P.Alpha); |
mark_used_presub(&inst->U.P.RGB); |
mark_used_presub(&inst->U.P.Alpha); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c |
---|
0,0 → 1,789 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_program_pair.h" |
#include <stdio.h> |
#include "main/glheader.h" |
#include "program/register_allocate.h" |
#include "util/u_memory.h" |
#include "ralloc.h" |
#include "r300_fragprog_swizzle.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_list.h" |
#include "radeon_regalloc.h" |
#include "radeon_variable.h" |
/* Compile-time switch for the debug output of this file; DBG() and
 * print_reg() only produce output when it is non-zero. */
#define VERBOSE 0 |
/* printf-style debug trace written to stderr when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) |
/* Allocation bookkeeping for a single input or temporary register. */
struct register_info { |
/* One live interval per channel; indexed by channel number 0..3. */
struct live_intervals Live[4]; |
/* Set for inputs that are read by at least one instruction. */
unsigned int Used:1; |
/* Set once File/Index describe the register this one is remapped to. */
unsigned int Allocated:1; |
/* Register file of the allocated register. */
unsigned int File:3; |
/* Index of the allocated register; remap_register() writes it back
 * into the instructions. */
unsigned int Index:RC_REGISTER_INDEX_BITS; |
/* For inputs: one bit per channel whose live interval is in use. */
unsigned int Writemask; |
}; |
/* Working state of one rc_pair_regalloc() run. */
struct regalloc_state { |
/* The compiler whose program is being allocated. */
struct radeon_compiler * C; |
/* Array of NumInputs entries, indexed by input register index. */
struct register_info * Input; |
unsigned int NumInputs; |
/* Array of NumTemporaries entries, indexed by temporary register index. */
struct register_info * Temporary; |
unsigned int NumTemporaries; |
/* Non-zero when the inputs-only allocation is used; only then does
 * remap_register() rewrite temporaries. */
unsigned int Simple; |
/* Largest IP of a loop end seen so far; input live intervals are
 * extended at least up to this point. */
int LoopEnd; |
}; |
/* Description of one register class: its ID and the writemasks a
 * register of this class may use. */
struct rc_class { |
enum rc_reg_class ID; |
/* Number of valid entries at the start of Writemasks. */
unsigned int WritemaskCount; |
/** List of writemasks that belong to this class */ |
unsigned int Writemasks[3]; |
}; |
static const struct rc_class rc_class_list [] = { |
{RC_REG_CLASS_SINGLE, 3, |
{RC_MASK_X, |
RC_MASK_Y, |
RC_MASK_Z}}, |
{RC_REG_CLASS_DOUBLE, 3, |
{RC_MASK_X | RC_MASK_Y, |
RC_MASK_X | RC_MASK_Z, |
RC_MASK_Y | RC_MASK_Z}}, |
{RC_REG_CLASS_TRIPLE, 1, |
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_ALPHA, 1, |
{RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, |
{RC_MASK_X | RC_MASK_W, |
RC_MASK_Y | RC_MASK_W, |
RC_MASK_Z | RC_MASK_W}}, |
{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, |
{RC_MASK_X | RC_MASK_Y | RC_MASK_W, |
RC_MASK_X | RC_MASK_Z | RC_MASK_W, |
RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, |
{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, |
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_X, 1, |
{RC_MASK_X, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_Y, 1, |
{RC_MASK_Y, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_Z, 1, |
{RC_MASK_Z, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_XY, 1, |
{RC_MASK_X | RC_MASK_Y, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_YZ, 1, |
{RC_MASK_Y | RC_MASK_Z, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_XZ, 1, |
{RC_MASK_X | RC_MASK_Z, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_XW, 1, |
{RC_MASK_X | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_YW, 1, |
{RC_MASK_Y | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_ZW, 1, |
{RC_MASK_Z | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_XYW, 1, |
{RC_MASK_X | RC_MASK_Y | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_YZW, 1, |
{RC_MASK_Y | RC_MASK_Z | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}}, |
{RC_REG_CLASS_XZW, 1, |
{RC_MASK_X | RC_MASK_Z | RC_MASK_W, |
RC_MASK_NONE, |
RC_MASK_NONE}} |
}; |
static void print_live_intervals(struct live_intervals * src) |
{ |
if (!src || !src->Used) { |
DBG("(null)"); |
return; |
} |
DBG("(%i,%i)", src->Start, src->End); |
} |
static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) |
{ |
if (VERBOSE) { |
DBG("overlap_live_intervals: "); |
print_live_intervals(a); |
DBG(" to "); |
print_live_intervals(b); |
DBG("\n"); |
} |
if (!a->Used || !b->Used) { |
DBG(" unused interval\n"); |
return 0; |
} |
if (a->Start > b->Start) { |
if (a->Start < b->End) { |
DBG(" overlap\n"); |
return 1; |
} |
} else if (b->Start > a->Start) { |
if (b->Start < a->End) { |
DBG(" overlap\n"); |
return 1; |
} |
} else { /* a->Start == b->Start */ |
if (a->Start != a->End && b->Start != b->End) { |
DBG(" overlap\n"); |
return 1; |
} |
} |
DBG(" no overlap\n"); |
return 0; |
} |
static void scan_read_callback(void * data, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask) |
{ |
struct regalloc_state * s = data; |
struct register_info * reg; |
unsigned int i; |
if (file != RC_FILE_INPUT) |
return; |
s->Input[index].Used = 1; |
reg = &s->Input[index]; |
for (i = 0; i < 4; i++) { |
if (!((mask >> i) & 0x1)) { |
continue; |
} |
reg->Live[i].Used = 1; |
reg->Live[i].Start = 0; |
reg->Live[i].End = |
s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; |
} |
} |
static void remap_register(void * data, struct rc_instruction * inst, |
rc_register_file * file, unsigned int * index) |
{ |
struct regalloc_state * s = data; |
const struct register_info * reg; |
if (*file == RC_FILE_TEMPORARY && s->Simple) |
reg = &s->Temporary[*index]; |
else if (*file == RC_FILE_INPUT) |
reg = &s->Input[*index]; |
else |
return; |
if (reg->Allocated) { |
*index = reg->Index; |
} |
} |
static void alloc_input_simple(void * data, unsigned int input, |
unsigned int hwreg) |
{ |
struct regalloc_state * s = data; |
if (input >= s->NumInputs) |
return; |
s->Input[input].Allocated = 1; |
s->Input[input].File = RC_FILE_TEMPORARY; |
s->Input[input].Index = hwreg; |
} |
/* This functions offsets the temporary register indices by the number |
* of input registers, because input registers are actually temporaries and |
* should not occupy the same space. |
* |
* This pass is supposed to be used to maintain correct allocation of inputs |
* if the standard register allocation is disabled. */ |
static void do_regalloc_inputs_only(struct regalloc_state * s) |
{ |
for (unsigned i = 0; i < s->NumTemporaries; i++) { |
s->Temporary[i].Allocated = 1; |
s->Temporary[i].File = RC_FILE_TEMPORARY; |
s->Temporary[i].Index = i + s->NumInputs; |
} |
} |
/** Return 1 if op is RC_OPCODE_DDX or RC_OPCODE_DDY, 0 otherwise. */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
static int find_class( |
const struct rc_class * classes, |
unsigned int writemask, |
unsigned int max_writemask_count) |
{ |
unsigned int i; |
for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
unsigned int j; |
if (classes[i].WritemaskCount > max_writemask_count) { |
continue; |
} |
for (j = 0; j < 3; j++) { |
if (classes[i].Writemasks[j] == writemask) { |
return i; |
} |
} |
} |
return -1; |
} |
/* Data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data { |
/* Points at the caller's flag; the callback clears it when an adjusted
 * swizzle would not be native. */
unsigned int * can_change_writemask; |
/* Swizzle passed to rc_adjust_channels() for every argument visited. */
unsigned int conversion_swizzle; |
}; |
static void variable_get_class_read_cb( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_pair_instruction_arg * arg, |
struct rc_pair_instruction_source * src) |
{ |
struct variable_get_class_cb_data * d = userdata; |
unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, |
d->conversion_swizzle); |
if (!r300_swizzle_is_native_basic(new_swizzle)) { |
*d->can_change_writemask = 0; |
} |
} |
/**
 * Determine the register class of a variable and its friends.
 *
 * The class is looked up from the sum of the variable's writemasks.
 * Classes that offer several writemasks are only considered when the
 * writemask of the variable may be changed, which a series of checks
 * below can rule out.
 *
 * @param variable The variable to classify.
 * @param classes  The table of register classes to search.
 * @return The ID of the matching class.  When no class matches, an error
 *         is reported through rc_error() and 0 is returned.
 */
static enum rc_reg_class variable_get_class( |
struct rc_variable * variable, |
const struct rc_class * classes) |
{ |
unsigned int i; |
unsigned int can_change_writemask= 1; |
unsigned int writemask = rc_variable_writemask_sum(variable); |
struct rc_list * readers = rc_variable_readers_union(variable); |
int class_index; |
if (!variable->C->is_r500) { |
struct rc_class c; |
struct rc_variable * var_ptr; |
/* The assumption here is that if an instruction has type |
* RC_INSTRUCTION_NORMAL then it is a TEX instruction. |
* r300 and r400 can't swizzle the result of a TEX lookup. */ |
for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { |
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { |
writemask = RC_MASK_XYZW; |
} |
} |
/* Check if it is possible to do swizzle packing for r300/r400 |
* without creating non-native swizzles. */ |
class_index = find_class(classes, writemask, 3); |
if (class_index < 0) { |
goto error; |
} |
c = classes[class_index]; |
/* A class with a single writemask leaves nothing to choose. */
if (c.WritemaskCount == 1) { |
goto done; |
} |
/* Try every writemask of the class against the variable, its friends
 * and all of their readers. */
for (i = 0; i < c.WritemaskCount; i++) { |
struct rc_variable * var_ptr; |
for (var_ptr = variable; var_ptr; |
var_ptr = var_ptr->Friend) { |
int j; |
unsigned int conversion_swizzle = |
rc_make_conversion_swizzle( |
writemask, c.Writemasks[i]); |
struct variable_get_class_cb_data d; |
d.can_change_writemask = &can_change_writemask; |
d.conversion_swizzle = conversion_swizzle; |
/* If we get this far var_ptr->Inst has to |
* be a pair instruction. If variable or any |
* of its friends are normal instructions, |
* then the writemask will be set to RC_MASK_XYZW |
* and the function will return before it gets |
* here. */ |
rc_pair_for_all_reads_arg(var_ptr->Inst, |
variable_get_class_read_cb, &d); |
for (j = 0; j < var_ptr->ReaderCount; j++) { |
unsigned int old_swizzle; |
unsigned int new_swizzle; |
struct rc_reader r = var_ptr->Readers[j]; |
if (r.Inst->Type == |
RC_INSTRUCTION_PAIR ) { |
old_swizzle = r.U.P.Arg->Swizzle; |
} else { |
/* Source operands of TEX |
* instructions can't be |
* swizzle on r300/r400 GPUs. |
*/ |
if (!variable->C->is_r500) { |
can_change_writemask = 0; |
break; |
} |
old_swizzle = r.U.I.Src->Swizzle; |
} |
new_swizzle = rc_adjust_channels( |
old_swizzle, conversion_swizzle); |
if (!r300_swizzle_is_native_basic( |
new_swizzle)) { |
can_change_writemask = 0; |
break; |
} |
} |
if (!can_change_writemask) { |
break; |
} |
} |
if (!can_change_writemask) { |
break; |
} |
} |
} |
if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { |
/* DDX/DDY seem to always fail when their writemasks are |
* changed.*/ |
if (is_derivative(variable->Inst->U.P.RGB.Opcode) |
|| is_derivative(variable->Inst->U.P.Alpha.Opcode)) { |
can_change_writemask = 0; |
} |
} |
/* A reader that consumes the variable through the presubtract source,
 * or that is a derivative instruction, pins the writemask. */
for ( ; readers; readers = readers->Next) { |
struct rc_reader * r = readers->Item; |
if (r->Inst->Type == RC_INSTRUCTION_PAIR) { |
if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { |
can_change_writemask = 0; |
break; |
} |
/* DDX/DDY also fail when their swizzles are changed. */ |
if (is_derivative(r->Inst->U.P.RGB.Opcode) |
|| is_derivative(r->Inst->U.P.Alpha.Opcode)) { |
can_change_writemask = 0; |
break; |
} |
} |
} |
/* Final lookup: multi-writemask classes are only allowed when the
 * writemask may still be changed. */
class_index = find_class(classes, writemask, |
can_change_writemask ? 3 : 1); |
done: |
if (class_index > -1) { |
return classes[class_index].ID; |
} else { |
/* Also reached by the "goto error" above, which jumps into this branch. */
error: |
rc_error(variable->C, |
"Could not find class for index=%u mask=%u\n", |
variable->Dst.Index, writemask); |
return 0; |
} |
} |
static unsigned int overlap_live_intervals_array( |
struct live_intervals * a, |
struct live_intervals * b) |
{ |
unsigned int a_chan, b_chan; |
for (a_chan = 0; a_chan < 4; a_chan++) { |
for (b_chan = 0; b_chan < 4; b_chan++) { |
if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { |
return 1; |
} |
} |
} |
return 0; |
} |
static unsigned int reg_get_index(int reg) |
{ |
return reg / RC_MASK_XYZW; |
} |
static unsigned int reg_get_writemask(int reg) |
{ |
return (reg % RC_MASK_XYZW) + 1; |
} |
static int get_reg_id(unsigned int index, unsigned int writemask) |
{ |
assert(writemask); |
if (writemask == 0) { |
return 0; |
} |
return (index * RC_MASK_XYZW) + (writemask - 1); |
} |
#if VERBOSE
/**
 * Debug helper: print a register id as "Temp[index].xyzw", with '_' in
 * place of every channel that is not part of the writemask.
 */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	char chans[4];

	chans[0] = mask & RC_MASK_X ? 'x' : '_';
	chans[1] = mask & RC_MASK_Y ? 'y' : '_';
	chans[2] = mask & RC_MASK_Z ? 'z' : '_';
	chans[3] = mask & RC_MASK_W ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
		chans[0], chans[1], chans[2], chans[3]);
}
#endif
static void add_register_conflicts( |
struct ra_regs * regs, |
unsigned int max_temp_regs) |
{ |
unsigned int index, a_mask, b_mask; |
for (index = 0; index < max_temp_regs; index++) { |
for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { |
for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; |
b_mask++) { |
if (a_mask & b_mask) { |
ra_add_reg_conflict(regs, |
get_reg_id(index, a_mask), |
get_reg_id(index, b_mask)); |
} |
} |
} |
} |
} |
static void do_advanced_regalloc(struct regalloc_state * s) |
{ |
unsigned int i, input_node, node_count, node_index; |
unsigned int * node_classes; |
struct rc_instruction * inst; |
struct rc_list * var_ptr; |
struct rc_list * variables; |
struct ra_graph * graph; |
const struct rc_regalloc_state *ra_state = s->C->regalloc_state; |
/* Get list of program variables */ |
variables = rc_get_variables(s->C); |
node_count = rc_list_count(variables); |
node_classes = memory_pool_malloc(&s->C->Pool, |
node_count * sizeof(unsigned int)); |
for (var_ptr = variables, node_index = 0; var_ptr; |
var_ptr = var_ptr->Next, node_index++) { |
unsigned int class_index; |
/* Compute the live intervals */ |
rc_variable_compute_live_intervals(var_ptr->Item); |
class_index = variable_get_class(var_ptr->Item, rc_class_list); |
node_classes[node_index] = ra_state->class_ids[class_index]; |
} |
/* Calculate live intervals for input registers */ |
for (inst = s->C->Program.Instructions.Next; |
inst != &s->C->Program.Instructions; |
inst = inst->Next) { |
rc_opcode op = rc_get_flow_control_inst(inst); |
if (op == RC_OPCODE_BGNLOOP) { |
struct rc_instruction * endloop = |
rc_match_bgnloop(inst); |
if (endloop->IP > s->LoopEnd) { |
s->LoopEnd = endloop->IP; |
} |
} |
rc_for_all_reads_mask(inst, scan_read_callback, s); |
} |
/* Compute the writemask for inputs. */ |
for (i = 0; i < s->NumInputs; i++) { |
unsigned int chan, writemask = 0; |
for (chan = 0; chan < 4; chan++) { |
if (s->Input[i].Live[chan].Used) { |
writemask |= (1 << chan); |
} |
} |
s->Input[i].Writemask = writemask; |
} |
graph = ra_alloc_interference_graph(ra_state->regs, |
node_count + s->NumInputs); |
/* Build the interference graph */ |
for (var_ptr = variables, node_index = 0; var_ptr; |
var_ptr = var_ptr->Next,node_index++) { |
struct rc_list * a, * b; |
unsigned int b_index; |
ra_set_node_class(graph, node_index, node_classes[node_index]); |
for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; |
b; b = b->Next, b_index++) { |
struct rc_variable * var_a = a->Item; |
while (var_a) { |
struct rc_variable * var_b = b->Item; |
while (var_b) { |
if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { |
ra_add_node_interference(graph, |
node_index, b_index); |
} |
var_b = var_b->Friend; |
} |
var_a = var_a->Friend; |
} |
} |
} |
/* Add input registers to the interference graph */ |
for (i = 0, input_node = 0; i< s->NumInputs; i++) { |
if (!s->Input[i].Writemask) { |
continue; |
} |
for (var_ptr = variables, node_index = 0; |
var_ptr; var_ptr = var_ptr->Next, node_index++) { |
struct rc_variable * var = var_ptr->Item; |
if (overlap_live_intervals_array(s->Input[i].Live, |
var->Live)) { |
ra_add_node_interference(graph, node_index, |
node_count + input_node); |
} |
} |
/* Manually allocate a register for this input */ |
ra_set_node_reg(graph, node_count + input_node, get_reg_id( |
s->Input[i].Index, s->Input[i].Writemask)); |
input_node++; |
} |
if (!ra_allocate_no_spills(graph)) { |
rc_error(s->C, "Ran out of hardware temporaries\n"); |
return; |
} |
/* Rewrite the registers */ |
for (var_ptr = variables, node_index = 0; var_ptr; |
var_ptr = var_ptr->Next, node_index++) { |
int reg = ra_get_node_reg(graph, node_index); |
unsigned int writemask = reg_get_writemask(reg); |
unsigned int index = reg_get_index(reg); |
struct rc_variable * var = var_ptr->Item; |
if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { |
writemask = rc_variable_writemask_sum(var); |
} |
if (var->Dst.File == RC_FILE_INPUT) { |
continue; |
} |
rc_variable_change_dst(var, index, writemask); |
} |
ralloc_free(graph); |
} |
void rc_init_regalloc_state(struct rc_regalloc_state *s) |
{ |
unsigned i, j, index; |
unsigned **ra_q_values; |
/* Pre-computed q values. This array describes the maximum number of |
* a class's [row] registers that are in conflict with a single |
* register from another class [column]. |
* |
* For example: |
* q_values[0][2] is 3, because a register from class 2 |
* (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from |
* class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y, |
* and T0.z. |
* |
* q_values[2][0] is 1, because a register from class 0 |
* (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from |
* class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz |
* |
* The q values for each register class [row] will never be greater |
* than the maximum number of writemask combinations for that class. |
* |
* For example: |
* |
* Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination, |
* so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater |
* than 1. |
*/ |
const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = { |
{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}, |
{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3}, |
{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}, |
{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3}, |
{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}, |
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1}, |
{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, |
{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1}, |
{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1}, |
{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}, |
{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, |
{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, |
{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1}, |
{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, |
{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} |
}; |
/* Allocate the main ra data structure */ |
s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW); |
/* Create the register classes */ |
for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
const struct rc_class *class = &rc_class_list[i]; |
s->class_ids[class->ID] = ra_alloc_reg_class(s->regs); |
/* Assign registers to the classes */ |
for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) { |
for (j = 0; j < class->WritemaskCount; j++) { |
int reg_id = get_reg_id(index, |
class->Writemasks[j]); |
ra_class_add_reg(s->regs, |
s->class_ids[class->ID], reg_id); |
} |
} |
} |
/* Set the q values. The q_values array is indexed based on |
* the rc_reg_class ID (RC_REG_CLASS_*) which might be |
* different than the ID assigned to that class by ra. |
* This why we need to manually construct this list. |
*/ |
ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *)); |
for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned)); |
for (j = 0; j < RC_REG_CLASS_COUNT; j++) { |
ra_q_values[s->class_ids[i]][s->class_ids[j]] = |
q_values[i][j]; |
} |
} |
/* Add register conflicts */ |
add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS); |
ra_set_finalize(s->regs, ra_q_values); |
for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
FREE(ra_q_values[i]); |
} |
FREE(ra_q_values); |
} |
void rc_destroy_regalloc_state(struct rc_regalloc_state *s) |
{ |
ralloc_free(s->regs); |
} |
/** |
* @param user This parameter should be a pointer to an integer value. If this |
* integer value is zero, then a simple register allocator will be used that |
* only allocates space for input registers (\sa do_regalloc_inputs_only). If |
* user is non-zero, then the regular register allocator will be used |
* (\sa do_regalloc). |
*/ |
void rc_pair_regalloc(struct radeon_compiler *cc, void *user) |
{ |
struct r300_fragment_program_compiler *c = |
(struct r300_fragment_program_compiler*)cc; |
struct regalloc_state s; |
int * do_full_regalloc = (int*)user; |
memset(&s, 0, sizeof(s)); |
s.C = cc; |
s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; |
s.Input = memory_pool_malloc(&cc->Pool, |
s.NumInputs * sizeof(struct register_info)); |
memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); |
s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; |
s.Temporary = memory_pool_malloc(&cc->Pool, |
s.NumTemporaries * sizeof(struct register_info)); |
memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); |
rc_recompute_ips(s.C); |
c->AllocateHwInputs(c, &alloc_input_simple, &s); |
if (*do_full_regalloc) { |
do_advanced_regalloc(&s); |
} else { |
s.Simple = 1; |
do_regalloc_inputs_only(&s); |
} |
/* Rewrite inputs and if we are doing the simple allocation, rewrite |
* temporaries too. */ |
for (struct rc_instruction *inst = s.C->Program.Instructions.Next; |
inst != &s.C->Program.Instructions; |
inst = inst->Next) { |
rc_remap_registers(inst, &remap_register, &s); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c |
---|
0,0 → 1,1359 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_program_pair.h" |
#include <stdio.h> |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_list.h" |
#include "radeon_variable.h" |
#include "util/u_debug.h" |
/* Set to 1 to enable the scheduler's debug trace on stderr. */
#define VERBOSE 0

/* printf-style trace; the arguments are only evaluated when VERBOSE is
 * non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while (0)
struct schedule_instruction { |
struct rc_instruction * Instruction; |
/** Next instruction in the linked list of ready instructions. */ |
struct schedule_instruction *NextReady; |
/** Values that this instruction reads and writes */ |
struct reg_value * WriteValues[4]; |
struct reg_value * ReadValues[12]; |
unsigned int NumWriteValues:3; |
unsigned int NumReadValues:4; |
/** |
* Number of (read and write) dependencies that must be resolved before |
* this instruction can be scheduled. |
*/ |
unsigned int NumDependencies:5; |
/** List of all readers (see rc_get_readers() for the definition of |
* "all readers"), even those outside the basic block this instruction |
* lives in. */ |
struct rc_reader_data GlobalReaders; |
/** If the scheduler has paired an RGB and an Alpha instruction together, |
* PairedInst references the alpha insturction's dependency information. |
*/ |
struct schedule_instruction * PairedInst; |
/** This scheduler uses the value of Score to determine which |
* instruction to schedule. Instructions with a higher value of Score |
* will be scheduled first. */ |
int Score; |
/** The number of components that read from a TEX instruction. */ |
unsigned TexReadCount; |
/** For TEX instructions a list of readers */ |
struct rc_list * TexReaders; |
}; |
/**
 * Node of a singly linked list recording which instructions read a value.
 */
struct reg_value_reader {
	struct schedule_instruction *Reader;	/**< Instruction reading the value */
	struct reg_value_reader *Next;		/**< Next reader, or NULL */
};
/**
 * Tracks one value stored in a single component of an RC_FILE_TEMPORARY
 * register.
 */
struct reg_value {
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When the writer is committed, the dependency count of every reader
	 * on this list is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	/** Pointer to the next value to be written to the same register. */
	struct reg_value *Next;
};
/** Current value of each of the four components of one register. */
struct register_state {
	struct reg_value *Values[4];
};
struct remap_reg { |
struct rc_instruciont * Inst; |
unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); |
unsigned int OldSwizzle:3; |
unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); |
unsigned int NewSwizzle:3; |
unsigned int OnlyTexReads:1; |
struct remap_reg * Next; |
}; |
struct schedule_state { |
struct radeon_compiler * C; |
struct schedule_instruction * Current; |
/** Array of the previous writers of Current's destination register |
* indexed by channel. */ |
struct schedule_instruction * PrevWriter[4]; |
struct register_state Temporary[RC_REGISTER_MAX_INDEX]; |
/** |
* Linked lists of instructions that can be scheduled right now, |
* based on which ALU/TEX resources they require. |
*/ |
/*@{*/ |
struct schedule_instruction *ReadyFullALU; |
struct schedule_instruction *ReadyRGB; |
struct schedule_instruction *ReadyAlpha; |
struct schedule_instruction *ReadyTEX; |
/*@}*/ |
struct rc_list *PendingTEX; |
void (*CalcScore)(struct schedule_instruction *); |
long max_tex_group; |
unsigned PrevBlockHasTex:1; |
unsigned TEXCount; |
unsigned Opt:1; |
}; |
static struct reg_value ** get_reg_valuep(struct schedule_state * s, |
rc_register_file file, unsigned int index, unsigned int chan) |
{ |
if (file != RC_FILE_TEMPORARY) |
return 0; |
if (index >= RC_REGISTER_MAX_INDEX) { |
rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); |
return 0; |
} |
return &s->Temporary[index].Values[chan]; |
} |
static unsigned get_tex_read_count(struct schedule_instruction * sinst) |
{ |
unsigned tex_read_count = sinst->TexReadCount; |
if (sinst->PairedInst) { |
tex_read_count += sinst->PairedInst->TexReadCount; |
} |
return tex_read_count; |
} |
#if VERBOSE
/**
 * Debug helper: print every entry of a ready list to stderr in the form
 * "IP (score) [tex read count],", followed by a newline.
 */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;

	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		unsigned tex_read_count = get_tex_read_count(ptr);
		/* Print the score of this entry; the list head's score used
		 * to be printed for every entry. Score is an int, hence %d. */
		int score = ptr->Score;

		fprintf(stderr, "%u (%d) [%u],", ptr->Instruction->IP, score,
							tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
static void remove_inst_from_list(struct schedule_instruction ** list, |
struct schedule_instruction * inst) |
{ |
struct schedule_instruction * prev = NULL; |
struct schedule_instruction * list_ptr; |
for (list_ptr = *list; list_ptr; prev = list_ptr, |
list_ptr = list_ptr->NextReady) { |
if (list_ptr == inst) { |
if (prev) { |
prev->NextReady = inst->NextReady; |
} else { |
*list = inst->NextReady; |
} |
inst->NextReady = NULL; |
break; |
} |
} |
} |
static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) |
{ |
inst->NextReady = *list; |
*list = inst; |
} |
static void add_inst_to_list_score(struct schedule_instruction ** list, |
struct schedule_instruction * inst) |
{ |
struct schedule_instruction * temp; |
struct schedule_instruction * prev; |
if (!*list) { |
*list = inst; |
return; |
} |
temp = *list; |
prev = NULL; |
while(temp && inst->Score <= temp->Score) { |
prev = temp; |
temp = temp->NextReady; |
} |
if (!prev) { |
inst->NextReady = temp; |
*list = inst; |
} else { |
prev->NextReady = inst; |
inst->NextReady = temp; |
} |
} |
static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) |
{ |
DBG("%i is now ready\n", sinst->Instruction->IP); |
/* Adding Ready TEX instructions to the end of the "Ready List" helps |
* us emit TEX instructions in blocks without losing our place. */ |
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) |
add_inst_to_list_score(&s->ReadyTEX, sinst); |
else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) |
add_inst_to_list_score(&s->ReadyRGB, sinst); |
else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) |
add_inst_to_list_score(&s->ReadyAlpha, sinst); |
else |
add_inst_to_list_score(&s->ReadyFullALU, sinst); |
} |
static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) |
{ |
assert(sinst->NumDependencies > 0); |
sinst->NumDependencies--; |
if (!sinst->NumDependencies) |
instruction_ready(s, sinst); |
} |
/* These functions provide different heuristics for scheduling instructions.
 * The default is calc_score_readers. The two variants inside "#if 0" are
 * compiled out. */
#if 0

/* Every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/* Score by the outstanding dependencies of the readers of the written
 * values; a reader with exactly one dependency left adds a bonus of 100. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int i;

	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value * v = sinst->WriteValues[i];
		struct reg_value_reader * r;

		if (!v->NumReaders)
			continue;

		for (r = v->Readers; r; r = r->Next) {
			if (r->Reader->NumDependencies == 1)
				sinst->Score += 100;
			sinst->Score += r->Reader->NumDependencies;
		}
	}
}

#endif
#define NO_OUTPUT_SCORE (1 << 24) |
static void score_no_output(struct schedule_instruction * sinst) |
{ |
assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL); |
if (!sinst->Instruction->U.P.RGB.OutputWriteMask && |
!sinst->Instruction->U.P.Alpha.OutputWriteMask) { |
if (sinst->PairedInst) { |
if (!sinst->PairedInst->Instruction->U.P. |
RGB.OutputWriteMask |
&& !sinst->PairedInst->Instruction->U.P. |
Alpha.OutputWriteMask) { |
sinst->Score |= NO_OUTPUT_SCORE; |
} |
} else { |
sinst->Score |= NO_OUTPUT_SCORE; |
} |
} |
} |
#define PAIRED_SCORE (1 << 16) |
static void calc_score_r300(struct schedule_instruction * sinst) |
{ |
unsigned src_idx; |
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { |
sinst->Score = 0; |
return; |
} |
score_no_output(sinst); |
if (sinst->PairedInst) { |
sinst->Score |= PAIRED_SCORE; |
return; |
} |
for (src_idx = 0; src_idx < 4; src_idx++) { |
sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used + |
sinst->Instruction->U.P.Alpha.Src[src_idx].Used; |
} |
} |
#define NO_READ_TEX_SCORE (1 << 16) |
static void calc_score_readers(struct schedule_instruction * sinst) |
{ |
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { |
sinst->Score = 0; |
} else { |
sinst->Score = sinst->NumReadValues; |
if (sinst->PairedInst) { |
sinst->Score += sinst->PairedInst->NumReadValues; |
} |
if (get_tex_read_count(sinst) == 0) { |
sinst->Score |= NO_READ_TEX_SCORE; |
} |
score_no_output(sinst); |
} |
} |
/** |
* This function decreases the dependencies of the next instruction that |
* wants to write to each of sinst's read values. |
*/ |
static void commit_update_reads(struct schedule_state * s, |
struct schedule_instruction * sinst){ |
unsigned int i; |
for(i = 0; i < sinst->NumReadValues; ++i) { |
struct reg_value * v = sinst->ReadValues[i]; |
assert(v->NumReaders > 0); |
v->NumReaders--; |
if (!v->NumReaders) { |
if (v->Next) { |
decrease_dependencies(s, v->Next->Writer); |
} |
} |
} |
if (sinst->PairedInst) { |
commit_update_reads(s, sinst->PairedInst); |
} |
} |
static void commit_update_writes(struct schedule_state * s, |
struct schedule_instruction * sinst){ |
unsigned int i; |
for(i = 0; i < sinst->NumWriteValues; ++i) { |
struct reg_value * v = sinst->WriteValues[i]; |
if (v->NumReaders) { |
for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { |
decrease_dependencies(s, r->Reader); |
} |
} else { |
/* This happens in instruction sequences of the type |
* OP r.x, ...; |
* OP r.x, r.x, ...; |
* See also the subtlety in how instructions that both |
* read and write the same register are scanned. |
*/ |
if (v->Next) |
decrease_dependencies(s, v->Next->Writer); |
} |
} |
if (sinst->PairedInst) { |
commit_update_writes(s, sinst->PairedInst); |
} |
} |
static void notify_sem_wait(struct schedule_state *s) |
{ |
struct rc_list * pend_ptr; |
for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { |
struct rc_list * read_ptr; |
struct schedule_instruction * pending = pend_ptr->Item; |
for (read_ptr = pending->TexReaders; read_ptr; |
read_ptr = read_ptr->Next) { |
struct schedule_instruction * reader = read_ptr->Item; |
reader->TexReadCount--; |
} |
} |
s->PendingTEX = NULL; |
} |
static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) |
{ |
DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); |
commit_update_reads(s, sinst); |
commit_update_writes(s, sinst); |
if (get_tex_read_count(sinst) > 0) { |
sinst->Instruction->U.P.SemWait = 1; |
notify_sem_wait(s); |
} |
} |
/** |
* Emit all ready texture instructions in a single block. |
* |
* Emit as a single block to (hopefully) sample many textures in parallel, |
* and to avoid hardware indirections on R300. |
*/ |
static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) |
{ |
struct schedule_instruction *readytex; |
struct rc_instruction * inst_begin; |
assert(s->ReadyTEX); |
notify_sem_wait(s); |
/* Node marker for R300 */ |
inst_begin = rc_insert_new_instruction(s->C, before->Prev); |
inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; |
/* Link texture instructions back in */ |
readytex = s->ReadyTEX; |
while(readytex) { |
rc_insert_instruction(before->Prev, readytex->Instruction); |
DBG("%i: commit TEX reads\n", readytex->Instruction->IP); |
/* All of the TEX instructions in the same TEX block have |
* their source registers read from before any of the |
* instructions in that block write to their destination |
* registers. This means that when we commit a TEX |
* instruction, any other TEX instruction that wants to write |
* to one of the committed instruction's source register can be |
* marked as ready and should be emitted in the same TEX |
* block. This prevents the following sequence from being |
* emitted in two different TEX blocks: |
* 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; |
* 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; |
*/ |
commit_update_reads(s, readytex); |
readytex = readytex->NextReady; |
} |
readytex = s->ReadyTEX; |
s->ReadyTEX = 0; |
while(readytex){ |
DBG("%i: commit TEX writes\n", readytex->Instruction->IP); |
commit_update_writes(s, readytex); |
/* Set semaphore bits for last TEX instruction in the block */ |
if (!readytex->NextReady) { |
readytex->Instruction->U.I.TexSemAcquire = 1; |
readytex->Instruction->U.I.TexSemWait = 1; |
} |
rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); |
readytex = readytex->NextReady; |
} |
} |
/* This is a helper function for destructive_merge_instructions(). It helps |
* merge presubtract sources from two instructions and makes sure the |
* presubtract sources end up in the correct spot. This function assumes that |
* dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) |
* but no scalar instruction (alpha). |
* @return 0 if merging the presubtract sources fails. |
* @retrun 1 if merging the presubtract sources succeeds. |
*/ |
static int merge_presub_sources( |
struct rc_pair_instruction * dst_full, |
struct rc_pair_sub_instruction src, |
unsigned int type) |
{ |
unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; |
struct rc_pair_sub_instruction * dst_sub; |
const struct rc_opcode_info * info; |
assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); |
switch(type) { |
case RC_SOURCE_RGB: |
is_rgb = 1; |
is_alpha = 0; |
dst_sub = &dst_full->RGB; |
break; |
case RC_SOURCE_ALPHA: |
is_rgb = 0; |
is_alpha = 1; |
dst_sub = &dst_full->Alpha; |
break; |
default: |
assert(0); |
return 0; |
} |
info = rc_get_opcode_info(dst_full->RGB.Opcode); |
if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) |
return 0; |
srcp_regs = rc_presubtract_src_reg_count( |
src.Src[RC_PAIR_PRESUB_SRC].Index); |
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { |
unsigned int arg; |
int free_source; |
unsigned int one_way = 0; |
struct rc_pair_instruction_source srcp = src.Src[srcp_src]; |
struct rc_pair_instruction_source temp; |
free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, |
srcp.File, srcp.Index); |
/* If free_source < 0 then there are no free source |
* slots. */ |
if (free_source < 0) |
return 0; |
temp = dst_sub->Src[srcp_src]; |
dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; |
/* srcp needs src0 and src1 to be the same */ |
if (free_source < srcp_src) { |
if (!temp.Used) |
continue; |
free_source = rc_pair_alloc_source(dst_full, is_rgb, |
is_alpha, temp.File, temp.Index); |
if (free_source < 0) |
return 0; |
one_way = 1; |
} else { |
dst_sub->Src[free_source] = temp; |
} |
/* If free_source == srcp_src, then the presubtract |
* source is already in the correct place. */ |
if (free_source == srcp_src) |
continue; |
/* Shuffle the sources, so we can put the |
* presubtract source in the correct place. */ |
for(arg = 0; arg < info->NumSrcRegs; arg++) { |
/*If this arg does not read from an rgb source, |
* do nothing. */ |
if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) |
& type)) { |
continue; |
} |
if (dst_full->RGB.Arg[arg].Source == srcp_src) |
dst_full->RGB.Arg[arg].Source = free_source; |
/* We need to do this just in case register |
* is one of the sources already, but in the |
* wrong spot. */ |
else if(dst_full->RGB.Arg[arg].Source == free_source |
&& !one_way) { |
dst_full->RGB.Arg[arg].Source = srcp_src; |
} |
} |
} |
return 1; |
} |
/* This function assumes that rgb.Alpha and alpha.RGB are unused */ |
static int destructive_merge_instructions( |
struct rc_pair_instruction * rgb, |
struct rc_pair_instruction * alpha) |
{ |
const struct rc_opcode_info * opcode; |
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); |
assert(alpha->RGB.Opcode == RC_OPCODE_NOP); |
/* Presubtract registers need to be merged first so that registers |
* needed by the presubtract operation can be placed in src0 and/or |
* src1. */ |
/* Merge the rgb presubtract registers. */ |
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { |
if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { |
return 0; |
} |
} |
/* Merge the alpha presubtract registers */ |
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { |
if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ |
return 0; |
} |
} |
/* Copy alpha args into rgb */ |
opcode = rc_get_opcode_info(alpha->Alpha.Opcode); |
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { |
unsigned int srcrgb = 0; |
unsigned int srcalpha = 0; |
unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; |
rc_register_file file = 0; |
unsigned int index = 0; |
int source; |
if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { |
srcrgb = 1; |
file = alpha->RGB.Src[oldsrc].File; |
index = alpha->RGB.Src[oldsrc].Index; |
} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { |
srcalpha = 1; |
file = alpha->Alpha.Src[oldsrc].File; |
index = alpha->Alpha.Src[oldsrc].Index; |
} |
source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); |
if (source < 0) |
return 0; |
rgb->Alpha.Arg[arg].Source = source; |
rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; |
rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; |
rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; |
} |
/* Copy alpha opcode into rgb */ |
rgb->Alpha.Opcode = alpha->Alpha.Opcode; |
rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; |
rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; |
rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; |
rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; |
rgb->Alpha.Saturate = alpha->Alpha.Saturate; |
rgb->Alpha.Omod = alpha->Alpha.Omod; |
/* Merge ALU result writing */ |
if (alpha->WriteALUResult) { |
if (rgb->WriteALUResult) |
return 0; |
rgb->WriteALUResult = alpha->WriteALUResult; |
rgb->ALUResultCompare = alpha->ALUResultCompare; |
} |
/* Copy SemWait */ |
rgb->SemWait |= alpha->SemWait; |
return 1; |
} |
/** |
* Try to merge the given instructions into the rgb instructions. |
* |
* Return true on success; on failure, return false, and keep |
* the instructions untouched. |
*/ |
static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) |
{ |
struct rc_pair_instruction backup; |
/*Instructions can't write output registers and ALU result at the |
* same time. */ |
if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) |
|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { |
return 0; |
} |
/* Writing output registers in the middle of shaders is slow, so |
* we don't want to pair output writes with temp writes. */ |
if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) |
|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) { |
return 0; |
} |
memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); |
if (destructive_merge_instructions(rgb, alpha)) |
return 1; |
memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); |
return 0; |
} |
static void presub_nop(struct rc_instruction * emitted) { |
int prev_rgb_index, prev_alpha_index, i, num_src; |
/* We don't need a nop if the previous instruction is a TEX. */ |
if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { |
return; |
} |
if (emitted->Prev->U.P.RGB.WriteMask) |
prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; |
else |
prev_rgb_index = -1; |
if (emitted->Prev->U.P.Alpha.WriteMask) |
prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; |
else |
prev_alpha_index = 1; |
/* Check the previous rgb instruction */ |
if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { |
num_src = rc_presubtract_src_reg_count( |
emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); |
for (i = 0; i < num_src; i++) { |
unsigned int index = emitted->U.P.RGB.Src[i].Index; |
if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY |
&& (index == prev_rgb_index |
|| index == prev_alpha_index)) { |
emitted->Prev->U.P.Nop = 1; |
return; |
} |
} |
} |
/* Check the previous alpha instruction. */ |
if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) |
return; |
num_src = rc_presubtract_src_reg_count( |
emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); |
for (i = 0; i < num_src; i++) { |
unsigned int index = emitted->U.P.Alpha.Src[i].Index; |
if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY |
&& (index == prev_rgb_index || index == prev_alpha_index)) { |
emitted->Prev->U.P.Nop = 1; |
return; |
} |
} |
} |
static void rgb_to_alpha_remap ( |
struct rc_instruction * inst, |
struct rc_pair_instruction_arg * arg, |
rc_register_file old_file, |
rc_swizzle old_swz, |
unsigned int new_index) |
{ |
int new_src_index; |
unsigned int i; |
for (i = 0; i < 3; i++) { |
if (get_swz(arg->Swizzle, i) == old_swz) { |
SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); |
} |
} |
new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, |
old_file, new_index); |
/* This conversion is not possible, we must have made a mistake in |
* is_rgb_to_alpha_possible. */ |
if (new_src_index < 0) { |
assert(0); |
return; |
} |
arg->Source = new_src_index; |
} |
static int can_remap(unsigned int opcode) |
{ |
switch(opcode) { |
case RC_OPCODE_DDX: |
case RC_OPCODE_DDY: |
return 0; |
default: |
return 1; |
} |
} |
static int can_convert_opcode_to_alpha(unsigned int opcode) |
{ |
switch(opcode) { |
case RC_OPCODE_DDX: |
case RC_OPCODE_DDY: |
case RC_OPCODE_DP2: |
case RC_OPCODE_DP3: |
case RC_OPCODE_DP4: |
case RC_OPCODE_DPH: |
return 0; |
default: |
return 1; |
} |
} |
static void is_rgb_to_alpha_possible( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_pair_instruction_arg * arg, |
struct rc_pair_instruction_source * src) |
{ |
unsigned int read_chan = RC_SWIZZLE_UNUSED; |
unsigned int alpha_sources = 0; |
unsigned int i; |
struct rc_reader_data * reader_data = userdata; |
if (!can_remap(inst->U.P.RGB.Opcode) |
|| !can_remap(inst->U.P.Alpha.Opcode)) { |
reader_data->Abort = 1; |
return; |
} |
if (!src) |
return; |
/* XXX There are some cases where we can still do the conversion if |
* a reader reads from a presubtract source, but for now we'll prevent |
* it. */ |
if (arg->Source == RC_PAIR_PRESUB_SRC) { |
reader_data->Abort = 1; |
return; |
} |
/* Make sure the source only reads the register component that we |
* are going to be convering from. It is OK if the instruction uses |
* this component more than once. |
* XXX If the index we will be converting to is the same as the |
* current index, then it is OK to read from more than one component. |
*/ |
for (i = 0; i < 3; i++) { |
rc_swizzle swz = get_swz(arg->Swizzle, i); |
switch(swz) { |
case RC_SWIZZLE_X: |
case RC_SWIZZLE_Y: |
case RC_SWIZZLE_Z: |
case RC_SWIZZLE_W: |
if (read_chan == RC_SWIZZLE_UNUSED) { |
read_chan = swz; |
} else if (read_chan != swz) { |
reader_data->Abort = 1; |
return; |
} |
break; |
default: |
break; |
} |
} |
/* Make sure there are enough alpha sources. |
* XXX If we know what register all the readers are going |
* to be remapped to, then in some situations we can still do |
* the subsitution, even if all 3 alpha sources are being used.*/ |
for (i = 0; i < 3; i++) { |
if (inst->U.P.Alpha.Src[i].Used) { |
alpha_sources++; |
} |
} |
if (alpha_sources > 2) { |
reader_data->Abort = 1; |
return; |
} |
} |
static int convert_rgb_to_alpha( |
struct schedule_state * s, |
struct schedule_instruction * sched_inst) |
{ |
struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; |
unsigned int old_mask = pair_inst->RGB.WriteMask; |
unsigned int old_swz = rc_mask_to_swizzle(old_mask); |
const struct rc_opcode_info * info = |
rc_get_opcode_info(pair_inst->RGB.Opcode); |
int new_index = -1; |
unsigned int i; |
if (sched_inst->GlobalReaders.Abort) |
return 0; |
if (!pair_inst->RGB.WriteMask) |
return 0; |
if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) |
|| !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { |
return 0; |
} |
assert(sched_inst->NumWriteValues == 1); |
if (!sched_inst->WriteValues[0]) { |
assert(0); |
return 0; |
} |
/* We start at the old index, because if we can reuse the same |
* register and just change the swizzle then it is more likely we |
* will be able to convert all the readers. */ |
for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { |
struct reg_value ** new_regvalp = get_reg_valuep( |
s, RC_FILE_TEMPORARY, i, 3); |
if (!*new_regvalp) { |
struct reg_value ** old_regvalp = |
get_reg_valuep(s, |
RC_FILE_TEMPORARY, |
pair_inst->RGB.DestIndex, |
rc_mask_to_swizzle(old_mask)); |
new_index = i; |
*new_regvalp = *old_regvalp; |
*old_regvalp = NULL; |
new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); |
break; |
} |
} |
if (new_index < 0) { |
return 0; |
} |
/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA |
* as the RGB opcode, then the Alpha instruction will already contain |
* the correct opcode and instruction args, so we do not want to |
* overwrite them. |
*/ |
if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) { |
pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; |
memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, |
sizeof(pair_inst->Alpha.Arg)); |
} |
pair_inst->Alpha.DestIndex = new_index; |
pair_inst->Alpha.WriteMask = RC_MASK_W; |
pair_inst->Alpha.Target = pair_inst->RGB.Target; |
pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; |
pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; |
pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; |
pair_inst->Alpha.Omod = pair_inst->RGB.Omod; |
/* Move the swizzles into the first chan */ |
for (i = 0; i < info->NumSrcRegs; i++) { |
unsigned int j; |
for (j = 0; j < 3; j++) { |
unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); |
if (swz != RC_SWIZZLE_UNUSED) { |
pair_inst->Alpha.Arg[i].Swizzle = |
rc_init_swizzle(swz, 1); |
break; |
} |
} |
} |
pair_inst->RGB.Opcode = RC_OPCODE_NOP; |
pair_inst->RGB.DestIndex = 0; |
pair_inst->RGB.WriteMask = 0; |
pair_inst->RGB.Target = 0; |
pair_inst->RGB.OutputWriteMask = 0; |
pair_inst->RGB.DepthWriteMask = 0; |
pair_inst->RGB.Saturate = 0; |
memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); |
for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { |
struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; |
rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, |
RC_FILE_TEMPORARY, old_swz, new_index); |
} |
return 1; |
} |
static void try_convert_and_pair( |
struct schedule_state *s, |
struct schedule_instruction ** inst_list) |
{ |
struct schedule_instruction * list_ptr = *inst_list; |
while (list_ptr && *inst_list && (*inst_list)->NextReady) { |
int paired = 0; |
if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP |
&& list_ptr->Instruction->U.P.RGB.Opcode |
!= RC_OPCODE_REPL_ALPHA) { |
goto next; |
} |
if (list_ptr->NumWriteValues == 1 |
&& convert_rgb_to_alpha(s, list_ptr)) { |
struct schedule_instruction * pair_ptr; |
remove_inst_from_list(inst_list, list_ptr); |
add_inst_to_list_score(&s->ReadyAlpha, list_ptr); |
for (pair_ptr = s->ReadyRGB; pair_ptr; |
pair_ptr = pair_ptr->NextReady) { |
if (merge_instructions(&pair_ptr->Instruction->U.P, |
&list_ptr->Instruction->U.P)) { |
remove_inst_from_list(&s->ReadyAlpha, list_ptr); |
remove_inst_from_list(&s->ReadyRGB, pair_ptr); |
pair_ptr->PairedInst = list_ptr; |
add_inst_to_list(&s->ReadyFullALU, pair_ptr); |
list_ptr = *inst_list; |
paired = 1; |
break; |
} |
} |
} |
if (!paired) { |
next: |
list_ptr = list_ptr->NextReady; |
} |
} |
} |
/** |
* This function attempts to merge RGB and Alpha instructions together. |
*/ |
static void pair_instructions(struct schedule_state * s) |
{ |
struct schedule_instruction *rgb_ptr; |
struct schedule_instruction *alpha_ptr; |
/* Some pairings might fail because they require too |
* many source slots; try all possible pairings if necessary */ |
rgb_ptr = s->ReadyRGB; |
while(rgb_ptr) { |
struct schedule_instruction * rgb_next = rgb_ptr->NextReady; |
alpha_ptr = s->ReadyAlpha; |
while(alpha_ptr) { |
struct schedule_instruction * alpha_next = alpha_ptr->NextReady; |
if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) { |
/* Remove RGB and Alpha from their ready lists. |
*/ |
remove_inst_from_list(&s->ReadyRGB, rgb_ptr); |
remove_inst_from_list(&s->ReadyAlpha, alpha_ptr); |
rgb_ptr->PairedInst = alpha_ptr; |
add_inst_to_list(&s->ReadyFullALU, rgb_ptr); |
break; |
} |
alpha_ptr = alpha_next; |
} |
rgb_ptr = rgb_next; |
} |
if (!s->Opt) { |
return; |
} |
/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB |
* slot can be converted into Alpha instructions. */ |
try_convert_and_pair(s, &s->ReadyFullALU); |
/* Try to convert some of the RGB instructions to Alpha and |
* try to pair it with another RGB. */ |
try_convert_and_pair(s, &s->ReadyRGB); |
} |
static void update_max_score( |
struct schedule_state * s, |
struct schedule_instruction ** list, |
int * max_score, |
struct schedule_instruction ** max_inst_out, |
struct schedule_instruction *** list_out) |
{ |
struct schedule_instruction * list_ptr; |
for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { |
int score; |
s->CalcScore(list_ptr); |
score = list_ptr->Score; |
if (!*max_inst_out || score > *max_score) { |
*max_score = score; |
*max_inst_out = list_ptr; |
*list_out = list; |
} |
} |
} |
static void emit_instruction( |
struct schedule_state * s, |
struct rc_instruction * before) |
{ |
int max_score = -1; |
struct schedule_instruction * max_inst = NULL; |
struct schedule_instruction ** max_list = NULL; |
unsigned tex_count = 0; |
struct schedule_instruction * tex_ptr; |
pair_instructions(s); |
#if VERBOSE |
fprintf(stderr, "Full:\n"); |
print_list(s->ReadyFullALU); |
fprintf(stderr, "RGB:\n"); |
print_list(s->ReadyRGB); |
fprintf(stderr, "Alpha:\n"); |
print_list(s->ReadyAlpha); |
fprintf(stderr, "TEX:\n"); |
print_list(s->ReadyTEX); |
#endif |
for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { |
if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { |
emit_all_tex(s, before); |
return; |
} |
tex_count++; |
} |
update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list); |
update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); |
update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); |
if (tex_count >= s->max_tex_group || max_score == -1 |
|| (s->TEXCount > 0 && tex_count == s->TEXCount) |
|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) { |
emit_all_tex(s, before); |
} else { |
remove_inst_from_list(max_list, max_inst); |
rc_insert_instruction(before->Prev, max_inst->Instruction); |
commit_alu_instruction(s, max_inst); |
presub_nop(before->Prev); |
} |
} |
static void add_tex_reader( |
struct schedule_state * s, |
struct schedule_instruction * writer, |
struct schedule_instruction * reader) |
{ |
if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) { |
/*Not a TEX instructions */ |
return; |
} |
reader->TexReadCount++; |
rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader)); |
} |
static void scan_read(void * data, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int chan) |
{ |
struct schedule_state * s = data; |
struct reg_value ** v = get_reg_valuep(s, file, index, chan); |
struct reg_value_reader * reader; |
if (!v) |
return; |
if (*v && (*v)->Writer == s->Current) { |
/* The instruction reads and writes to a register component. |
* In this case, we only want to increment dependencies by one. |
* Why? |
* Because each instruction depends on the writers of its source |
* registers _and_ the most recent writer of its destination |
* register. In this case, the current instruction (s->Current) |
* has a dependency that both writes to one of its source |
* registers and was the most recent writer to its destination |
* register. We have already marked this dependency in |
* scan_write(), so we don't need to do it again. |
*/ |
/* We need to make sure we are adding s->Current to the |
* previous writer's list of TexReaders, if the previous writer |
* was a TEX instruction. |
*/ |
add_tex_reader(s, s->PrevWriter[chan], s->Current); |
return; |
} |
DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); |
reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); |
reader->Reader = s->Current; |
if (!*v) { |
/* In this situation, the instruction reads from a register |
* that hasn't been written to or read from in the current |
* block. */ |
*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); |
memset(*v, 0, sizeof(struct reg_value)); |
(*v)->Readers = reader; |
} else { |
reader->Next = (*v)->Readers; |
(*v)->Readers = reader; |
/* Only update the current instruction's dependencies if the |
* register it reads from has been written to in this block. */ |
if ((*v)->Writer) { |
add_tex_reader(s, (*v)->Writer, s->Current); |
s->Current->NumDependencies++; |
} |
} |
(*v)->NumReaders++; |
if (s->Current->NumReadValues >= 12) { |
rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); |
} else { |
s->Current->ReadValues[s->Current->NumReadValues++] = *v; |
} |
} |
static void scan_write(void * data, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int chan) |
{ |
struct schedule_state * s = data; |
struct reg_value ** pv = get_reg_valuep(s, file, index, chan); |
struct reg_value * newv; |
if (!pv) |
return; |
DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); |
newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); |
memset(newv, 0, sizeof(*newv)); |
newv->Writer = s->Current; |
if (*pv) { |
(*pv)->Next = newv; |
s->Current->NumDependencies++; |
/* Keep track of the previous writer to s->Current's destination |
* register */ |
s->PrevWriter[chan] = (*pv)->Writer; |
} |
*pv = newv; |
if (s->Current->NumWriteValues >= 4) { |
rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); |
} else { |
s->Current->WriteValues[s->Current->NumWriteValues++] = newv; |
} |
} |
static void is_rgb_to_alpha_possible_normal( |
void * userdata, |
struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct rc_reader_data * reader_data = userdata; |
reader_data->Abort = 1; |
} |
static void schedule_block(struct schedule_state * s, |
struct rc_instruction * begin, struct rc_instruction * end) |
{ |
unsigned int ip; |
/* Scan instructions for data dependencies */ |
ip = 0; |
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { |
s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); |
memset(s->Current, 0, sizeof(struct schedule_instruction)); |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
const struct rc_opcode_info * info = |
rc_get_opcode_info(inst->U.I.Opcode); |
if (info->HasTexture) { |
s->TEXCount++; |
} |
} |
/* XXX: This causes SemWait to be set for all instructions in |
* a block if the previous block contained a TEX instruction. |
* We can do better here, but it will take a lot of work. */ |
if (s->PrevBlockHasTex) { |
s->Current->TexReadCount = 1; |
} |
s->Current->Instruction = inst; |
inst->IP = ip++; |
DBG("%i: Scanning\n", inst->IP); |
/* The order of things here is subtle and maybe slightly |
* counter-intuitive, to account for the case where an |
* instruction writes to the same register as it reads |
* from. */ |
rc_for_all_writes_chan(inst, &scan_write, s); |
rc_for_all_reads_chan(inst, &scan_read, s); |
DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); |
if (!s->Current->NumDependencies) { |
instruction_ready(s, s->Current); |
} |
/* Get global readers for possible RGB->Alpha conversion. */ |
s->Current->GlobalReaders.ExitOnAbort = 1; |
rc_get_readers(s->C, inst, &s->Current->GlobalReaders, |
is_rgb_to_alpha_possible_normal, |
is_rgb_to_alpha_possible, NULL); |
} |
/* Temporarily unlink all instructions */ |
begin->Prev->Next = end; |
end->Prev = begin->Prev; |
/* Schedule instructions back */ |
while(!s->C->Error && |
(s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { |
emit_instruction(s, end); |
} |
} |
static int is_controlflow(struct rc_instruction * inst) |
{ |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
return opcode->IsFlowControl; |
} |
return 0; |
} |
void rc_pair_schedule(struct radeon_compiler *cc, void *user) |
{ |
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; |
struct schedule_state s; |
struct rc_instruction * inst = c->Base.Program.Instructions.Next; |
unsigned int * opt = user; |
memset(&s, 0, sizeof(s)); |
s.Opt = *opt; |
s.C = &c->Base; |
if (s.C->is_r500) { |
s.CalcScore = calc_score_readers; |
} else { |
s.CalcScore = calc_score_r300; |
} |
s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); |
while(inst != &c->Base.Program.Instructions) { |
struct rc_instruction * first; |
if (is_controlflow(inst)) { |
inst = inst->Next; |
continue; |
} |
first = inst; |
while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) |
inst = inst->Next; |
DBG("Schedule one block\n"); |
memset(s.Temporary, 0, sizeof(s.Temporary)); |
s.TEXCount = 0; |
schedule_block(&s, first, inst); |
if (s.PendingTEX) { |
s.PrevBlockHasTex = 1; |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_translate.c |
---|
0,0 → 1,380 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_program_pair.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
/** |
* Finally rewrite ADD, MOV, MUL as the appropriate native instruction |
* and reverse the order of arguments for CMP. |
*/ |
static void final_rewrite(struct rc_sub_instruction *inst) |
{ |
struct rc_src_register tmp; |
switch(inst->Opcode) { |
case RC_OPCODE_ADD: |
inst->SrcReg[2] = inst->SrcReg[1]; |
inst->SrcReg[1].File = RC_FILE_NONE; |
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; |
inst->SrcReg[1].Negate = RC_MASK_NONE; |
inst->Opcode = RC_OPCODE_MAD; |
break; |
case RC_OPCODE_CMP: |
tmp = inst->SrcReg[2]; |
inst->SrcReg[2] = inst->SrcReg[0]; |
inst->SrcReg[0] = tmp; |
break; |
case RC_OPCODE_MOV: |
/* AMD say we should use CMP. |
* However, when we transform |
* KIL -r0; |
* into |
* CMP tmp, -r0, -r0, 0; |
* KIL tmp; |
* we get incorrect behaviour on R500 when r0 == 0.0. |
* It appears that the R500 KIL hardware treats -0.0 as less |
* than zero. |
*/ |
inst->SrcReg[1].File = RC_FILE_NONE; |
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; |
inst->SrcReg[2].File = RC_FILE_NONE; |
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; |
inst->Opcode = RC_OPCODE_MAD; |
break; |
case RC_OPCODE_MUL: |
inst->SrcReg[2].File = RC_FILE_NONE; |
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; |
inst->Opcode = RC_OPCODE_MAD; |
break; |
default: |
/* nothing to do */ |
break; |
} |
} |
/** |
* Classify an instruction according to which ALUs etc. it needs |
*/ |
static void classify_instruction(struct rc_sub_instruction * inst, |
int * needrgb, int * needalpha, int * istranscendent) |
{ |
*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; |
*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; |
*istranscendent = 0; |
if (inst->WriteALUResult == RC_ALURESULT_X) |
*needrgb = 1; |
else if (inst->WriteALUResult == RC_ALURESULT_W) |
*needalpha = 1; |
switch(inst->Opcode) { |
case RC_OPCODE_ADD: |
case RC_OPCODE_CMP: |
case RC_OPCODE_CND: |
case RC_OPCODE_DDX: |
case RC_OPCODE_DDY: |
case RC_OPCODE_FRC: |
case RC_OPCODE_MAD: |
case RC_OPCODE_MAX: |
case RC_OPCODE_MIN: |
case RC_OPCODE_MOV: |
case RC_OPCODE_MUL: |
break; |
case RC_OPCODE_COS: |
case RC_OPCODE_EX2: |
case RC_OPCODE_LG2: |
case RC_OPCODE_RCP: |
case RC_OPCODE_RSQ: |
case RC_OPCODE_SIN: |
*istranscendent = 1; |
*needalpha = 1; |
break; |
case RC_OPCODE_DP4: |
*needalpha = 1; |
/* fall through */ |
case RC_OPCODE_DP3: |
*needrgb = 1; |
break; |
default: |
break; |
} |
} |
static void src_uses(struct rc_src_register src, unsigned int * rgb, |
unsigned int * alpha) |
{ |
int j; |
for(j = 0; j < 4; ++j) { |
unsigned int swz = GET_SWZ(src.Swizzle, j); |
if (swz < 3) |
*rgb = 1; |
else if (swz < 4) |
*alpha = 1; |
} |
} |
/** |
* Fill the given ALU instruction's opcodes and source operands into the given pair, |
* if possible. |
*/ |
static void set_pair_instruction(struct r300_fragment_program_compiler *c, |
struct rc_pair_instruction * pair, |
struct rc_sub_instruction * inst) |
{ |
int needrgb, needalpha, istranscendent; |
const struct rc_opcode_info * opcode; |
int i; |
memset(pair, 0, sizeof(struct rc_pair_instruction)); |
classify_instruction(inst, &needrgb, &needalpha, &istranscendent); |
if (needrgb) { |
if (istranscendent) |
pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; |
else |
pair->RGB.Opcode = inst->Opcode; |
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) |
pair->RGB.Saturate = 1; |
} |
if (needalpha) { |
pair->Alpha.Opcode = inst->Opcode; |
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) |
pair->Alpha.Saturate = 1; |
} |
opcode = rc_get_opcode_info(inst->Opcode); |
/* Presubtract handling: |
* We need to make sure that the values used by the presubtract |
* operation end up in src0 or src1. */ |
if(inst->PreSub.Opcode != RC_PRESUB_NONE) { |
/* rc_pair_alloc_source() will fill in data for |
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ |
int j; |
for(j = 0; j < 3; j++) { |
int src_regs; |
if(inst->SrcReg[j].File != RC_FILE_PRESUB) |
continue; |
src_regs = rc_presubtract_src_reg_count( |
inst->PreSub.Opcode); |
for(i = 0; i < src_regs; i++) { |
unsigned int rgb = 0; |
unsigned int alpha = 0; |
src_uses(inst->SrcReg[j], &rgb, &alpha); |
if(rgb) { |
pair->RGB.Src[i].File = |
inst->PreSub.SrcReg[i].File; |
pair->RGB.Src[i].Index = |
inst->PreSub.SrcReg[i].Index; |
pair->RGB.Src[i].Used = 1; |
} |
if(alpha) { |
pair->Alpha.Src[i].File = |
inst->PreSub.SrcReg[i].File; |
pair->Alpha.Src[i].Index = |
inst->PreSub.SrcReg[i].Index; |
pair->Alpha.Src[i].Used = 1; |
} |
} |
} |
} |
for(i = 0; i < opcode->NumSrcRegs; ++i) { |
int source; |
if (needrgb && !istranscendent) { |
unsigned int srcrgb = 0; |
unsigned int srcalpha = 0; |
unsigned int srcmask = 0; |
int j; |
/* We don't care about the alpha channel here. We only |
* want the part of the swizzle that writes to rgb, |
* since we are creating an rgb instruction. */ |
for(j = 0; j < 3; ++j) { |
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); |
if (swz < RC_SWIZZLE_W) |
srcrgb = 1; |
else if (swz == RC_SWIZZLE_W) |
srcalpha = 1; |
if (swz < RC_SWIZZLE_UNUSED) |
srcmask |= 1 << j; |
} |
source = rc_pair_alloc_source(pair, srcrgb, srcalpha, |
inst->SrcReg[i].File, inst->SrcReg[i].Index); |
if (source < 0) { |
rc_error(&c->Base, "Failed to translate " |
"rgb instruction.\n"); |
return; |
} |
pair->RGB.Arg[i].Source = source; |
pair->RGB.Arg[i].Swizzle = |
rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); |
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; |
pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); |
} |
if (needalpha) { |
unsigned int srcrgb = 0; |
unsigned int srcalpha = 0; |
unsigned int swz; |
if (istranscendent) { |
swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle); |
} else { |
swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3); |
} |
if (swz < 3) |
srcrgb = 1; |
else if (swz < 4) |
srcalpha = 1; |
source = rc_pair_alloc_source(pair, srcrgb, srcalpha, |
inst->SrcReg[i].File, inst->SrcReg[i].Index); |
if (source < 0) { |
rc_error(&c->Base, "Failed to translate " |
"alpha instruction.\n"); |
return; |
} |
pair->Alpha.Arg[i].Source = source; |
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); |
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; |
if (istranscendent) { |
pair->Alpha.Arg[i].Negate = |
!!(inst->SrcReg[i].Negate & |
inst->DstReg.WriteMask); |
} else { |
pair->Alpha.Arg[i].Negate = |
!!(inst->SrcReg[i].Negate & RC_MASK_W); |
} |
} |
} |
/* Destination handling */ |
if (inst->DstReg.File == RC_FILE_OUTPUT) { |
if (inst->DstReg.Index == c->OutputDepth) { |
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); |
} else { |
for (i = 0; i < 4; i++) { |
if (inst->DstReg.Index == c->OutputColor[i]) { |
pair->RGB.Target = i; |
pair->Alpha.Target = i; |
pair->RGB.OutputWriteMask |= |
inst->DstReg.WriteMask & RC_MASK_XYZ; |
pair->Alpha.OutputWriteMask |= |
GET_BIT(inst->DstReg.WriteMask, 3); |
break; |
} |
} |
} |
} else { |
if (needrgb) { |
pair->RGB.DestIndex = inst->DstReg.Index; |
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; |
} |
if (needalpha) { |
pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); |
if (pair->Alpha.WriteMask) { |
pair->Alpha.DestIndex = inst->DstReg.Index; |
} |
} |
} |
if (needrgb) { |
pair->RGB.Omod = inst->Omod; |
} |
if (needalpha) { |
pair->Alpha.Omod = inst->Omod; |
} |
if (inst->WriteALUResult) { |
pair->WriteALUResult = inst->WriteALUResult; |
pair->ALUResultCompare = inst->ALUResultCompare; |
} |
} |
static void check_opcode_support(struct r300_fragment_program_compiler *c, |
struct rc_sub_instruction *inst) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); |
if (opcode->HasDstReg) { |
if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { |
rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); |
return; |
} |
} |
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { |
if (inst->SrcReg[i].RelAddr) { |
rc_error(&c->Base, "Fragment program does not support relative addressing " |
" of source operands.\n"); |
return; |
} |
} |
} |
/** |
* Translate all ALU instructions into corresponding pair instructions, |
* performing no other changes. |
*/ |
void rc_pair_translate(struct radeon_compiler *cc, void *user) |
{ |
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; |
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; |
inst != &c->Base.Program.Instructions; |
inst = inst->Next) { |
const struct rc_opcode_info * opcode; |
struct rc_sub_instruction copy; |
if (inst->Type != RC_INSTRUCTION_NORMAL) |
continue; |
opcode = rc_get_opcode_info(inst->U.I.Opcode); |
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) |
continue; |
copy = inst->U.I; |
check_opcode_support(c, ©); |
final_rewrite(©); |
inst->Type = RC_INSTRUCTION_PAIR; |
set_pair_instruction(c, &inst->U.P, ©); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program.c |
---|
0,0 → 1,225 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_program.h" |
#include <stdio.h> |
#include "radeon_compiler.h" |
#include "radeon_dataflow.h" |
/** |
* Transform the given clause in the following way: |
* 1. Replace it with an empty clause |
* 2. For every instruction in the original clause, try the given |
* transformations in order. |
* 3. If one of the transformations returns GL_TRUE, assume that it |
* has emitted the appropriate instruction(s) into the new clause; |
* otherwise, copy the instruction verbatim. |
* |
* \note The transformation is currently not recursive; in other words, |
* instructions emitted by transformations are not transformed. |
* |
* \note The transform is called 'local' because it can only look at |
* one instruction at a time. |
*/ |
void rc_local_transform( |
struct radeon_compiler * c, |
void *user) |
{ |
struct radeon_program_transformation *transformations = |
(struct radeon_program_transformation*)user; |
struct rc_instruction * inst = c->Program.Instructions.Next; |
while(inst != &c->Program.Instructions) { |
struct rc_instruction * current = inst; |
int i; |
inst = inst->Next; |
for(i = 0; transformations[i].function; ++i) { |
struct radeon_program_transformation* t = transformations + i; |
if (t->function(c, current, t->userData)) |
break; |
} |
} |
} |
/** State handed to get_used_temporaries_cb() through its userdata pointer. */
struct get_used_temporaries_data {
	/** Per-temporary mask of used components, indexed by register index. */
	unsigned char * Used;
	/** Number of entries in Used; larger indices are ignored. */
	unsigned int UsedLength;
};
static void get_used_temporaries_cb( |
void * userdata, |
struct rc_instruction * inst, |
rc_register_file file, |
unsigned int index, |
unsigned int mask) |
{ |
struct get_used_temporaries_data * d = userdata; |
if (file != RC_FILE_TEMPORARY) |
return; |
if (index >= d->UsedLength) |
return; |
d->Used[index] |= mask; |
} |
/** |
* This function fills in the parameter 'used' with a writemask that |
* represent which components of each temporary register are used by the |
* program. This is meant to be combined with rc_find_free_temporary_list as a |
* more efficient version of rc_find_free_temporary. |
* @param used The function does not initialize this parameter. |
*/ |
void rc_get_used_temporaries( |
struct radeon_compiler * c, |
unsigned char * used, |
unsigned int used_length) |
{ |
struct rc_instruction * inst; |
struct get_used_temporaries_data d; |
d.Used = used; |
d.UsedLength = used_length; |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; inst = inst->Next) { |
rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); |
rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); |
} |
} |
/**
 * Search a list of used temporaries for a free one.
 * \sa rc_get_used_temporaries
 * @note If this function finds a free temporary, it will mark it as used
 * in the used temporary list (param 'used').
 * @param c unused
 * @param used list of used temporaries
 * @param used_length number of items in param 'used'
 * @param mask which components must be free in the temporary index that is
 * returned.
 * @return -1 if there are no more free temporaries, otherwise the index of
 * the first temporary register where the components specified in param
 * 'mask' are not being used.
 */
int rc_find_free_temporary_list(
	struct radeon_compiler * c,
	unsigned char * used,
	unsigned int used_length,
	unsigned int mask)
{
	/* Unsigned index: matches the type of used_length and avoids a
	 * signed/unsigned comparison in the loop condition. */
	unsigned int i;

	for (i = 0; i < used_length; i++) {
		/* Free means none of the requested components is taken yet. */
		if ((used[i] & mask) == 0) {
			used[i] |= mask;
			return (int)i;
		}
	}
	return -1;
}
unsigned int rc_find_free_temporary(struct radeon_compiler * c) |
{ |
unsigned char used[RC_REGISTER_MAX_INDEX]; |
int free; |
memset(used, 0, sizeof(used)); |
rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); |
free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, |
RC_MASK_XYZW); |
if (free < 0) { |
rc_error(c, "Ran out of temporary registers\n"); |
return 0; |
} |
return free; |
} |
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) |
{ |
struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); |
memset(inst, 0, sizeof(struct rc_instruction)); |
inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; |
inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; |
inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; |
return inst; |
} |
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) |
{ |
inst->Prev = after; |
inst->Next = after->Next; |
inst->Prev->Next = inst; |
inst->Next->Prev = inst; |
} |
/**
 * Allocate a fresh instruction with rc_alloc_instruction(), link it in
 * directly behind 'after' and return it.
 */
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
{
	struct rc_instruction * fresh = rc_alloc_instruction(c);

	rc_insert_instruction(after, fresh);

	return fresh;
}
void rc_remove_instruction(struct rc_instruction * inst) |
{ |
inst->Prev->Next = inst->Next; |
inst->Next->Prev = inst->Prev; |
} |
/** |
* Return the number of instructions in the program. |
*/ |
unsigned int rc_recompute_ips(struct radeon_compiler * c) |
{ |
unsigned int ip = 0; |
struct rc_instruction * inst; |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
inst->IP = ip++; |
} |
c->Program.Instructions.IP = 0xcafedead; |
return ip; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program.h |
---|
0,0 → 1,213 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef __RADEON_PROGRAM_H_ |
#define __RADEON_PROGRAM_H_ |
#include <stdint.h> |
#include <string.h> |
#include "radeon_opcodes.h" |
#include "radeon_code.h" |
#include "radeon_program_constants.h" |
#include "radeon_program_pair.h" |
struct radeon_compiler; |
struct rc_src_register { |
unsigned int File:4; |
/** Negative values may be used for relative addressing. */ |
signed int Index:(RC_REGISTER_INDEX_BITS+1); |
unsigned int RelAddr:1; |
unsigned int Swizzle:12; |
/** Take the component-wise absolute value */ |
unsigned int Abs:1; |
/** Post-Abs negation. */ |
unsigned int Negate:4; |
}; |
struct rc_dst_register { |
unsigned int File:3; |
unsigned int Index:RC_REGISTER_INDEX_BITS; |
unsigned int WriteMask:4; |
unsigned int Pred:2; |
}; |
struct rc_presub_instruction { |
rc_presubtract_op Opcode; |
struct rc_src_register SrcReg[2]; |
}; |
/** |
* Instructions are maintained by the compiler in a doubly linked list |
* of these structures. |
* |
* This instruction format is intended to be expanded for hardware-specific |
* trickery. At different stages of compilation, a different set of |
* instruction types may be valid. |
*/ |
struct rc_sub_instruction { |
struct rc_src_register SrcReg[3]; |
struct rc_dst_register DstReg; |
/** |
* Opcode of this instruction, according to \ref rc_opcode enums. |
*/ |
unsigned int Opcode:8; |
/** |
* Saturate each value of the result to the range [0,1] or [-1,1], |
* according to \ref rc_saturate_mode enums. |
*/ |
unsigned int SaturateMode:2; |
/** |
* Writing to the special register RC_SPECIAL_ALU_RESULT |
*/ |
/*@{*/ |
unsigned int WriteALUResult:2; |
unsigned int ALUResultCompare:3; |
/*@}*/ |
/** |
* \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. |
*/ |
/*@{*/ |
/** Source texture unit. */ |
unsigned int TexSrcUnit:5; |
/** Source texture target, one of the \ref rc_texture_target enums */ |
unsigned int TexSrcTarget:3; |
/** True if tex instruction should do shadow comparison */ |
unsigned int TexShadow:1; |
/**/ |
unsigned int TexSemWait:1; |
unsigned int TexSemAcquire:1; |
/**R500 Only. How to swizzle the result of a TEX lookup*/ |
unsigned int TexSwizzle:12; |
/*@}*/ |
/** This holds information about the presubtract operation used by |
* this instruction. */ |
struct rc_presub_instruction PreSub; |
rc_omod_op Omod; |
}; |
/** Selects which member of rc_instruction::U is valid. */
typedef enum {
	/** Generic form; the payload is U.I (struct rc_sub_instruction). */
	RC_INSTRUCTION_NORMAL = 0,
	/** Paired RGB/Alpha form; the payload is U.P (struct rc_pair_instruction). */
	RC_INSTRUCTION_PAIR
} rc_instruction_type;
struct rc_instruction { |
struct rc_instruction * Prev; |
struct rc_instruction * Next; |
rc_instruction_type Type; |
union { |
struct rc_sub_instruction I; |
struct rc_pair_instruction P; |
} U; |
/** |
* Warning: IPs are not stable. If you want to use them, |
* you need to recompute them at the beginning of each pass |
* using \ref rc_recompute_ips |
*/ |
unsigned int IP; |
}; |
struct rc_program { |
/** |
* Instructions.Next points to the first instruction, |
* Instructions.Prev points to the last instruction. |
*/ |
struct rc_instruction Instructions; |
/* Long term, we should probably remove InputsRead & OutputsWritten, |
* since updating dependent state can be fragile, and they aren't |
* actually used very often. */ |
uint32_t InputsRead; |
uint32_t OutputsWritten; |
uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ |
struct rc_constant_list Constants; |
}; |
/**
 * One entry of the transformation array taken by \ref rc_local_transform.
 *
 * The function is called once for each instruction.  It must either emit
 * the appropriate transformed code for the instruction and return true
 * (non-zero), or return false (zero) if it doesn't understand the
 * instruction.
 *
 * userData is passed to the function as its last argument.  An entry whose
 * function pointer is NULL terminates the array.
 */
struct radeon_program_transformation {
	int (*function)(
		struct radeon_compiler*,
		struct rc_instruction*,
		void*);
	void *userData;
};
void rc_local_transform( |
struct radeon_compiler *c, |
void *user); |
void rc_get_used_temporaries( |
struct radeon_compiler * c, |
unsigned char * used, |
unsigned int used_length); |
int rc_find_free_temporary_list( |
struct radeon_compiler * c, |
unsigned char * used, |
unsigned int used_length, |
unsigned int mask); |
unsigned int rc_find_free_temporary(struct radeon_compiler * c); |
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); |
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); |
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); |
void rc_remove_instruction(struct rc_instruction * inst); |
unsigned int rc_recompute_ips(struct radeon_compiler * c); |
void rc_print_program(const struct rc_program *prog); |
rc_swizzle rc_mask_to_swizzle(unsigned int mask); |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_alu.c |
---|
0,0 → 1,1313 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* @file |
* |
* Shareable transformations that transform "special" ALU instructions |
* into ALU instructions that are supported by hardware. |
* |
*/ |
#include "radeon_program_alu.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
static struct rc_instruction *emit1( |
struct radeon_compiler * c, struct rc_instruction * after, |
rc_opcode Opcode, struct rc_sub_instruction * base, |
struct rc_dst_register DstReg, struct rc_src_register SrcReg) |
{ |
struct rc_instruction *fpi = rc_insert_new_instruction(c, after); |
if (base) { |
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); |
} |
fpi->U.I.Opcode = Opcode; |
fpi->U.I.DstReg = DstReg; |
fpi->U.I.SrcReg[0] = SrcReg; |
return fpi; |
} |
static struct rc_instruction *emit2( |
struct radeon_compiler * c, struct rc_instruction * after, |
rc_opcode Opcode, struct rc_sub_instruction * base, |
struct rc_dst_register DstReg, |
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) |
{ |
struct rc_instruction *fpi = rc_insert_new_instruction(c, after); |
if (base) { |
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); |
} |
fpi->U.I.Opcode = Opcode; |
fpi->U.I.DstReg = DstReg; |
fpi->U.I.SrcReg[0] = SrcReg0; |
fpi->U.I.SrcReg[1] = SrcReg1; |
return fpi; |
} |
static struct rc_instruction *emit3( |
struct radeon_compiler * c, struct rc_instruction * after, |
rc_opcode Opcode, struct rc_sub_instruction * base, |
struct rc_dst_register DstReg, |
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, |
struct rc_src_register SrcReg2) |
{ |
struct rc_instruction *fpi = rc_insert_new_instruction(c, after); |
if (base) { |
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); |
} |
fpi->U.I.Opcode = Opcode; |
fpi->U.I.DstReg = DstReg; |
fpi->U.I.SrcReg[0] = SrcReg0; |
fpi->U.I.SrcReg[1] = SrcReg1; |
fpi->U.I.SrcReg[2] = SrcReg2; |
return fpi; |
} |
static struct rc_dst_register dstregtmpmask(int index, int mask) |
{ |
struct rc_dst_register dst = {0, 0, 0}; |
dst.File = RC_FILE_TEMPORARY; |
dst.Index = index; |
dst.WriteMask = mask; |
return dst; |
} |
static const struct rc_src_register builtin_zero = { |
.File = RC_FILE_NONE, |
.Index = 0, |
.Swizzle = RC_SWIZZLE_0000 |
}; |
static const struct rc_src_register builtin_one = { |
.File = RC_FILE_NONE, |
.Index = 0, |
.Swizzle = RC_SWIZZLE_1111 |
}; |
static const struct rc_src_register builtin_half = { |
.File = RC_FILE_NONE, |
.Index = 0, |
.Swizzle = RC_SWIZZLE_HHHH |
}; |
static const struct rc_src_register srcreg_undefined = { |
.File = RC_FILE_NONE, |
.Index = 0, |
.Swizzle = RC_SWIZZLE_XYZW |
}; |
static struct rc_src_register srcreg(int file, int index) |
{ |
struct rc_src_register src = srcreg_undefined; |
src.File = file; |
src.Index = index; |
return src; |
} |
static struct rc_src_register srcregswz(int file, int index, int swz) |
{ |
struct rc_src_register src = srcreg_undefined; |
src.File = file; |
src.Index = index; |
src.Swizzle = swz; |
return src; |
} |
static struct rc_src_register absolute(struct rc_src_register reg) |
{ |
struct rc_src_register newreg = reg; |
newreg.Abs = 1; |
newreg.Negate = RC_MASK_NONE; |
return newreg; |
} |
static struct rc_src_register negate(struct rc_src_register reg) |
{ |
struct rc_src_register newreg = reg; |
newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; |
return newreg; |
} |
/**
 * Return a copy of @p reg whose swizzle is the existing swizzle combined
 * (via combine_swizzles4) with the per-channel selectors x, y, z and w.
 */
static struct rc_src_register swizzle(struct rc_src_register reg,
	rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
{
	reg.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);

	return reg;
}
static struct rc_src_register swizzle_smear(struct rc_src_register reg, |
rc_swizzle x) |
{ |
return swizzle(reg, x, x, x, x); |
} |
static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) |
{ |
return swizzle_smear(reg, RC_SWIZZLE_X); |
} |
static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) |
{ |
return swizzle_smear(reg, RC_SWIZZLE_Y); |
} |
static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) |
{ |
return swizzle_smear(reg, RC_SWIZZLE_Z); |
} |
static struct rc_src_register swizzle_wwww(struct rc_src_register reg) |
{ |
return swizzle_smear(reg, RC_SWIZZLE_W); |
} |
static int is_dst_safe_to_reuse(struct rc_instruction *inst) |
{ |
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned i; |
assert(info->HasDstReg); |
if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) |
return 0; |
for (i = 0; i < info->NumSrcRegs; i++) { |
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && |
inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) |
return 0; |
} |
return 1; |
} |
static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, |
struct rc_instruction *inst) |
{ |
unsigned tmp; |
if (is_dst_safe_to_reuse(inst)) |
tmp = inst->U.I.DstReg.Index; |
else |
tmp = rc_find_free_temporary(c); |
return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); |
} |
static void transform_ABS(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_src_register src = inst->U.I.SrcReg[0]; |
src.Abs = 1; |
src.Negate = RC_MASK_NONE; |
emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src); |
rc_remove_instruction(inst); |
} |
static void transform_CEIL(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* Assuming: |
* ceil(x) = -floor(-x) |
* |
* After inlining floor: |
* ceil(x) = -(-x-frac(-x)) |
* |
* After simplification: |
* ceil(x) = x+frac(-x) |
*/ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); |
emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg, |
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); |
rc_remove_instruction(inst); |
} |
static void transform_CLAMP(struct radeon_compiler *c, |
struct rc_instruction *inst) |
{ |
/* CLAMP dst, src, min, max |
* into: |
* MIN tmp, src, max |
* MAX dst, tmp, min |
*/ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, |
inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); |
emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); |
rc_remove_instruction(inst); |
} |
static void transform_DP2(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_src_register src0 = inst->U.I.SrcReg[0]; |
struct rc_src_register src1 = inst->U.I.SrcReg[1]; |
src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); |
src0.Swizzle &= ~(63 << (3 * 2)); |
src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); |
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); |
src1.Swizzle &= ~(63 << (3 * 2)); |
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); |
emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1); |
rc_remove_instruction(inst); |
} |
static void transform_DPH(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_src_register src0 = inst->U.I.SrcReg[0]; |
src0.Negate &= ~RC_MASK_W; |
src0.Swizzle &= ~(7 << (3 * 3)); |
src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); |
emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); |
rc_remove_instruction(inst); |
} |
/** |
* [1, src0.y*src1.y, src0.z, src1.w] |
* So basically MUL with lotsa swizzling. |
*/ |
static void transform_DST(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg, |
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), |
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); |
rc_remove_instruction(inst); |
} |
static void transform_FLR(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); |
emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg, |
inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); |
rc_remove_instruction(inst); |
} |
static void transform_TRUNC(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* Definition of trunc: |
* trunc(x) = (abs(x) - fract(abs(x))) * sgn(x) |
* |
* The multiplication by sgn(x) can be simplified using CMP: |
* y * sgn(x) = (x < 0 ? -y : y) |
*/ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0])); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]), |
negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], |
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index)); |
rc_remove_instruction(inst); |
} |
/** |
* Definition of LIT (from ARB_fragment_program): |
* |
* tmp = VectorLoad(op0); |
* if (tmp.x < 0) tmp.x = 0; |
* if (tmp.y < 0) tmp.y = 0; |
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); |
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; |
* result.x = 1.0; |
* result.y = tmp.x; |
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; |
* result.w = 1.0; |
* |
* The longest path of computation is the one leading to result.z, |
* consisting of 5 operations. This implementation of LIT takes |
* 5 slots, if the subsequent optimization passes are clever enough |
* to pair instructions correctly. |
*/ |
static void transform_LIT(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
unsigned int constant; |
unsigned int constant_swizzle; |
unsigned int temp; |
struct rc_src_register srctemp; |
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); |
if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { |
struct rc_instruction * inst_mov; |
inst_mov = emit1(c, inst, |
RC_OPCODE_MOV, 0, inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
} |
temp = inst->U.I.DstReg.Index; |
srctemp = srcreg(RC_FILE_TEMPORARY, temp); |
/* tmp.x = max(0.0, Src.x); */ |
/* tmp.y = max(0.0, Src.y); */ |
/* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ |
emit2(c, inst->Prev, RC_OPCODE_MAX, 0, |
dstregtmpmask(temp, RC_MASK_XYW), |
inst->U.I.SrcReg[0], |
swizzle(srcreg(RC_FILE_CONSTANT, constant), |
RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); |
emit2(c, inst->Prev, RC_OPCODE_MIN, 0, |
dstregtmpmask(temp, RC_MASK_Z), |
swizzle_wwww(srctemp), |
negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); |
/* tmp.w = Pow(tmp.y, tmp.w) */ |
emit1(c, inst->Prev, RC_OPCODE_LG2, 0, |
dstregtmpmask(temp, RC_MASK_W), |
swizzle_yyyy(srctemp)); |
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, |
dstregtmpmask(temp, RC_MASK_W), |
swizzle_wwww(srctemp), |
swizzle_zzzz(srctemp)); |
emit1(c, inst->Prev, RC_OPCODE_EX2, 0, |
dstregtmpmask(temp, RC_MASK_W), |
swizzle_wwww(srctemp)); |
/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, |
dstregtmpmask(temp, RC_MASK_Z), |
negate(swizzle_xxxx(srctemp)), |
swizzle_wwww(srctemp), |
builtin_zero); |
/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ |
emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, |
dstregtmpmask(temp, RC_MASK_XYW), |
swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); |
rc_remove_instruction(inst); |
} |
static void transform_LRP(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, |
dst, |
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); |
emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, |
inst->U.I.DstReg, |
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); |
rc_remove_instruction(inst); |
} |
static void transform_POW(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); |
struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); |
tempdst.WriteMask = RC_MASK_W; |
tempsrc.Swizzle = RC_SWIZZLE_WWWW; |
emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); |
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); |
emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc); |
rc_remove_instruction(inst); |
} |
/* dst = ROUND(src) : |
* add = src + .5 |
* frac = FRC(add) |
* dst = add - frac |
* |
* According to the GLSL spec, the implementor can decide which way to round |
* when the fraction is .5. We round down for .5. |
* |
*/ |
static void transform_ROUND(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
unsigned int mask = inst->U.I.DstReg.WriteMask; |
unsigned int frac_index, add_index; |
struct rc_dst_register frac_dst, add_dst; |
struct rc_src_register frac_src, add_src; |
/* add = src + .5 */ |
add_index = rc_find_free_temporary(c); |
add_dst = dstregtmpmask(add_index, mask); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0], |
builtin_half); |
add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index); |
/* frac = FRC(add) */ |
frac_index = rc_find_free_temporary(c); |
frac_dst = dstregtmpmask(frac_index, mask); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src); |
frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index); |
/* dst = add - frac */ |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, |
add_src, negate(frac_src)); |
rc_remove_instruction(inst); |
} |
static void transform_RSQ(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); |
} |
static void transform_SEQ(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); |
rc_remove_instruction(inst); |
} |
static void transform_SFL(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero); |
rc_remove_instruction(inst); |
} |
static void transform_SGE(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); |
rc_remove_instruction(inst); |
} |
static void transform_SGT(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); |
rc_remove_instruction(inst); |
} |
static void transform_SLE(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); |
rc_remove_instruction(inst); |
} |
static void transform_SLT(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); |
rc_remove_instruction(inst); |
} |
static void transform_SNE(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); |
rc_remove_instruction(inst); |
} |
static void transform_SSG(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* result = sign(x) |
* |
* CMP tmp0, -x, 1, 0 |
* CMP tmp1, x, 1, 0 |
* ADD result, tmp0, -tmp1; |
*/ |
struct rc_dst_register dst0; |
unsigned tmp1; |
/* 0 < x */ |
dst0 = try_to_reuse_dst(c, inst); |
emit3(c, inst->Prev, RC_OPCODE_CMP, 0, |
dst0, |
negate(inst->U.I.SrcReg[0]), |
builtin_one, |
builtin_zero); |
/* x < 0 */ |
tmp1 = rc_find_free_temporary(c); |
emit3(c, inst->Prev, RC_OPCODE_CMP, 0, |
dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), |
inst->U.I.SrcReg[0], |
builtin_one, |
builtin_zero); |
/* Either both are zero, or one of them is one and the other is zero. */ |
/* result = tmp0 - tmp1 */ |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, |
inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst0.Index), |
negate(srcreg(RC_FILE_TEMPORARY, tmp1))); |
rc_remove_instruction(inst); |
} |
static void transform_SUB(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
inst->U.I.Opcode = RC_OPCODE_ADD; |
inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); |
} |
static void transform_SWZ(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
inst->U.I.Opcode = RC_OPCODE_MOV; |
} |
static void transform_XPD(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, |
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), |
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); |
emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg, |
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), |
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), |
negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); |
rc_remove_instruction(inst); |
} |
/** |
* Can be used as a transformation for @ref radeonClauseLocalTransform, |
* no userData necessary. |
* |
* Eliminates the following ALU instructions: |
* ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD |
* using: |
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP |
* |
* Transforms RSQ to Radeon's native RSQ by explicitly setting |
* absolute value. |
* |
* @note should be applicable to R300 and R500 fragment programs. |
*/ |
int radeonTransformALU( |
struct radeon_compiler * c, |
struct rc_instruction* inst, |
void* unused) |
{ |
switch(inst->U.I.Opcode) { |
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; |
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; |
case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; |
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; |
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; |
case RC_OPCODE_DST: transform_DST(c, inst); return 1; |
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; |
case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; |
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; |
case RC_OPCODE_POW: transform_POW(c, inst); return 1; |
case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1; |
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; |
case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; |
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; |
case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; |
case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; |
case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; |
case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; |
case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; |
case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; |
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; |
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; |
case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1; |
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; |
default: |
return 0; |
} |
} |
static void transform_r300_vertex_ABS(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* Note: r500 can take absolute values, but r300 cannot. */ |
inst->U.I.Opcode = RC_OPCODE_MAX; |
inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; |
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
} |
static void transform_r300_vertex_CMP(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* There is no decent CMP available, so let's rig one up. |
* CMP is defined as dst = src0 < 0.0 ? src1 : src2 |
* The following sequence consumes zero to two temps and two extra slots |
* (the second temp and the second slot is consumed by transform_LRP), |
* but should be equivalent: |
* |
* SLT tmp0, src0, 0.0 |
* LRP dst, tmp0, src1, src2 |
* |
* Yes, I know, I'm a mad scientist. ~ C. & M. */ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
/* SLT tmp0, src0, 0.0 */ |
emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
dst, |
inst->U.I.SrcReg[0], builtin_zero); |
/* LRP dst, tmp0, src1, src2 */ |
transform_LRP(c, |
emit3(c, inst->Prev, RC_OPCODE_LRP, 0, |
inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); |
rc_remove_instruction(inst); |
} |
static void transform_r300_vertex_DP2(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_instruction *next_inst = inst->Next; |
transform_DP2(c, inst); |
next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; |
} |
static void transform_r300_vertex_DP3(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_src_register src0 = inst->U.I.SrcReg[0]; |
struct rc_src_register src1 = inst->U.I.SrcReg[1]; |
src0.Negate &= ~RC_MASK_W; |
src0.Swizzle &= ~(7 << (3 * 3)); |
src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); |
src1.Negate &= ~RC_MASK_W; |
src1.Swizzle &= ~(7 << (3 * 3)); |
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); |
emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1); |
rc_remove_instruction(inst); |
} |
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
unsigned constant_swizzle; |
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, |
0.0000000000000000001, |
&constant_swizzle); |
/* MOV dst, src */ |
dst.WriteMask = RC_MASK_XYZW; |
emit1(c, inst->Prev, RC_OPCODE_MOV, 0, |
dst, |
inst->U.I.SrcReg[0]); |
/* MAX dst.y, src, 0.00...001 */ |
emit2(c, inst->Prev, RC_OPCODE_MAX, 0, |
dstregtmpmask(dst.Index, RC_MASK_Y), |
srcreg(RC_FILE_TEMPORARY, dst.Index), |
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); |
inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); |
} |
static void transform_r300_vertex_SEQ(struct radeon_compiler *c, |
struct rc_instruction *inst) |
{ |
/* x = y <==> x >= y && y >= x */ |
int tmp = rc_find_free_temporary(c); |
/* x <= y */ |
emit2(c, inst->Prev, RC_OPCODE_SGE, 0, |
dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), |
inst->U.I.SrcReg[0], |
inst->U.I.SrcReg[1]); |
/* y <= x */ |
emit2(c, inst->Prev, RC_OPCODE_SGE, 0, |
inst->U.I.DstReg, |
inst->U.I.SrcReg[1], |
inst->U.I.SrcReg[0]); |
/* x && y = x * y */ |
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, |
inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, tmp), |
srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); |
rc_remove_instruction(inst); |
} |
static void transform_r300_vertex_SNE(struct radeon_compiler *c, |
struct rc_instruction *inst) |
{ |
/* x != y <==> x < y || y < x */ |
int tmp = rc_find_free_temporary(c); |
/* x < y */ |
emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), |
inst->U.I.SrcReg[0], |
inst->U.I.SrcReg[1]); |
/* y < x */ |
emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
inst->U.I.DstReg, |
inst->U.I.SrcReg[1], |
inst->U.I.SrcReg[0]); |
/* x || y = max(x, y) */ |
emit2(c, inst->Prev, RC_OPCODE_MAX, 0, |
inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, tmp), |
srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); |
rc_remove_instruction(inst); |
} |
static void transform_r300_vertex_SGT(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* x > y <==> -x < -y */ |
inst->U.I.Opcode = RC_OPCODE_SLT; |
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
} |
static void transform_r300_vertex_SLE(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* x <= y <==> -x >= -y */ |
inst->U.I.Opcode = RC_OPCODE_SGE; |
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
} |
static void transform_r300_vertex_SSG(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
/* result = sign(x) |
* |
* SLT tmp0, 0, x; |
* SLT tmp1, x, 0; |
* ADD result, tmp0, -tmp1; |
*/ |
struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); |
unsigned tmp1; |
/* 0 < x */ |
dst0 = try_to_reuse_dst(c, inst); |
emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
dst0, |
builtin_zero, |
inst->U.I.SrcReg[0]); |
/* x < 0 */ |
tmp1 = rc_find_free_temporary(c); |
emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), |
inst->U.I.SrcReg[0], |
builtin_zero); |
/* Either both are zero, or one of them is one and the other is zero. */ |
/* result = tmp0 - tmp1 */ |
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, |
inst->U.I.DstReg, |
srcreg(RC_FILE_TEMPORARY, dst0.Index), |
negate(srcreg(RC_FILE_TEMPORARY, tmp1))); |
rc_remove_instruction(inst); |
} |
static void transform_vertex_TRUNC(struct radeon_compiler* c, |
struct rc_instruction* inst) |
{ |
struct rc_instruction *next = inst->Next; |
/* next->Prev is removed after each transformation and replaced |
* by a new instruction. */ |
transform_TRUNC(c, next->Prev); |
transform_r300_vertex_CMP(c, next->Prev); |
} |
/** |
* For use with rc_local_transform, this transforms non-native ALU |
* instructions of the r300 up to r500 vertex engine. |
*/ |
int r300_transform_vertex_alu( |
struct radeon_compiler * c, |
struct rc_instruction* inst, |
void* unused) |
{ |
switch(inst->U.I.Opcode) { |
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; |
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; |
case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; |
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; |
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; |
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; |
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; |
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; |
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; |
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; |
case RC_OPCODE_SEQ: |
if (!c->is_r500) { |
transform_r300_vertex_SEQ(c, inst); |
return 1; |
} |
return 0; |
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; |
case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; |
case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; |
case RC_OPCODE_SNE: |
if (!c->is_r500) { |
transform_r300_vertex_SNE(c, inst); |
return 1; |
} |
return 0; |
case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; |
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; |
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; |
case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1; |
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; |
default: |
return 0; |
} |
} |
static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) |
{ |
static const float SinCosConsts[2][4] = { |
{ |
1.273239545, /* 4/PI */ |
-0.405284735, /* -4/(PI*PI) */ |
3.141592654, /* PI */ |
0.2225 /* weight */ |
}, |
{ |
0.75, |
0.5, |
0.159154943, /* 1/(2*PI) */ |
6.283185307 /* 2*PI */ |
} |
}; |
int i; |
for(i = 0; i < 2; ++i) |
constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); |
} |
/** |
* Approximate sin(x), where x is clamped to (-pi/2, pi/2). |
* |
* MUL tmp.xy, src, { 4/PI, -4/(PI^2) } |
* MAD tmp.x, tmp.y, |src|, tmp.x |
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x |
* MAD dest, tmp.y, weight, tmp.x |
*/ |
static void sin_approx( |
struct radeon_compiler* c, struct rc_instruction * inst, |
struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) |
{ |
unsigned int tempreg = rc_find_free_temporary(c); |
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
swizzle_xxxx(src), |
srcreg(RC_FILE_CONSTANT, constants[0])); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), |
swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), |
absolute(swizzle_xxxx(src)), |
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), |
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), |
absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), |
negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, |
swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), |
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), |
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); |
} |
/** |
* Translate the trigonometric functions COS, SIN, and SCS |
* using only the basic instructions |
* MOV, ADD, MUL, MAD, FRC |
*/ |
int r300_transform_trig_simple(struct radeon_compiler* c, |
struct rc_instruction* inst, |
void* unused) |
{ |
unsigned int constants[2]; |
unsigned int tempreg; |
if (inst->U.I.Opcode != RC_OPCODE_COS && |
inst->U.I.Opcode != RC_OPCODE_SIN && |
inst->U.I.Opcode != RC_OPCODE_SCS) |
return 0; |
tempreg = rc_find_free_temporary(c); |
sincos_constants(c, constants); |
if (inst->U.I.Opcode == RC_OPCODE_COS) { |
/* MAD tmp.x, src, 1/(2*PI), 0.75 */ |
/* FRC tmp.x, tmp.x */ |
/* MAD tmp.z, tmp.x, 2*PI, -PI */ |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
swizzle_xxxx(inst->U.I.SrcReg[0]), |
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), |
swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), |
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), |
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); |
sin_approx(c, inst, inst->U.I.DstReg, |
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
constants); |
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) { |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
swizzle_xxxx(inst->U.I.SrcReg[0]), |
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), |
swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), |
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), |
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); |
sin_approx(c, inst, inst->U.I.DstReg, |
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
constants); |
} else { |
struct rc_dst_register dst; |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
swizzle_xxxx(inst->U.I.SrcReg[0]), |
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), |
swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
srcreg(RC_FILE_TEMPORARY, tempreg)); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
srcreg(RC_FILE_TEMPORARY, tempreg), |
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), |
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); |
dst = inst->U.I.DstReg; |
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; |
sin_approx(c, inst, dst, |
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), |
constants); |
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; |
sin_approx(c, inst, dst, |
swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), |
constants); |
} |
rc_remove_instruction(inst); |
return 1; |
} |
static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, |
struct rc_instruction *inst, |
unsigned srctmp) |
{ |
if (inst->U.I.Opcode == RC_OPCODE_COS) { |
emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg, |
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) { |
emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, |
inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
} else if (inst->U.I.Opcode == RC_OPCODE_SCS) { |
struct rc_dst_register moddst = inst->U.I.DstReg; |
if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { |
moddst.WriteMask = RC_MASK_X; |
emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst, |
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
} |
if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { |
moddst.WriteMask = RC_MASK_Y; |
emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst, |
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
} |
} |
rc_remove_instruction(inst); |
} |
/** |
* Transform the trigonometric functions COS, SIN, and SCS |
* to include pre-scaling by 1/(2*PI) and taking the fractional |
* part, so that the input to COS and SIN is always in the range [0,1). |
* SCS is replaced by one COS and one SIN instruction. |
* |
* @warning This transformation implicitly changes the semantics of SIN and COS! |
*/ |
int radeonTransformTrigScale(struct radeon_compiler* c, |
struct rc_instruction* inst, |
void* unused) |
{ |
static const float RCP_2PI = 0.15915494309189535; |
unsigned int temp; |
unsigned int constant; |
unsigned int constant_swizzle; |
if (inst->U.I.Opcode != RC_OPCODE_COS && |
inst->U.I.Opcode != RC_OPCODE_SIN && |
inst->U.I.Opcode != RC_OPCODE_SCS) |
return 0; |
temp = rc_find_free_temporary(c); |
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); |
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), |
swizzle_xxxx(inst->U.I.SrcReg[0]), |
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), |
srcreg(RC_FILE_TEMPORARY, temp)); |
r300_transform_SIN_COS_SCS(c, inst, temp); |
return 1; |
} |
/** |
* Transform the trigonometric functions COS, SIN, and SCS |
* so that the input to COS and SIN is always in the range [-PI, PI]. |
* SCS is replaced by one COS and one SIN instruction. |
*/ |
int r300_transform_trig_scale_vertex(struct radeon_compiler *c, |
struct rc_instruction *inst, |
void *unused) |
{ |
static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; |
unsigned int temp; |
unsigned int constant; |
if (inst->U.I.Opcode != RC_OPCODE_COS && |
inst->U.I.Opcode != RC_OPCODE_SIN && |
inst->U.I.Opcode != RC_OPCODE_SCS) |
return 0; |
/* Repeat x in the range [-PI, PI]: |
* |
* repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI |
*/ |
temp = rc_find_free_temporary(c); |
constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), |
swizzle_xxxx(inst->U.I.SrcReg[0]), |
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), |
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); |
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), |
srcreg(RC_FILE_TEMPORARY, temp)); |
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), |
srcreg(RC_FILE_TEMPORARY, temp), |
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), |
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); |
r300_transform_SIN_COS_SCS(c, inst, temp); |
return 1; |
} |
/** |
* Rewrite DDX/DDY instructions to properly work with r5xx shaders. |
* The r5xx MDH/MDV instruction provides per-quad partial derivatives. |
* It takes the form A*B+C. A and C are set by setting src0. B should be -1. |
* |
* @warning This explicitly changes the form of DDX and DDY! |
*/ |
int radeonTransformDeriv(struct radeon_compiler* c, |
struct rc_instruction* inst, |
void* unused) |
{ |
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) |
return 0; |
inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; |
inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; |
return 1; |
} |
/** |
* IF Temp[0].x -> IF Temp[0].x |
* ... -> ... |
* KILL -> KIL -abs(Temp[0].x) |
* ... -> ... |
* ENDIF -> ENDIF |
* |
* === OR === |
* |
* IF Temp[0].x -\ |
* KILL - > KIL -abs(Temp[0].x) |
* ENDIF -/ |
* |
* === OR === |
* |
* IF Temp[0].x -> IF Temp[0].x |
* ... -> ... |
* ELSE -> ELSE |
* ... -> ... |
* KILL -> KIL -abs(Temp[0].x) |
* ... -> ... |
* ENDIF -> ENDIF |
* |
* === OR === |
* |
* KILL -> KIL -none.1111 |
* |
* This needs to be done in its own pass, because it might modify the |
* instructions before and after KILL. |
*/ |
void rc_transform_KILL(struct radeon_compiler * c, void *user) |
{ |
struct rc_instruction * inst; |
for (inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; inst = inst->Next) { |
struct rc_instruction * if_inst; |
unsigned in_if = 0; |
if (inst->U.I.Opcode != RC_OPCODE_KILP) |
continue; |
for (if_inst = inst->Prev; if_inst != &c->Program.Instructions; |
if_inst = if_inst->Prev) { |
if (if_inst->U.I.Opcode == RC_OPCODE_IF) { |
in_if = 1; |
break; |
} |
} |
inst->U.I.Opcode = RC_OPCODE_KIL; |
if (!in_if) { |
inst->U.I.SrcReg[0] = negate(builtin_one); |
} else { |
/* This should work even if the KILP is inside the ELSE |
* block, because -0.0 is considered negative. */ |
inst->U.I.SrcReg[0] = |
negate(absolute(if_inst->U.I.SrcReg[0])); |
if (inst->Prev->U.I.Opcode != RC_OPCODE_IF |
&& inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { |
/* Optimize the special case: |
* IF Temp[0].x |
* KILP |
* ENDIF |
*/ |
/* Remove IF */ |
rc_remove_instruction(inst->Prev); |
/* Remove ENDIF */ |
rc_remove_instruction(inst->Next); |
} |
} |
} |
} |
int rc_force_output_alpha_to_one(struct radeon_compiler *c, |
struct rc_instruction *inst, void *data) |
{ |
struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler*)c; |
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned tmp; |
if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT || |
inst->U.I.DstReg.Index == fragc->OutputDepth) |
return 1; |
tmp = rc_find_free_temporary(c); |
/* Insert MOV after inst, set alpha to 1. */ |
emit1(c, inst, RC_OPCODE_MOV, 0, inst->U.I.DstReg, |
srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1)); |
/* Re-route the destination of inst to the source of mov. */ |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst->U.I.DstReg.Index = tmp; |
/* Move the saturate output modifier to the MOV instruction |
* (for better copy propagation). */ |
inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode; |
inst->U.I.SaturateMode = RC_SATURATE_NONE; |
return 1; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_alu.h |
---|
0,0 → 1,69 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef __RADEON_PROGRAM_ALU_H_
#define __RADEON_PROGRAM_ALU_H_

#include "radeon_program.h"

/*
 * Per-instruction transformations. Each takes the compiler, the instruction
 * to inspect and an opaque user pointer, and returns an int status.
 */

int radeonTransformALU(struct radeon_compiler *c,
                       struct rc_instruction *inst,
                       void *user);

int r300_transform_vertex_alu(struct radeon_compiler *c,
                              struct rc_instruction *inst,
                              void *user);

int r300_transform_trig_simple(struct radeon_compiler *c,
                               struct rc_instruction *inst,
                               void *user);

int radeonTransformTrigScale(struct radeon_compiler *c,
                             struct rc_instruction *inst,
                             void *user);

int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
                                     struct rc_instruction *inst,
                                     void *user);

int radeonTransformDeriv(struct radeon_compiler *c,
                         struct rc_instruction *inst,
                         void *user);

/* Whole-program pass: rewrites every KILP in the instruction list. */
void rc_transform_KILL(struct radeon_compiler *c,
                       void *user);

int rc_force_output_alpha_to_one(struct radeon_compiler *c,
                                 struct rc_instruction *inst,
                                 void *data);

#endif /* __RADEON_PROGRAM_ALU_H_ */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_constants.h |
---|
0,0 → 1,213 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_PROGRAM_CONSTANTS_H |
#define RADEON_PROGRAM_CONSTANTS_H |
/** Output saturation (clamping) mode of an instruction. */
typedef enum {
    RC_SATURATE_NONE           = 0,
    RC_SATURATE_ZERO_ONE       = 1,
    RC_SATURATE_MINUS_PLUS_ONE = 2
} rc_saturate_mode;
/** Texture target kinds. */
typedef enum {
    RC_TEXTURE_2D_ARRAY = 0,
    RC_TEXTURE_1D_ARRAY = 1,
    RC_TEXTURE_CUBE     = 2,
    RC_TEXTURE_3D       = 3,
    RC_TEXTURE_RECT     = 4,
    RC_TEXTURE_2D       = 5,
    RC_TEXTURE_1D       = 6
} rc_texture_target;
/** Register files a source or destination register can refer to. */
typedef enum {
    /**
     * Used to indicate unused register descriptions and
     * source register that use a constant swizzle.
     */
    RC_FILE_NONE      = 0,

    RC_FILE_TEMPORARY = 1,

    /**
     * Input register.
     *
     * \note The compiler attaches no implicit semantics to input registers.
     * Fragment/vertex program specific semantics must be defined explicitly
     * using the appropriate compiler interfaces.
     */
    RC_FILE_INPUT     = 2,

    /**
     * Output register.
     *
     * \note The compiler attaches no implicit semantics to output registers.
     * Fragment/vertex program specific semantics must be defined explicitly
     * using the appropriate compiler interfaces.
     */
    RC_FILE_OUTPUT    = 3,

    RC_FILE_ADDRESS   = 4,

    /**
     * Indicates a constant from the \ref rc_constant_list .
     */
    RC_FILE_CONSTANT  = 5,

    /**
     * Indicates a special register, see RC_SPECIAL_xxx.
     */
    RC_FILE_SPECIAL   = 6,

    /**
     * Indicates this register should use the result of the presubtract
     * operation.
     */
    RC_FILE_PRESUB    = 7,

    /**
     * Indicates that the source index has been encoded as a 7-bit float.
     */
    RC_FILE_INLINE    = 8
} rc_register_file;
/** Indices into the RC_FILE_SPECIAL register file. */
enum {
    /** R500 fragment program ALU result "register" */
    RC_SPECIAL_ALU_RESULT = 0,

    /** Count of special registers. Must be last so it tracks the list. */
    RC_NUM_SPECIAL_REGISTERS
};
/* Width in bits of a register index field, and the derived index limit. */
#define RC_REGISTER_INDEX_BITS  10
#define RC_REGISTER_MAX_INDEX   (1 << RC_REGISTER_INDEX_BITS)
/**
 * Per-channel swizzle selector. Each selector occupies 3 bits of a packed
 * swizzle word (see RC_MAKE_SWIZZLE / GET_SWZ).
 */
typedef enum {
    RC_SWIZZLE_X      = 0,
    RC_SWIZZLE_Y      = 1,
    RC_SWIZZLE_Z      = 2,
    RC_SWIZZLE_W      = 3,
    RC_SWIZZLE_ZERO   = 4,
    RC_SWIZZLE_ONE    = 5,
    RC_SWIZZLE_HALF   = 6,
    RC_SWIZZLE_UNUSED = 7
} rc_swizzle;
/* Pack four 3-bit channel selectors into one swizzle word; channel 0 sits
 * in the lowest bits. */
#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))

/* Swizzle word that selects the same source for all four channels. */
#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))

/* Extract the 3-bit selector of channel idx from a swizzle word. */
#define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)

/* Extract bit idx of a mask. */
#define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)

/* Replace the selector of channel idx in the lvalue swz by newv. */
#define SET_SWZ(swz, idx, newv) \
    do { \
        (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
    } while(0)

/* Commonly used swizzle words. */
#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_XYZ1 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE)
#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)

#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
/**
 * \name Bitmasks for components of vectors.
 *
 * One bit per channel (X is bit 0, W is bit 3).
 * Used for write masks, negation masks, etc.
 */
/*@{*/
#define RC_MASK_NONE    0
#define RC_MASK_X       1
#define RC_MASK_Y       2
#define RC_MASK_Z       4
#define RC_MASK_W       8
#define RC_MASK_XY      (RC_MASK_X|RC_MASK_Y)
#define RC_MASK_XYZ     (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
#define RC_MASK_XYW     (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
#define RC_MASK_XYZW    (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
/*@}*/
/** Selects whether, and from which channel, the ALU result is written. */
typedef enum {
    RC_ALURESULT_NONE = 0,
    RC_ALURESULT_X    = 1,
    RC_ALURESULT_W    = 2
} rc_write_aluresult;
/** Presubtract operations that can be applied to instruction sources. */
typedef enum {
    RC_PRESUB_NONE = 0,
    RC_PRESUB_BIAS = 1,   /**< 1 - 2 * src0 */
    RC_PRESUB_SUB  = 2,   /**< src1 - src0 */
    RC_PRESUB_ADD  = 3,   /**< src1 + src0 */
    RC_PRESUB_INV  = 4    /**< 1 - src0 */
} rc_presubtract_op;
/** Output modifier: scale factor applied to an instruction's result. */
typedef enum {
    RC_OMOD_MUL_1   = 0,
    RC_OMOD_MUL_2   = 1,
    RC_OMOD_MUL_4   = 2,
    RC_OMOD_MUL_8   = 3,
    RC_OMOD_DIV_2   = 4,
    RC_OMOD_DIV_4   = 5,
    RC_OMOD_DIV_8   = 6,
    RC_OMOD_DISABLE = 7
} rc_omod_op;
/**
 * Number of source registers a presubtract operation reads:
 * 1 for BIAS and INV, 2 for ADD and SUB, 0 for anything else.
 */
static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
    if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
        return 1;
    if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
        return 2;
    return 0;
}
/* Bit flags telling which half of a paired instruction a source feeds. */
#define RC_SOURCE_NONE   0x0
#define RC_SOURCE_RGB    0x1
#define RC_SOURCE_ALPHA  0x2
/** Predication mode of an instruction. */
typedef enum {
    RC_PRED_DISABLED = 0,
    RC_PRED_SET      = 1,
    RC_PRED_INV      = 2
} rc_predicate_mode;
#endif /* RADEON_PROGRAM_CONSTANTS_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_pair.c |
---|
0,0 → 1,239 |
/* |
* Copyright (C) 2008-2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_program_pair.h" |
#include "radeon_compiler_util.h" |
#include <stdlib.h> |
/** |
* Return the source slot where we installed the given register access, |
* or -1 if no slot was free anymore. |
*/ |
int rc_pair_alloc_source(struct rc_pair_instruction *pair, |
unsigned int rgb, unsigned int alpha, |
rc_register_file file, unsigned int index) |
{ |
int candidate = -1; |
int candidate_quality = -1; |
unsigned int alpha_used = 0; |
unsigned int rgb_used = 0; |
int i; |
if ((!rgb && !alpha) || file == RC_FILE_NONE) |
return 0; |
/* Make sure only one presubtract operation is used per instruction. */ |
if (file == RC_FILE_PRESUB) { |
if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used |
&& index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { |
return -1; |
} |
if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used |
&& index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { |
return -1; |
} |
} |
for(i = 0; i < 3; ++i) { |
int q = 0; |
if (rgb) { |
if (pair->RGB.Src[i].Used) { |
if (pair->RGB.Src[i].File != file || |
pair->RGB.Src[i].Index != index) { |
rgb_used++; |
continue; |
} |
q++; |
} |
} |
if (alpha) { |
if (pair->Alpha.Src[i].Used) { |
if (pair->Alpha.Src[i].File != file || |
pair->Alpha.Src[i].Index != index) { |
alpha_used++; |
continue; |
} |
q++; |
} |
} |
if (q > candidate_quality) { |
candidate_quality = q; |
candidate = i; |
} |
} |
if (file == RC_FILE_PRESUB) { |
candidate = RC_PAIR_PRESUB_SRC; |
} else if (candidate < 0 || (rgb && rgb_used > 2) |
|| (alpha && alpha_used > 2)) { |
return -1; |
} |
/* candidate >= 0 */ |
if (rgb) { |
pair->RGB.Src[candidate].Used = 1; |
pair->RGB.Src[candidate].File = file; |
pair->RGB.Src[candidate].Index = index; |
if (candidate == RC_PAIR_PRESUB_SRC) { |
/* For registers with the RC_FILE_PRESUB file, |
* the index stores the presubtract op. */ |
int src_regs = rc_presubtract_src_reg_count(index); |
for(i = 0; i < src_regs; i++) { |
pair->RGB.Src[i].Used = 1; |
} |
} |
} |
if (alpha) { |
pair->Alpha.Src[candidate].Used = 1; |
pair->Alpha.Src[candidate].File = file; |
pair->Alpha.Src[candidate].Index = index; |
if (candidate == RC_PAIR_PRESUB_SRC) { |
/* For registers with the RC_FILE_PRESUB file, |
* the index stores the presubtract op. */ |
int src_regs = rc_presubtract_src_reg_count(index); |
for(i=0; i < src_regs; i++) { |
pair->Alpha.Src[i].Used = 1; |
} |
} |
} |
return candidate; |
} |
static void pair_foreach_source_callback( |
struct rc_pair_instruction * pair, |
void * data, |
rc_pair_foreach_src_fn cb, |
unsigned int swz, |
unsigned int src) |
{ |
/* swz > 3 means that the swizzle is either not used, or a constant |
* swizzle (e.g. 0, 1, 0.5). */ |
if(swz > 3) |
return; |
if(swz == RC_SWIZZLE_W) { |
if (src == RC_PAIR_PRESUB_SRC) { |
unsigned int i; |
unsigned int src_count = rc_presubtract_src_reg_count( |
pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); |
for(i = 0; i < src_count; i++) { |
cb(data, &pair->Alpha.Src[i]); |
} |
} else { |
cb(data, &pair->Alpha.Src[src]); |
} |
} else { |
if (src == RC_PAIR_PRESUB_SRC) { |
unsigned int i; |
unsigned int src_count = rc_presubtract_src_reg_count( |
pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); |
for(i = 0; i < src_count; i++) { |
cb(data, &pair->RGB.Src[i]); |
} |
} |
else { |
cb(data, &pair->RGB.Src[src]); |
} |
} |
} |
void rc_pair_foreach_source_that_alpha_reads( |
struct rc_pair_instruction * pair, |
void * data, |
rc_pair_foreach_src_fn cb) |
{ |
unsigned int i; |
const struct rc_opcode_info * info = |
rc_get_opcode_info(pair->Alpha.Opcode); |
for(i = 0; i < info->NumSrcRegs; i++) { |
pair_foreach_source_callback(pair, data, cb, |
GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), |
pair->Alpha.Arg[i].Source); |
} |
} |
void rc_pair_foreach_source_that_rgb_reads( |
struct rc_pair_instruction * pair, |
void * data, |
rc_pair_foreach_src_fn cb) |
{ |
unsigned int i; |
const struct rc_opcode_info * info = |
rc_get_opcode_info(pair->RGB.Opcode); |
for(i = 0; i < info->NumSrcRegs; i++) { |
unsigned int chan; |
unsigned int swz = RC_SWIZZLE_UNUSED; |
/* Find a swizzle that is either X,Y,Z,or W. We assume here |
* that if one channel swizzles X,Y, or Z, then none of the |
* other channels swizzle W, and vice-versa. */ |
for(chan = 0; chan < 4; chan++) { |
swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); |
if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y |
|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) |
continue; |
} |
pair_foreach_source_callback(pair, data, cb, |
swz, |
pair->RGB.Arg[i].Source); |
} |
} |
struct rc_pair_instruction_source * rc_pair_get_src( |
struct rc_pair_instruction * pair_inst, |
struct rc_pair_instruction_arg * arg) |
{ |
unsigned int type; |
type = rc_source_type_swz(arg->Swizzle); |
if (type & RC_SOURCE_RGB) { |
return &pair_inst->RGB.Src[arg->Source]; |
} else if (type & RC_SOURCE_ALPHA) { |
return &pair_inst->Alpha.Src[arg->Source]; |
} else { |
return NULL; |
} |
} |
int rc_pair_get_src_index( |
struct rc_pair_instruction * pair_inst, |
struct rc_pair_instruction_source * src) |
{ |
int i; |
for (i = 0; i < 3; i++) { |
if (&pair_inst->RGB.Src[i] == src |
|| &pair_inst->Alpha.Src[i] == src) { |
return i; |
} |
} |
return -1; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_pair.h |
---|
0,0 → 1,139 |
/* |
* Copyright (C) 2008 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef __RADEON_PROGRAM_PAIR_H_ |
#define __RADEON_PROGRAM_PAIR_H_ |
#include "radeon_code.h" |
#include "radeon_opcodes.h" |
#include "radeon_program_constants.h" |
struct radeon_compiler; |
/** |
* \file |
* Represents a paired ALU instruction, as found in R300 and R500 |
* fragment programs. |
* |
* Note that this representation is taking some liberties as far |
* as register files are concerned, to allow separate register |
* allocation. |
* |
* Also note that there are some subtleties in that the semantics |
* of certain opcodes are implicitly changed in this representation; |
* see \ref rc_pair_translate |
*/ |
/* Index of the source slot reserved for the presubtract result.
 *
 * For rgb and alpha instructions, when arg[n].Source = RC_PAIR_PRESUB_SRC
 * the presubtract value will be used, and
 * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
 */
#define RC_PAIR_PRESUB_SRC  3
struct rc_pair_instruction_source { |
unsigned int Used:1; |
unsigned int File:4; |
unsigned int Index:RC_REGISTER_INDEX_BITS; |
}; |
/** One argument of a paired (sub-)instruction. */
struct rc_pair_instruction_arg {
    unsigned int Source  : 2;    /* index of the source slot that is read */
    unsigned int Swizzle : 12;   /* four packed 3-bit channel selectors */
    unsigned int Abs     : 1;    /* absolute-value modifier flag */
    unsigned int Negate  : 1;    /* negation modifier flag */
};
struct rc_pair_sub_instruction { |
unsigned int Opcode:8; |
unsigned int DestIndex:RC_REGISTER_INDEX_BITS; |
unsigned int WriteMask:4; |
unsigned int Target:2; |
unsigned int OutputWriteMask:3; |
unsigned int DepthWriteMask:1; |
unsigned int Saturate:1; |
unsigned int Omod:3; |
struct rc_pair_instruction_source Src[4]; |
struct rc_pair_instruction_arg Arg[3]; |
}; |
struct rc_pair_instruction { |
struct rc_pair_sub_instruction RGB; |
struct rc_pair_sub_instruction Alpha; |
unsigned int WriteALUResult:2; |
unsigned int ALUResultCompare:3; |
unsigned int Nop:1; |
unsigned int SemWait:1; |
}; |
/* Callback used by the rc_pair_foreach_source_* helpers: receives the
 * caller's opaque data pointer and one source slot. */
typedef void (*rc_pair_foreach_src_fn)(void *, struct rc_pair_instruction_source *);
/** |
* General helper functions for dealing with the paired instruction format. |
*/ |
/*@{*/ |
int rc_pair_alloc_source(struct rc_pair_instruction *pair, |
unsigned int rgb, unsigned int alpha, |
rc_register_file file, unsigned int index); |
void rc_pair_foreach_source_that_alpha_reads( |
struct rc_pair_instruction * pair, |
void * data, |
rc_pair_foreach_src_fn cb); |
void rc_pair_foreach_source_that_rgb_reads( |
struct rc_pair_instruction * pair, |
void * data, |
rc_pair_foreach_src_fn cb); |
struct rc_pair_instruction_source * rc_pair_get_src( |
struct rc_pair_instruction * pair_inst, |
struct rc_pair_instruction_arg * arg); |
int rc_pair_get_src_index( |
struct rc_pair_instruction * pair_inst, |
struct rc_pair_instruction_source * src); |
/*@}*/ |
/**
 * Compiler passes that operate with the paired format.
 * Each takes the compiler and an opaque user pointer.
 */
/*@{*/
struct radeon_pair_handler;

void rc_pair_translate(struct radeon_compiler *cc, void *user);
void rc_pair_schedule(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
/*@}*/
#endif /* __RADEON_PROGRAM_PAIR_H_ */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_print.c |
---|
0,0 → 1,484 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "radeon_program.h" |
#include <stdio.h> |
/* Map a texture target to its printable name; unknown values yield
 * "BAD_TEXTURE_TARGET". */
static const char * textarget_to_string(rc_texture_target target)
{
    static const struct {
        rc_texture_target target;
        const char * name;
    } names[] = {
        { RC_TEXTURE_2D_ARRAY, "2D_ARRAY" },
        { RC_TEXTURE_1D_ARRAY, "1D_ARRAY" },
        { RC_TEXTURE_CUBE,     "CUBE" },
        { RC_TEXTURE_3D,       "3D" },
        { RC_TEXTURE_RECT,     "RECT" },
        { RC_TEXTURE_2D,       "2D" },
        { RC_TEXTURE_1D,       "1D" },
    };
    unsigned int i;

    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        if (names[i].target == target)
            return names[i].name;
    }
    return "BAD_TEXTURE_TARGET";
}
/* Map a presubtract operation to a printable formula; unknown values yield
 * "BAD_PRESUBTRACT_OP". */
static const char * presubtract_op_to_string(rc_presubtract_op op)
{
    static const struct {
        rc_presubtract_op op;
        const char * text;
    } formulas[] = {
        { RC_PRESUB_NONE, "NONE" },
        { RC_PRESUB_BIAS, "(1 - 2 * src0)" },
        { RC_PRESUB_SUB,  "(src1 - src0)" },
        { RC_PRESUB_ADD,  "(src1 + src0)" },
        { RC_PRESUB_INV,  "(1 - src0)" },
    };
    unsigned int i;

    for (i = 0; i < sizeof(formulas) / sizeof(formulas[0]); i++) {
        if (formulas[i].op == op)
            return formulas[i].text;
    }
    return "BAD_PRESUBTRACT_OP";
}
/* Print an output modifier as " * N" or " / N". Nothing is printed for
 * RC_OMOD_MUL_1, RC_OMOD_DISABLE or unknown values. */
static void print_omod_op(FILE * f, rc_omod_op op)
{
    const char * text = NULL;

    switch (op) {
    case RC_OMOD_MUL_2: text = "* 2"; break;
    case RC_OMOD_MUL_4: text = "* 4"; break;
    case RC_OMOD_MUL_8: text = "* 8"; break;
    case RC_OMOD_DIV_2: text = "/ 2"; break;
    case RC_OMOD_DIV_4: text = "/ 4"; break;
    case RC_OMOD_DIV_8: text = "/ 8"; break;
    case RC_OMOD_MUL_1:
    case RC_OMOD_DISABLE:
    default:
        break;
    }

    if (text != NULL)
        fprintf(f, " %s", text);
}
/* Print "lhs OP rhs" for a comparison function. NEVER and ALWAYS print the
 * literals "false" and "true"; unknown functions print "???" as operator. */
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
{
    const char * op;

    switch (func) {
    case RC_COMPARE_FUNC_NEVER:
        fprintf(f, "false");
        return;
    case RC_COMPARE_FUNC_ALWAYS:
        fprintf(f, "true");
        return;
    case RC_COMPARE_FUNC_LESS:     op = "<";  break;
    case RC_COMPARE_FUNC_EQUAL:    op = "=="; break;
    case RC_COMPARE_FUNC_LEQUAL:   op = "<="; break;
    case RC_COMPARE_FUNC_GREATER:  op = ">";  break;
    case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
    case RC_COMPARE_FUNC_GEQUAL:   op = ">="; break;
    default:                       op = "???"; break;
    }

    fprintf(f, "%s %s %s", lhs, op, rhs);
}
/*
 * Print an inline constant, i.e. a source index that encodes a 7-bit float:
 * bits 0-2 are the mantissa and bits 3-6 the exponent (biased by 7).
 * The value is expanded to an IEEE-754 single and printed as
 * "<value> (0x<index>)".
 */
static void rc_print_inline_float(FILE * f, int index)
{
    int r300_exponent = (index >> 3) & 0xf;
    unsigned r300_mantissa = index & 0x7;
    unsigned float_exponent;
    /* Reinterpret the assembled bit pattern through a union: reading an
     * unsigned object through a float pointer violates the C aliasing
     * rules and is undefined behaviour. */
    union {
        unsigned bits;
        float value;
    } real_float;

    /* Re-bias the exponent from 7 (r300 encoding) to 127 (IEEE single). */
    r300_exponent -= 7;
    float_exponent = r300_exponent + 127;

    /* The 3 mantissa bits become the top bits of the 23-bit fraction. */
    real_float.bits = (r300_mantissa << 20) | (float_exponent << 23);

    fprintf(f, "%f (0x%x)", real_float.value, index);
}
/*
 * Print a register reference.
 *
 *   RC_FILE_NONE    -> "none"
 *   RC_FILE_SPECIAL -> "aluresult" or "special[N]"
 *   RC_FILE_INLINE  -> the decoded inline float
 *   anything else   -> "<file>[N]" or "<file>[N + addr[0]]" when reladdr
 *                      is non-zero ("BAD FILE" for unknown files)
 */
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
{
    const char * filename;

    switch (file) {
    case RC_FILE_NONE:
        fprintf(f, "none");
        return;

    case RC_FILE_SPECIAL:
        if (index == RC_SPECIAL_ALU_RESULT)
            fprintf(f, "aluresult");
        else
            fprintf(f, "special[%i]", index);
        return;

    case RC_FILE_INLINE:
        rc_print_inline_float(f, index);
        return;

    case RC_FILE_TEMPORARY: filename = "temp";   break;
    case RC_FILE_INPUT:     filename = "input";  break;
    case RC_FILE_OUTPUT:    filename = "output"; break;
    case RC_FILE_ADDRESS:   filename = "addr";   break;
    case RC_FILE_CONSTANT:  filename = "const";  break;
    default:                filename = "BAD FILE"; break;
    }

    fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
}
static void rc_print_mask(FILE * f, unsigned int mask) |
{ |
if (mask & RC_MASK_X) fprintf(f, "x"); |
if (mask & RC_MASK_Y) fprintf(f, "y"); |
if (mask & RC_MASK_Z) fprintf(f, "z"); |
if (mask & RC_MASK_W) fprintf(f, "w"); |
} |
static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) |
{ |
rc_print_register(f, dst.File, dst.Index, 0); |
if (dst.WriteMask != RC_MASK_XYZW) { |
fprintf(f, "."); |
rc_print_mask(f, dst.WriteMask); |
} |
} |
static char rc_swizzle_char(unsigned int swz) |
{ |
switch(swz) { |
case RC_SWIZZLE_X: return 'x'; |
case RC_SWIZZLE_Y: return 'y'; |
case RC_SWIZZLE_Z: return 'z'; |
case RC_SWIZZLE_W: return 'w'; |
case RC_SWIZZLE_ZERO: return '0'; |
case RC_SWIZZLE_ONE: return '1'; |
case RC_SWIZZLE_HALF: return 'H'; |
case RC_SWIZZLE_UNUSED: return '_'; |
} |
fprintf(stderr, "bad swz: %u\n", swz); |
return '?'; |
} |
static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) |
{ |
unsigned int comp; |
for(comp = 0; comp < 4; ++comp) { |
rc_swizzle swz = GET_SWZ(swizzle, comp); |
if (GET_BIT(negate, comp)) |
fprintf(f, "-"); |
fprintf(f, "%c", rc_swizzle_char(swz)); |
} |
} |
static void rc_print_presub_instruction(FILE * f, |
struct rc_presub_instruction inst) |
{ |
fprintf(f,"("); |
switch(inst.Opcode){ |
case RC_PRESUB_BIAS: |
fprintf(f, "1 - 2 * "); |
rc_print_register(f, inst.SrcReg[0].File, |
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); |
break; |
case RC_PRESUB_SUB: |
rc_print_register(f, inst.SrcReg[1].File, |
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); |
fprintf(f, " - "); |
rc_print_register(f, inst.SrcReg[0].File, |
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); |
break; |
case RC_PRESUB_ADD: |
rc_print_register(f, inst.SrcReg[1].File, |
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); |
fprintf(f, " + "); |
rc_print_register(f, inst.SrcReg[0].File, |
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); |
break; |
case RC_PRESUB_INV: |
fprintf(f, "1 - "); |
rc_print_register(f, inst.SrcReg[0].File, |
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); |
break; |
default: |
break; |
} |
fprintf(f, ")"); |
} |
static void rc_print_src_register(FILE * f, struct rc_instruction * inst, |
struct rc_src_register src) |
{ |
int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); |
if (src.Negate == RC_MASK_XYZW) |
fprintf(f, "-"); |
if (src.Abs) |
fprintf(f, "|"); |
if(src.File == RC_FILE_PRESUB) |
rc_print_presub_instruction(f, inst->U.I.PreSub); |
else |
rc_print_register(f, src.File, src.Index, src.RelAddr); |
if (src.Abs && !trivial_negate) |
fprintf(f, "|"); |
if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { |
fprintf(f, "."); |
rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); |
} |
if (src.Abs && trivial_negate) |
fprintf(f, "|"); |
} |
/**
 * Update the running branch nesting depth for \p opcode and return the
 * number of spaces to indent that instruction with (two per level).
 *
 * IF/BGNLOOP are indented at the current depth and then increase it,
 * ENDIF/ENDLOOP decrease it first, ELSE is printed one level out without
 * changing the depth, and everything else uses the current depth.
 */
static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
{
	unsigned level;

	if (opcode == RC_OPCODE_IF || opcode == RC_OPCODE_BGNLOOP) {
		level = *branch_depth;
		*branch_depth = level + 1;
	} else if (opcode == RC_OPCODE_ENDIF || opcode == RC_OPCODE_ENDLOOP) {
		assert(*branch_depth > 0);
		level = *branch_depth - 1;
		*branch_depth = level;
	} else if (opcode == RC_OPCODE_ELSE) {
		assert(*branch_depth > 0);
		level = *branch_depth - 1;
	} else {
		level = *branch_depth;
	}

	return level * 2;
}
static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) |
{ |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
unsigned int reg; |
unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); |
for (unsigned i = 0; i < spaces; i++) |
fprintf(f, " "); |
fprintf(f, "%s", opcode->Name); |
switch(inst->U.I.SaturateMode) { |
case RC_SATURATE_NONE: break; |
case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; |
case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; |
default: fprintf(f, "_BAD_SAT"); break; |
} |
if (opcode->HasDstReg) { |
fprintf(f, " "); |
rc_print_dst_register(f, inst->U.I.DstReg); |
print_omod_op(f, inst->U.I.Omod); |
if (opcode->NumSrcRegs) |
fprintf(f, ","); |
} |
for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { |
if (reg > 0) |
fprintf(f, ","); |
fprintf(f, " "); |
rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); |
} |
if (opcode->HasTexture) { |
fprintf(f, ", %s%s[%u]%s%s", |
textarget_to_string(inst->U.I.TexSrcTarget), |
inst->U.I.TexShadow ? "SHADOW" : "", |
inst->U.I.TexSrcUnit, |
inst->U.I.TexSemWait ? " SEM_WAIT" : "", |
inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : ""); |
} |
fprintf(f, ";"); |
if (inst->U.I.WriteALUResult) { |
fprintf(f, " [aluresult = ("); |
rc_print_comparefunc(f, |
(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", |
inst->U.I.ALUResultCompare, "0"); |
fprintf(f, ")]"); |
} |
if (inst->U.I.DstReg.Pred == RC_PRED_SET) { |
fprintf(f, " PRED_SET"); |
} else if (inst->U.I.DstReg.Pred == RC_PRED_INV) { |
fprintf(f, " PRED_INV"); |
} |
fprintf(f, "\n"); |
} |
static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth) |
{ |
struct rc_pair_instruction * inst = &fullinst->U.P; |
int printedsrc = 0; |
unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ? |
inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); |
for (unsigned i = 0; i < spaces; i++) |
fprintf(f, " "); |
for(unsigned int src = 0; src < 3; ++src) { |
if (inst->RGB.Src[src].Used) { |
if (printedsrc) |
fprintf(f, ", "); |
fprintf(f, "src%i.xyz = ", src); |
rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); |
printedsrc = 1; |
} |
if (inst->Alpha.Src[src].Used) { |
if (printedsrc) |
fprintf(f, ", "); |
fprintf(f, "src%i.w = ", src); |
rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); |
printedsrc = 1; |
} |
} |
if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { |
fprintf(f, ", srcp.xyz = %s", |
presubtract_op_to_string( |
inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); |
} |
if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { |
fprintf(f, ", srcp.w = %s", |
presubtract_op_to_string( |
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); |
} |
if (inst->SemWait) { |
fprintf(f, " SEM_WAIT"); |
} |
fprintf(f, "\n"); |
if (inst->RGB.Opcode != RC_OPCODE_NOP) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); |
for (unsigned i = 0; i < spaces; i++) |
fprintf(f, " "); |
fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); |
if (inst->RGB.WriteMask) |
fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, |
(inst->RGB.WriteMask & 1) ? "x" : "", |
(inst->RGB.WriteMask & 2) ? "y" : "", |
(inst->RGB.WriteMask & 4) ? "z" : ""); |
if (inst->RGB.OutputWriteMask) |
fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, |
(inst->RGB.OutputWriteMask & 1) ? "x" : "", |
(inst->RGB.OutputWriteMask & 2) ? "y" : "", |
(inst->RGB.OutputWriteMask & 4) ? "z" : ""); |
if (inst->WriteALUResult == RC_ALURESULT_X) |
fprintf(f, " aluresult"); |
print_omod_op(f, inst->RGB.Omod); |
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { |
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; |
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; |
fprintf(f, ", %s%ssrc", neg, abs); |
if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) |
fprintf(f,"p"); |
else |
fprintf(f,"%d", inst->RGB.Arg[arg].Source); |
fprintf(f,".%c%c%c%s", |
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), |
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), |
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), |
abs); |
} |
fprintf(f, "\n"); |
} |
if (inst->Alpha.Opcode != RC_OPCODE_NOP) { |
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); |
for (unsigned i = 0; i < spaces; i++) |
fprintf(f, " "); |
fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); |
if (inst->Alpha.WriteMask) |
fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); |
if (inst->Alpha.OutputWriteMask) |
fprintf(f, " color[%i].w", inst->Alpha.Target); |
if (inst->Alpha.DepthWriteMask) |
fprintf(f, " depth.w"); |
if (inst->WriteALUResult == RC_ALURESULT_W) |
fprintf(f, " aluresult"); |
print_omod_op(f, inst->Alpha.Omod); |
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { |
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; |
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; |
fprintf(f, ", %s%ssrc", neg, abs); |
if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) |
fprintf(f,"p"); |
else |
fprintf(f,"%d", inst->Alpha.Arg[arg].Source); |
fprintf(f,".%c%s", |
rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); |
} |
fprintf(f, "\n"); |
} |
if (inst->WriteALUResult) { |
for (unsigned i = 0; i < spaces; i++) |
fprintf(f, " "); |
fprintf(f, " [aluresult = ("); |
rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); |
fprintf(f, ")]\n"); |
} |
} |
/** |
* Print program to stderr, default options. |
*/ |
void rc_print_program(const struct rc_program *prog) |
{ |
unsigned int linenum = 0; |
unsigned branch_depth = 0; |
struct rc_instruction *inst; |
fprintf(stderr, "# Radeon Compiler Program\n"); |
for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { |
fprintf(stderr, "%3d: ", linenum); |
if (inst->Type == RC_INSTRUCTION_PAIR) |
rc_print_pair_instruction(stderr, inst, &branch_depth); |
else |
rc_print_normal_instruction(stderr, inst, &branch_depth); |
linenum++; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_tex.c |
---|
0,0 → 1,519 |
/* |
* Copyright (C) 2010 Corbin Simpson |
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_program_tex.h" |
#include "radeon_compiler_util.h" |
/* Series of transformations to be done on textures. */ |
static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, |
int tmu) |
{ |
struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; |
reg.File = RC_FILE_NONE; |
reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000, |
compiler->state.unit[tmu].texture_swizzle); |
return reg; |
} |
static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, |
int tmu) |
{ |
struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 }; |
reg.File = RC_FILE_NONE; |
reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, |
compiler->state.unit[tmu].texture_swizzle); |
return reg; |
} |
static void scale_texcoords(struct r300_fragment_program_compiler *compiler, |
struct rc_instruction *inst, |
unsigned state_constant) |
{ |
struct rc_instruction *inst_mov; |
unsigned temp = rc_find_free_temporary(&compiler->Base); |
inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MUL; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = temp; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; |
inst_mov->U.I.SrcReg[1].Index = |
rc_constants_add_state(&compiler->Base.Program.Constants, |
state_constant, inst->U.I.TexSrcUnit); |
reset_srcreg(&inst->U.I.SrcReg[0]); |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = temp; |
} |
static void projective_divide(struct r300_fragment_program_compiler *compiler, |
struct rc_instruction *inst) |
{ |
struct rc_instruction *inst_mul, *inst_rcp; |
unsigned temp = rc_find_free_temporary(&compiler->Base); |
inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); |
inst_rcp->U.I.Opcode = RC_OPCODE_RCP; |
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_rcp->U.I.DstReg.Index = temp; |
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
/* Because the input can be arbitrarily swizzled, |
* read the component mapped to W. */ |
inst_rcp->U.I.SrcReg[0].Swizzle = |
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); |
inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); |
inst_mul->U.I.Opcode = RC_OPCODE_MUL; |
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mul->U.I.DstReg.Index = temp; |
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_mul->U.I.SrcReg[1].Index = temp; |
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; |
reset_srcreg(&inst->U.I.SrcReg[0]); |
inst->U.I.Opcode = RC_OPCODE_TEX; |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = temp; |
} |
/** |
* Transform TEX, TXB, TXP, TXD, TXL, and KIL instructions in the following ways: |
* - implement texture compare (shadow extensions) |
* - extract non-native source / destination operands |
* - premultiply texture coordinates for RECT |
* - extract operand swizzles |
* - introduce a temporary register when write masks are needed |
* |
* \param c     compiler used to allocate temporaries, constants and new instructions |
* \param inst  instruction to inspect and, if it is one of the opcodes above, rewrite in place |
* \param data  opaque pointer, cast to struct r300_fragment_program_compiler * |
* \return 0 when the opcode is none of those listed above (inst is not modified), 1 otherwise |
*/ |
int radeonTransformTEX( |
struct radeon_compiler * c, |
struct rc_instruction * inst, |
void* data) |
{ |
struct r300_fragment_program_compiler *compiler = |
(struct r300_fragment_program_compiler*)data; |
rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; |
int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || |
compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; |
/* Anything that is not a texture fetch or KIL is left untouched. */
if (inst->U.I.Opcode != RC_OPCODE_TEX && |
inst->U.I.Opcode != RC_OPCODE_TXB && |
inst->U.I.Opcode != RC_OPCODE_TXP && |
inst->U.I.Opcode != RC_OPCODE_TXD && |
inst->U.I.Opcode != RC_OPCODE_TXL && |
inst->U.I.Opcode != RC_OPCODE_KIL) |
return 0; |
/* ARB_shadow & EXT_shadow_funcs */ |
if (inst->U.I.Opcode != RC_OPCODE_KIL && |
((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || |
(compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { |
rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; |
/* NEVER/ALWAYS do not depend on the sampled value: the fetch becomes a
 * MOV of the constant fail/pass value and nothing else is done. */
if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { |
inst->U.I.Opcode = RC_OPCODE_MOV; |
if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { |
inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); |
} else { |
inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); |
} |
return 1; |
} else { |
struct rc_instruction * inst_rcp = NULL; |
struct rc_instruction *inst_mul, *inst_add, *inst_cmp; |
unsigned tmp_texsample; |
unsigned tmp_sum; |
int pass, fail; |
/* Save the output register. */ |
struct rc_dst_register output_reg = inst->U.I.DstReg; |
unsigned saturate_mode = inst->U.I.SaturateMode; |
/* Redirect TEX to a new temp. */ |
tmp_texsample = rc_find_free_temporary(c); |
inst->U.I.SaturateMode = 0; |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst->U.I.DstReg.Index = tmp_texsample; |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
tmp_sum = rc_find_free_temporary(c); |
if (inst->U.I.Opcode == RC_OPCODE_TXP) { |
/* Compute 1/W. */ |
inst_rcp = rc_insert_new_instruction(c, inst); |
inst_rcp->U.I.Opcode = RC_OPCODE_RCP; |
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_rcp->U.I.DstReg.Index = tmp_sum; |
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst_rcp->U.I.SrcReg[0].Swizzle = |
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); |
} |
/* Divide Z by W (if it's TXP) and saturate. */ |
inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); |
inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; |
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mul->U.I.DstReg.Index = tmp_sum; |
inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; |
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst_mul->U.I.SrcReg[0].Swizzle = |
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); |
if (inst->U.I.Opcode == RC_OPCODE_TXP) { |
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_mul->U.I.SrcReg[1].Index = tmp_sum; |
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; |
} |
/* Add the depth texture value. */ |
inst_add = rc_insert_new_instruction(c, inst_mul); |
inst_add->U.I.Opcode = RC_OPCODE_ADD; |
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_add->U.I.DstReg.Index = tmp_sum; |
inst_add->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_add->U.I.SrcReg[0].Index = tmp_sum; |
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; |
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_add->U.I.SrcReg[1].Index = tmp_texsample; |
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; |
/* Note that SrcReg[0] is r, SrcReg[1] is tex and: |
* LESS: r < tex <=> -tex+r < 0 |
* GEQUAL: r >= tex <=> not (-tex+r < 0) |
* GREATER: r > tex <=> tex-r < 0 |
* LEQUAL: r <= tex <=> not ( tex-r < 0) |
* EQUAL: GEQUAL |
* NOTEQUAL:LESS |
*/ |
/* This negates either r or tex: */ |
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || |
comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) |
inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; |
else |
inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; |
/* This negates the whole expression: */ |
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || |
comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { |
pass = 1; |
fail = 2; |
} else { |
pass = 2; |
fail = 1; |
} |
/* CMP selects between the pass and fail operands based on the sum; the
 * original destination and saturate mode are restored on it. */
inst_cmp = rc_insert_new_instruction(c, inst_add); |
inst_cmp->U.I.Opcode = RC_OPCODE_CMP; |
inst_cmp->U.I.SaturateMode = saturate_mode; |
inst_cmp->U.I.DstReg = output_reg; |
inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_cmp->U.I.SrcReg[0].Index = tmp_sum; |
inst_cmp->U.I.SrcReg[0].Swizzle = |
combine_swizzles(RC_SWIZZLE_WWWW, |
compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); |
inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); |
inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); |
assert(tmp_texsample != tmp_sum); |
} |
} |
/* R300 cannot sample from rectangles and the wrap mode fallback needs |
* normalized coordinates anyway. */ |
if (inst->U.I.Opcode != RC_OPCODE_KIL && |
is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { |
scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); |
inst->U.I.TexSrcTarget = RC_TEXTURE_2D; |
} |
/* Divide by W if needed. */ |
if (inst->U.I.Opcode == RC_OPCODE_TXP && |
(wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || |
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { |
projective_divide(compiler, inst); |
} |
/* Texture wrap modes don't work on NPOT textures. |
* |
* Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and |
* mirroring are not. If we need to repeat, we do: |
* |
* MUL temp, texcoord, <scaling factor constant> |
* FRC temp, temp ; Discard integer portion of coords |
* |
* This gives us coords in [0, 1]. |
* |
* Mirroring is trickier. We're going to start out like repeat: |
* |
* MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes |
* MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] |
* ; so scale to [0, 1] |
* FRC temp, temp ; Make the pattern repeat |
* MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] |
* ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. |
* ; The pattern is backwards, so reverse it (1-x). |
* |
* This gives us coords in [0, 1]. |
* |
* ~ C & M. ;) |
*/ |
if (inst->U.I.Opcode != RC_OPCODE_KIL && |
wrapmode != RC_WRAP_NONE) { |
struct rc_instruction *inst_mov; |
unsigned temp = rc_find_free_temporary(c); |
if (wrapmode == RC_WRAP_REPEAT) { |
/* Both instructions will be paired up. */ |
struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); |
inst_frc->U.I.Opcode = RC_OPCODE_FRC; |
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_frc->U.I.DstReg.Index = temp; |
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { |
/* |
* Function: |
* f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) |
* |
* Code: |
* MUL temp, src0, 0.5 |
* FRC temp, temp |
* MAD temp, temp, 2, -1 |
* ADD temp, 1, -abs(temp) |
*/ |
struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; |
unsigned two, two_swizzle; |
inst_mul = rc_insert_new_instruction(c, inst->Prev); |
inst_mul->U.I.Opcode = RC_OPCODE_MUL; |
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mul->U.I.DstReg.Index = temp; |
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; |
inst_frc = rc_insert_new_instruction(c, inst->Prev); |
inst_frc->U.I.Opcode = RC_OPCODE_FRC; |
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_frc->U.I.DstReg.Index = temp; |
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_frc->U.I.SrcReg[0].Index = temp; |
inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; |
two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); |
inst_mad = rc_insert_new_instruction(c, inst->Prev); |
inst_mad->U.I.Opcode = RC_OPCODE_MAD; |
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mad->U.I.DstReg.Index = temp; |
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_mad->U.I.SrcReg[0].Index = temp; |
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; |
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; |
inst_mad->U.I.SrcReg[1].Index = two; |
inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; |
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; |
inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; |
inst_add = rc_insert_new_instruction(c, inst->Prev); |
inst_add->U.I.Opcode = RC_OPCODE_ADD; |
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_add->U.I.DstReg.Index = temp; |
inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; |
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_add->U.I.SrcReg[1].Index = temp; |
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; |
inst_add->U.I.SrcReg[1].Abs = 1; |
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; |
} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { |
/* |
* Mirrored clamp modes are bloody simple, we just use abs |
* so that [-1, 0] mirrors [0, 1]. This works for |
* all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. |
*/ |
struct rc_instruction *inst_mov; |
inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = temp; |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
inst_mov->U.I.SrcReg[0].Abs = 1; |
} |
/* Preserve W for TXP/TXB. */ |
inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = temp; |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
reset_srcreg(&inst->U.I.SrcReg[0]); |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = temp; |
} |
/* NPOT -> POT conversion for 3D textures. */ |
if (inst->U.I.Opcode != RC_OPCODE_KIL && |
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { |
struct rc_instruction *inst_mov; |
unsigned temp = rc_find_free_temporary(c); |
/* Saturate XYZ. */ |
inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = temp; |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
/* Copy W. */ |
inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = temp; |
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
reset_srcreg(&inst->U.I.SrcReg[0]); |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = temp; |
scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); |
} |
/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. |
* Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 |
*/ |
if (inst->U.I.Opcode != RC_OPCODE_KIL && |
compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { |
unsigned two, two_swizzle; |
struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; |
/* NOTE(review): the immediate added here is 2.35, while the formula above
 * and the inline comments below call it "2" - confirm this is intentional. */
two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle); |
inst_mul = rc_insert_new_instruction(c, inst); |
inst_mul->U.I.Opcode = RC_OPCODE_MUL; |
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); |
inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ |
inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ |
inst_mul->U.I.SrcReg[1].Index = two; |
inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; |
inst_mad = rc_insert_new_instruction(c, inst_mul); |
inst_mad->U.I.Opcode = RC_OPCODE_MAD; |
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); |
inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ |
inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ |
inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ |
inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; |
inst_cnd = rc_insert_new_instruction(c, inst_mad); |
inst_cnd->U.I.Opcode = RC_OPCODE_CND; |
inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; |
inst_cnd->U.I.DstReg = inst->U.I.DstReg; |
inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; |
inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; |
inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; |
inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; |
inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; |
inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ |
inst->U.I.SaturateMode = 0; |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
} |
/* Cannot write texture to output registers or with saturate (all chips), |
* or with masks (non-r500). */ |
if (inst->U.I.Opcode != RC_OPCODE_KIL && |
(inst->U.I.DstReg.File != RC_FILE_TEMPORARY || |
inst->U.I.SaturateMode || |
(!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { |
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; |
inst_mov->U.I.DstReg = inst->U.I.DstReg; |
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); |
inst->U.I.SaturateMode = 0; |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
} |
/* Cannot read texture coordinate from constants file */ |
if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { |
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); |
inst_mov->U.I.Opcode = RC_OPCODE_MOV; |
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; |
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); |
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; |
reset_srcreg(&inst->U.I.SrcReg[0]); |
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; |
inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; |
} |
return 1; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_tex.h |
---|
0,0 → 1,39 |
/* |
* Copyright (C) 2010 Corbin Simpson |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef __RADEON_PROGRAM_TEX_H_ |
#define __RADEON_PROGRAM_TEX_H_ |
#include "radeon_compiler.h" |
#include "radeon_program.h" |
/* Rewrite a texture instruction (TEX/TXB/TXP/TXD/TXL) or KIL in place.
 * `data` is cast to struct r300_fragment_program_compiler * by the
 * implementation.  Returns 0 when `inst` has any other opcode, 1 otherwise. */
int radeonTransformTEX( |
struct radeon_compiler * c, |
struct rc_instruction * inst, |
void* data); |
#endif /* __RADEON_PROGRAM_TEX_H_ */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_regalloc.h |
---|
0,0 → 1,62 |
/* |
* Copyright 2012 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
#ifndef RADEON_REGALLOC_H |
#define RADEON_REGALLOC_H |
struct ra_regs; |
/* Identifiers of the register classes known to the register allocator.
 * NOTE(review): the names suggest each class stands for a set of channels
 * (component count, or specific x/y/z/w combinations) - confirm against
 * the allocator implementation. */
enum rc_reg_class { |
RC_REG_CLASS_SINGLE, |
RC_REG_CLASS_DOUBLE, |
RC_REG_CLASS_TRIPLE, |
RC_REG_CLASS_ALPHA, |
RC_REG_CLASS_SINGLE_PLUS_ALPHA, |
RC_REG_CLASS_DOUBLE_PLUS_ALPHA, |
RC_REG_CLASS_TRIPLE_PLUS_ALPHA, |
RC_REG_CLASS_X, |
RC_REG_CLASS_Y, |
RC_REG_CLASS_Z, |
RC_REG_CLASS_XY, |
RC_REG_CLASS_YZ, |
RC_REG_CLASS_XZ, |
RC_REG_CLASS_XW, |
RC_REG_CLASS_YW, |
RC_REG_CLASS_ZW, |
RC_REG_CLASS_XYW, |
RC_REG_CLASS_YZW, |
RC_REG_CLASS_XZW, |
RC_REG_CLASS_COUNT /* number of classes; used as the size of class_ids[] */ |
}; |
/* State handed to rc_init_regalloc_state() / rc_destroy_regalloc_state(). */
struct rc_regalloc_state { |
struct ra_regs *regs; /* opaque register set (struct ra_regs is only forward-declared here) */ |
unsigned class_ids[RC_REG_CLASS_COUNT]; /* one entry per enum rc_reg_class value */ |
}; |
/* Set up / tear down a struct rc_regalloc_state.
 * NOTE(review): presumably init fills in `regs` and `class_ids` and destroy
 * releases `regs`; the implementations are not in this header - confirm. */
void rc_init_regalloc_state(struct rc_regalloc_state *s); |
void rc_destroy_regalloc_state(struct rc_regalloc_state *s); |
#endif /* RADEON_REGALLOC_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_remove_constants.c |
---|
0,0 → 1,150 |
/* |
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_remove_constants.h" |
#include "radeon_dataflow.h" |
/* Userdata handed to mark_used() while scanning instruction sources. */
struct mark_used_data { |
/* One byte per constant; mark_used() stores 1 at const_used[Index] for
 * every constant read without relative addressing. */
unsigned char * const_used; |
/* Points at a flag that mark_used() sets to 1 when a constant is read
 * with relative addressing. */
unsigned * has_rel_addr; |
}; |
static void remap_regs(void * userdata, struct rc_instruction * inst, |
rc_register_file * pfile, unsigned int * pindex) |
{ |
unsigned *inv_remap_table = userdata; |
if (*pfile == RC_FILE_CONSTANT) { |
*pindex = inv_remap_table[*pindex]; |
} |
} |
static void mark_used(void * userdata, struct rc_instruction * inst, |
struct rc_src_register * src) |
{ |
struct mark_used_data * d = userdata; |
if (src->File == RC_FILE_CONSTANT) { |
if (src->RelAddr) { |
*d->has_rel_addr = 1; |
} else { |
d->const_used[src->Index] = 1; |
} |
} |
} |
void rc_remove_unused_constants(struct radeon_compiler *c, void *user) |
{ |
unsigned **out_remap_table = (unsigned**)user; |
unsigned char *const_used; |
unsigned *remap_table; |
unsigned *inv_remap_table; |
unsigned has_rel_addr = 0; |
unsigned is_identity = 1; |
unsigned are_externals_remapped = 0; |
struct rc_constant *constants = c->Program.Constants.Constants; |
struct mark_used_data d; |
unsigned new_count; |
if (!c->Program.Constants.Count) { |
*out_remap_table = NULL; |
return; |
} |
const_used = malloc(c->Program.Constants.Count); |
memset(const_used, 0, c->Program.Constants.Count); |
d.const_used = const_used; |
d.has_rel_addr = &has_rel_addr; |
/* Pass 1: Mark used constants. */ |
for (struct rc_instruction *inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; inst = inst->Next) { |
rc_for_all_reads_src(inst, mark_used, &d); |
} |
/* Pass 2: If there is relative addressing or dead constant elimination |
* is disabled, mark all externals as used. */ |
if (has_rel_addr || !c->remove_unused_constants) { |
for (unsigned i = 0; i < c->Program.Constants.Count; i++) |
if (constants[i].Type == RC_CONSTANT_EXTERNAL) |
const_used[i] = 1; |
} |
/* Pass 3: Make the remapping table and remap constants. |
* This pass removes unused constants simply by overwriting them by other constants. */ |
remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); |
inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); |
new_count = 0; |
for (unsigned i = 0; i < c->Program.Constants.Count; i++) { |
if (const_used[i]) { |
remap_table[new_count] = i; |
inv_remap_table[i] = new_count; |
if (i != new_count) { |
if (constants[i].Type == RC_CONSTANT_EXTERNAL) |
are_externals_remapped = 1; |
constants[new_count] = constants[i]; |
is_identity = 0; |
} |
new_count++; |
} |
} |
/* is_identity ==> new_count == old_count |
* !is_identity ==> new_count < old_count */ |
assert( is_identity || new_count < c->Program.Constants.Count); |
assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); |
/* Pass 4: Redirect reads of all constants to their new locations. */ |
if (!is_identity) { |
for (struct rc_instruction *inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; inst = inst->Next) { |
rc_remap_registers(inst, remap_regs, inv_remap_table); |
} |
} |
/* Set the new constant count. Note that new_count may be less than |
* Count even though the remapping function is identity. In that case, |
* the constants have been removed at the end of the array. */ |
c->Program.Constants.Count = new_count; |
if (are_externals_remapped) { |
*out_remap_table = remap_table; |
} else { |
*out_remap_table = NULL; |
free(remap_table); |
} |
free(const_used); |
free(inv_remap_table); |
if (c->Debug & RC_DBG_LOG) |
rc_constants_print(&c->Program.Constants); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_remove_constants.h |
---|
0,0 → 1,35 |
/* |
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_REMOVE_CONSTANTS_H |
#define RADEON_REMOVE_CONSTANTS_H |
#include "radeon_compiler.h" |
/* Compiler pass that removes unused constants from c's program.
 * The implementation casts `user` to `unsigned **` and stores through it
 * either NULL or a malloc'ed remap table that it does not free. */
void rc_remove_unused_constants(struct radeon_compiler *c, void *user); |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_rename_regs.c |
---|
0,0 → 1,89 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/** |
* \file |
*/ |
#include "radeon_rename_regs.h" |
#include "radeon_compiler.h" |
#include "radeon_list.h" |
#include "radeon_program.h" |
#include "radeon_variable.h" |
/** |
* This function renames registers in an attempt to get the code close to |
* SSA form. After this function has completed, most of the register are only |
* written to one time, with a few exceptions. |
* |
* This function assumes all the instructions are still of type |
* RC_INSTRUCTION_NORMAL. |
*/ |
void rc_rename_regs(struct radeon_compiler *c, void *user) |
{ |
unsigned int used_length; |
struct rc_instruction * inst; |
unsigned char * used; |
struct rc_list * variables; |
struct rc_list * var_ptr; |
/* XXX Remove this once the register allocation works with flow control. */ |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) |
return; |
} |
used_length = 2 * rc_recompute_ips(c); |
used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); |
memset(used, 0, sizeof(unsigned char) * used_length); |
rc_get_used_temporaries(c, used, used_length); |
variables = rc_get_variables(c); |
for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) { |
unsigned new_index; |
unsigned writemask; |
struct rc_variable * var = var_ptr->Item; |
if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { |
continue; |
} |
new_index = rc_find_free_temporary_list(c, used, used_length, |
RC_MASK_XYZW); |
if (new_index < 0) { |
rc_error(c, "Ran out of temporary registers\n"); |
return; |
} |
writemask = rc_variable_writemask_sum(var); |
rc_variable_change_dst(var, new_index, writemask); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_rename_regs.h |
---|
0,0 → 1,35 |
/* |
* Copyright 2010 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_RENAME_REGS_H |
#define RADEON_RENAME_REGS_H |
struct radeon_compiler; |
/* Compiler pass that renames temporary registers of c's program (see
 * radeon_rename_regs.c). The implementation does not read `user`. */
void rc_rename_regs(struct radeon_compiler *c, void *user); |
#endif /* RADEON_RENAME_REGS_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_swizzle.h |
---|
0,0 → 1,59 |
/* |
* Copyright (C) 2009 Nicolai Haehnle. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_SWIZZLE_H |
#define RADEON_SWIZZLE_H |
#include "radeon_program.h" |
/* Result type filled in by rc_swizzle_caps::Split.
 * NOTE(review): presumably NumPhases is the number of valid entries in
 * Phase[], and each entry describes the channels handled by one phase -
 * confirm against the Split implementations. */
struct rc_swizzle_split { |
unsigned char NumPhases; |
unsigned char Phase[4]; |
}; |
/** |
* Describe the swizzling capability of target hardware. |
*/ |
/* Both members are function pointers; a target supplies its own pair. */
struct rc_swizzle_caps { |
/** |
* Check whether the given swizzle, absolute and negate combination |
* can be implemented natively by the hardware for this opcode. |
* |
* \return 1 if the swizzle is native for the given opcode |
*/ |
int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); |
/** |
* Determine how to split access to the masked channels of the |
* given source register to obtain ALU-native swizzles. |
*/ |
/* Returns void and takes a non-const rc_swizzle_split pointer, so the
 * result is presumably written through `split` - confirm in the callee. */
void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); |
}; |
/* Swizzle capability table defined in another translation unit.
 * NOTE(review): by its name, the table for r300 vertex programs - confirm. */
extern struct rc_swizzle_caps r300_vertprog_swizzle_caps; |
#endif /* RADEON_SWIZZLE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_variable.c |
---|
0,0 → 1,536 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "radeon_variable.h" |
#include "memory_pool.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_list.h" |
#include "radeon_opcodes.h" |
#include "radeon_program.h" |
/** |
* Rewrite the index and writemask for the destination register of var |
* and its friends to new_index and new_writemask. This function also takes |
* care of rewriting the swizzles for the sources of var. |
*/ |
void rc_variable_change_dst( |
struct rc_variable * var, |
unsigned int new_index, |
unsigned int new_writemask) |
{ |
struct rc_variable * var_ptr; |
struct rc_list * readers; |
unsigned int old_mask = rc_variable_writemask_sum(var); |
unsigned int conversion_swizzle = |
rc_make_conversion_swizzle(old_mask, new_writemask); |
for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { |
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { |
rc_normal_rewrite_writemask(var_ptr->Inst, |
conversion_swizzle); |
var_ptr->Inst->U.I.DstReg.Index = new_index; |
} else { |
struct rc_pair_sub_instruction * sub; |
if (var_ptr->Dst.WriteMask == RC_MASK_W) { |
assert(new_writemask & RC_MASK_W); |
sub = &var_ptr->Inst->U.P.Alpha; |
} else { |
sub = &var_ptr->Inst->U.P.RGB; |
rc_pair_rewrite_writemask(sub, |
conversion_swizzle); |
} |
sub->DestIndex = new_index; |
} |
} |
readers = rc_variable_readers_union(var); |
for ( ; readers; readers = readers->Next) { |
struct rc_reader * reader = readers->Item; |
if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { |
reader->U.I.Src->Index = new_index; |
reader->U.I.Src->Swizzle = rc_rewrite_swizzle( |
reader->U.I.Src->Swizzle, conversion_swizzle); |
} else { |
struct rc_pair_instruction * pair_inst = |
&reader->Inst->U.P; |
unsigned int src_type = rc_source_type_swz( |
reader->U.P.Arg->Swizzle); |
int src_index = reader->U.P.Arg->Source; |
if (src_index == RC_PAIR_PRESUB_SRC) { |
src_index = rc_pair_get_src_index( |
pair_inst, reader->U.P.Src); |
} |
/* Try to delete the old src, it is OK if this fails, |
* because rc_pair_alloc_source might be able to |
* find a source the ca be reused. |
*/ |
if (rc_pair_remove_src(reader->Inst, src_type, |
src_index, old_mask)) { |
/* Reuse the source index of the source that |
* was just deleted and set its register |
* index. We can't use rc_pair_alloc_source |
* for this becuase it might return a source |
* index that is already being used. */ |
if (src_type & RC_SOURCE_RGB) { |
pair_inst->RGB.Src[src_index] |
.Used = 1; |
pair_inst->RGB.Src[src_index] |
.Index = new_index; |
pair_inst->RGB.Src[src_index] |
.File = RC_FILE_TEMPORARY; |
} |
if (src_type & RC_SOURCE_ALPHA) { |
pair_inst->Alpha.Src[src_index] |
.Used = 1; |
pair_inst->Alpha.Src[src_index] |
.Index = new_index; |
pair_inst->Alpha.Src[src_index] |
.File = RC_FILE_TEMPORARY; |
} |
} else { |
src_index = rc_pair_alloc_source( |
&reader->Inst->U.P, |
src_type & RC_SOURCE_RGB, |
src_type & RC_SOURCE_ALPHA, |
RC_FILE_TEMPORARY, |
new_index); |
if (src_index < 0) { |
rc_error(var->C, "Rewrite of inst %u failed " |
"Can't allocate source for " |
"Inst %u src_type=%x " |
"new_index=%u new_mask=%u\n", |
var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); |
continue; |
} |
} |
reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( |
reader->U.P.Arg->Swizzle, conversion_swizzle); |
if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { |
reader->U.P.Arg->Source = src_index; |
} |
} |
} |
} |
/** |
* Compute the live intervals for var and its friends. |
*/ |
void rc_variable_compute_live_intervals(struct rc_variable * var) |
{ |
while(var) { |
unsigned int i; |
unsigned int start = var->Inst->IP; |
for (i = 0; i < var->ReaderCount; i++) { |
unsigned int chan; |
unsigned int chan_start = start; |
unsigned int chan_end = var->Readers[i].Inst->IP; |
unsigned int mask = var->Readers[i].WriteMask; |
struct rc_instruction * inst; |
/* Extend the live interval of T0 to the start of the |
* loop for sequences like: |
* BGNLOOP |
* read T0 |
* ... |
* write T0 |
* ENDLOOP |
*/ |
if (var->Readers[i].Inst->IP < start) { |
struct rc_instruction * bgnloop = |
rc_match_endloop(var->Readers[i].Inst); |
chan_start = bgnloop->IP; |
} |
/* Extend the live interval of T0 to the start of the |
* loop in case there is a BRK instruction in the loop |
* (we don't actually check for a BRK instruction we |
* assume there is one somewhere in the loop, which |
* there usually is) for sequences like: |
* BGNLOOP |
* ... |
* conditional BRK |
* ... |
* write T0 |
* ENDLOOP |
* read T0 |
*************************************************** |
* Extend the live interval of T0 to the end of the |
* loop for sequences like: |
* write T0 |
* BGNLOOP |
* ... |
* read T0 |
* ENDLOOP |
*/ |
for (inst = var->Inst; inst != var->Readers[i].Inst; |
inst = inst->Next) { |
rc_opcode op = rc_get_flow_control_inst(inst); |
if (op == RC_OPCODE_ENDLOOP) { |
struct rc_instruction * bgnloop = |
rc_match_endloop(inst); |
if (bgnloop->IP < chan_start) { |
chan_start = bgnloop->IP; |
} |
} else if (op == RC_OPCODE_BGNLOOP) { |
struct rc_instruction * endloop = |
rc_match_bgnloop(inst); |
if (endloop->IP > chan_end) { |
chan_end = endloop->IP; |
} |
} |
} |
for (chan = 0; chan < 4; chan++) { |
if ((mask >> chan) & 0x1) { |
if (!var->Live[chan].Used |
|| chan_start < var->Live[chan].Start) { |
var->Live[chan].Start = |
chan_start; |
} |
if (!var->Live[chan].Used |
|| chan_end > var->Live[chan].End) { |
var->Live[chan].End = chan_end; |
} |
var->Live[chan].Used = 1; |
} |
} |
} |
var = var->Friend; |
} |
} |
/** |
* @return 1 if a and b share a reader |
* @return 0 if they do not |
*/ |
static unsigned int readers_intersect( |
struct rc_variable * a, |
struct rc_variable * b) |
{ |
unsigned int a_index, b_index; |
for (a_index = 0; a_index < a->ReaderCount; a_index++) { |
struct rc_reader reader_a = a->Readers[a_index]; |
for (b_index = 0; b_index < b->ReaderCount; b_index++) { |
struct rc_reader reader_b = b->Readers[b_index]; |
if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL |
&& reader_b.Inst->Type == RC_INSTRUCTION_NORMAL |
&& reader_a.U.I.Src == reader_b.U.I.Src) { |
return 1; |
} |
if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR |
&& reader_b.Inst->Type == RC_INSTRUCTION_PAIR |
&& reader_a.U.P.Src == reader_b.U.P.Src) { |
return 1; |
} |
} |
} |
return 0; |
} |
void rc_variable_add_friend( |
struct rc_variable * var, |
struct rc_variable * friend) |
{ |
assert(var->Dst.Index == friend->Dst.Index); |
while(var->Friend) { |
var = var->Friend; |
} |
var->Friend = friend; |
} |
struct rc_variable * rc_variable( |
struct radeon_compiler * c, |
unsigned int DstFile, |
unsigned int DstIndex, |
unsigned int DstWriteMask, |
struct rc_reader_data * reader_data) |
{ |
struct rc_variable * new = |
memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); |
memset(new, 0, sizeof(struct rc_variable)); |
new->C = c; |
new->Dst.File = DstFile; |
new->Dst.Index = DstIndex; |
new->Dst.WriteMask = DstWriteMask; |
if (reader_data) { |
new->Inst = reader_data->Writer; |
new->ReaderCount = reader_data->ReaderCount; |
new->Readers = reader_data->Readers; |
} |
return new; |
} |
static void get_variable_helper( |
struct rc_list ** variable_list, |
struct rc_variable * variable) |
{ |
struct rc_list * list_ptr; |
for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { |
struct rc_variable * var; |
for (var = list_ptr->Item; var; var = var->Friend) { |
if (readers_intersect(var, variable)) { |
rc_variable_add_friend(var, variable); |
return; |
} |
} |
} |
rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); |
} |
static void get_variable_pair_helper( |
struct rc_list ** variable_list, |
struct radeon_compiler * c, |
struct rc_instruction * inst, |
struct rc_pair_sub_instruction * sub_inst) |
{ |
struct rc_reader_data reader_data; |
struct rc_variable * new_var; |
rc_register_file file; |
unsigned int writemask; |
if (sub_inst->Opcode == RC_OPCODE_NOP) { |
return; |
} |
memset(&reader_data, 0, sizeof(struct rc_reader_data)); |
rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); |
if (reader_data.ReaderCount == 0) { |
return; |
} |
if (sub_inst->WriteMask) { |
file = RC_FILE_TEMPORARY; |
writemask = sub_inst->WriteMask; |
} else if (sub_inst->OutputWriteMask) { |
file = RC_FILE_OUTPUT; |
writemask = sub_inst->OutputWriteMask; |
} else { |
writemask = 0; |
file = RC_FILE_NONE; |
} |
new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, |
&reader_data); |
get_variable_helper(variable_list, new_var); |
} |
/** |
* Generate a list of variables used by the shader program. Each instruction |
* that writes to a register is considered a variable. The struct rc_variable |
* data structure includes a list of readers and is essentially a |
* definition-use chain. Any two variables that share a reader are considered |
* "friends" and they are linked together via the Friend attribute. |
*/ |
struct rc_list * rc_get_variables(struct radeon_compiler * c) |
{ |
struct rc_instruction * inst; |
struct rc_list * variable_list = NULL; |
for (inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
struct rc_reader_data reader_data; |
struct rc_variable * new_var; |
memset(&reader_data, 0, sizeof(reader_data)); |
if (inst->Type == RC_INSTRUCTION_NORMAL) { |
rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); |
if (reader_data.ReaderCount == 0) { |
continue; |
} |
new_var = rc_variable(c, inst->U.I.DstReg.File, |
inst->U.I.DstReg.Index, |
inst->U.I.DstReg.WriteMask, &reader_data); |
get_variable_helper(&variable_list, new_var); |
} else { |
get_variable_pair_helper(&variable_list, c, inst, |
&inst->U.P.RGB); |
get_variable_pair_helper(&variable_list, c, inst, |
&inst->U.P.Alpha); |
} |
} |
return variable_list; |
} |
/** |
* @return The bitwise or of the writemasks of a variable and all of its |
* friends. |
*/ |
unsigned int rc_variable_writemask_sum(struct rc_variable * var) |
{ |
unsigned int writemask = 0; |
while(var) { |
writemask |= var->Dst.WriteMask; |
var = var->Friend; |
} |
return writemask; |
} |
/* |
* @return A list of readers for a variable and its friends. Readers |
* that read from two different variable friends are only included once in |
* this list. |
*/ |
struct rc_list * rc_variable_readers_union(struct rc_variable * var) |
{ |
struct rc_list * list = NULL; |
while (var) { |
unsigned int i; |
for (i = 0; i < var->ReaderCount; i++) { |
struct rc_list * temp; |
struct rc_reader * a = &var->Readers[i]; |
unsigned int match = 0; |
for (temp = list; temp; temp = temp->Next) { |
struct rc_reader * b = temp->Item; |
if (a->Inst->Type != b->Inst->Type) { |
continue; |
} |
if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { |
if (a->U.I.Src == b->U.I.Src) { |
match = 1; |
break; |
} |
} |
if (a->Inst->Type == RC_INSTRUCTION_PAIR) { |
if (a->U.P.Arg == b->U.P.Arg |
&& a->U.P.Src == b->U.P.Src) { |
match = 1; |
break; |
} |
} |
} |
if (match) { |
continue; |
} |
rc_list_add(&list, rc_list(&var->C->Pool, a)); |
} |
var = var->Friend; |
} |
return list; |
} |
static unsigned int reader_equals_src( |
struct rc_reader reader, |
unsigned int src_type, |
void * src) |
{ |
if (reader.Inst->Type != src_type) { |
return 0; |
} |
if (src_type == RC_INSTRUCTION_NORMAL) { |
return reader.U.I.Src == src; |
} else { |
return reader.U.P.Src == src; |
} |
} |
static unsigned int variable_writes_src( |
struct rc_variable * var, |
unsigned int src_type, |
void * src) |
{ |
unsigned int i; |
for (i = 0; i < var->ReaderCount; i++) { |
if (reader_equals_src(var->Readers[i], src_type, src)) { |
return 1; |
} |
} |
return 0; |
} |
struct rc_list * rc_variable_list_get_writers( |
struct rc_list * var_list, |
unsigned int src_type, |
void * src) |
{ |
struct rc_list * list_ptr; |
struct rc_list * writer_list = NULL; |
for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { |
struct rc_variable * var = list_ptr->Item; |
if (variable_writes_src(var, src_type, src)) { |
struct rc_variable * friend; |
rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); |
for (friend = var->Friend; friend; |
friend = friend->Friend) { |
if (variable_writes_src(friend, src_type, src)) { |
rc_list_add(&writer_list, |
rc_list(&var->C->Pool, friend)); |
} |
} |
/* Once we have indentifed the variable and its |
* friends that write this source, we can stop |
* stop searching, because we know none of the |
* other variables in the list will write this source. |
* If they did they would be friends of var. |
*/ |
break; |
} |
} |
return writer_list; |
} |
struct rc_list * rc_variable_list_get_writers_one_reader( |
struct rc_list * var_list, |
unsigned int src_type, |
void * src) |
{ |
struct rc_list * writer_list = |
rc_variable_list_get_writers(var_list, src_type, src); |
struct rc_list * reader_list = |
rc_variable_readers_union(writer_list->Item); |
if (rc_list_count(reader_list) > 1) { |
return NULL; |
} else { |
return writer_list; |
} |
} |
void rc_variable_print(struct rc_variable * var) |
{ |
unsigned int i; |
while (var) { |
fprintf(stderr, "%u: TEMP[%u].%u: ", |
var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); |
for (i = 0; i < 4; i++) { |
fprintf(stderr, "chan %u: start=%u end=%u ", i, |
var->Live[i].Start, var->Live[i].End); |
} |
fprintf(stderr, "%u readers\n", var->ReaderCount); |
if (var->Friend) { |
fprintf(stderr, "Friend: \n\t"); |
} |
var = var->Friend; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_variable.h |
---|
0,0 → 1,94 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#ifndef RADEON_VARIABLE_H |
#define RADEON_VARIABLE_H |
#include "radeon_compiler.h" |
struct radeon_compiler; |
struct rc_list; |
struct rc_reader_data; |
struct rc_readers; |
/* Live interval of one channel of a variable, expressed in instruction IPs
 * (filled in by rc_variable_compute_live_intervals()). */
struct live_intervals { |
/* Smallest IP at which the channel is live. */
int Start; |
/* Largest IP at which the channel is live. */
int End; |
/* Set to 1 once Start/End hold a valid interval; 0 in a fresh variable. */
int Used; |
}; |
/* One register write together with the instructions that read it. */
struct rc_variable { |
/* Owning compiler; its memory pool is used for list allocations. */
struct radeon_compiler * C; |
/* Destination register (file, index, writemask) of the write. */
struct rc_dst_register Dst; |
/* The writing instruction (taken from rc_reader_data::Writer). */
struct rc_instruction * Inst; |
/* Number of entries in Readers. */
unsigned int ReaderCount; |
/* Reader array taken over from rc_reader_data; not copied. */
struct rc_reader * Readers; |
/* Live interval per channel, indexed 0..3. */
struct live_intervals Live[4]; |
/* A friend is a variable that shares a reader with another variable. |
*/ |
struct rc_variable * Friend; |
}; |
/* Retarget var and its friends to new_index/new_writemask and patch all
 * of their readers accordingly. */
void rc_variable_change_dst( |
struct rc_variable * var, |
unsigned int new_index, |
unsigned int new_writemask); |
/* Fill in Live[] for var and every variable on its Friend chain. */
void rc_variable_compute_live_intervals(struct rc_variable * var); |
/* Append friend to the end of var's Friend chain. */
void rc_variable_add_friend( |
struct rc_variable * var, |
struct rc_variable * friend); |
/* Allocate a zeroed variable from c's memory pool; reader_data may be NULL. */
struct rc_variable * rc_variable( |
struct radeon_compiler * c, |
unsigned int DstFile, |
unsigned int DstIndex, |
unsigned int DstWriteMask, |
struct rc_reader_data * reader_data); |
/* Build the list of variables (writes that have readers) of c's program. */
struct rc_list * rc_get_variables(struct radeon_compiler * c); |
/* Bitwise OR of the writemasks of var and all of its friends. */
unsigned int rc_variable_writemask_sum(struct rc_variable * var); |
/* De-duplicated list of the readers of var and all of its friends. */
struct rc_list * rc_variable_readers_union(struct rc_variable * var); |
/* Variables from var_list that write the source src; NULL if none. */
struct rc_list * rc_variable_list_get_writers( |
struct rc_list * var_list, |
unsigned int src_type, |
void * src); |
/* As above, but returns NULL when the writers have more than one reader. */
struct rc_list * rc_variable_list_get_writers_one_reader( |
struct rc_list * var_list, |
unsigned int src_type, |
void * src); |
/* Dump var and its friends to stderr. */
void rc_variable_print(struct rc_variable * var); |
#endif /* RADEON_VARIABLE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_vert_fc.c |
---|
0,0 → 1,302 |
/* |
* Copyright 2012 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_dataflow.h" |
#include "radeon_program.h" |
#include "radeon_program_constants.h" |
/* Bookkeeping carried from instruction to instruction while rc_vert_fc()
 * rewrites structured control flow into predicate instructions. */
struct vert_fc_state { |
/* Compiler whose Program.Instructions list is being rewritten. */
struct radeon_compiler *C; |
/* Incremented by rc_vert_fc() after each IF, decremented at each ENDIF. */
unsigned BranchDepth; |
/* Incremented by rc_vert_fc() after each BGNLOOP, decremented at ENDLOOP. */
unsigned LoopDepth; |
/* Only read by the depth check in lower_bgnloop(); nothing in this file
 * changes it after the initial memset to zero. */
unsigned LoopsReserved; |
/* PredStack[d] is the PredicateReg that was active when a loop was opened
 * at LoopDepth d inside other control flow; lower_endloop() reads it back. */
int PredStack[R500_PVS_MAX_LOOP_DEPTH]; |
/* Index of the temporary used as predicate register, or -1 while none has
 * been reserved yet. */
int PredicateReg; |
/* Set by lower_if() when the IF is immediately followed by BRK; cleared by
 * rc_vert_fc() at the next ENDIF. */
unsigned InCFBreak; |
}; |
static void build_pred_src( |
struct rc_src_register * src, |
struct vert_fc_state * fc_state) |
{ |
src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, |
RC_SWIZZLE_UNUSED, RC_SWIZZLE_W); |
src->File = RC_FILE_TEMPORARY; |
src->Index = fc_state->PredicateReg; |
} |
static void build_pred_dst( |
struct rc_dst_register * dst, |
struct vert_fc_state * fc_state) |
{ |
dst->WriteMask = RC_MASK_W; |
dst->File = RC_FILE_TEMPORARY; |
dst->Index = fc_state->PredicateReg; |
} |
static void mark_write(void * userdata, struct rc_instruction * inst, |
rc_register_file file, unsigned int index, unsigned int mask) |
{ |
unsigned int * writemasks = userdata; |
if (file != RC_FILE_TEMPORARY) |
return; |
if (index >= R300_VS_MAX_TEMPS) |
return; |
writemasks[index] |= mask; |
} |
static int reserve_predicate_reg(struct vert_fc_state * fc_state) |
{ |
int i; |
unsigned int writemasks[RC_REGISTER_MAX_INDEX]; |
struct rc_instruction * inst; |
memset(writemasks, 0, sizeof(writemasks)); |
for(inst = fc_state->C->Program.Instructions.Next; |
inst != &fc_state->C->Program.Instructions; |
inst = inst->Next) { |
rc_for_all_writes_mask(inst, mark_write, writemasks); |
} |
for(i = 0; i < fc_state->C->max_temp_regs; i++) { |
/* Most of the control flow instructions only write the |
* W component of the Predicate Register, but |
* the docs say that ME_PRED_SET_CLR and |
* ME_PRED_SET_RESTORE write all components of the |
* register, so we must reserve a register that has |
* all its components free. */ |
if (!writemasks[i]) { |
fc_state->PredicateReg = i; |
break; |
} |
} |
if (i == fc_state->C->max_temp_regs) { |
rc_error(fc_state->C, "No free temporary to use for" |
" predicate stack counter.\n"); |
return -1; |
} |
return 1; |
} |
static void lower_bgnloop( |
struct rc_instruction * inst, |
struct vert_fc_state * fc_state) |
{ |
struct rc_instruction * new_inst = |
rc_insert_new_instruction(fc_state->C, inst->Prev); |
if ((!fc_state->C->is_r500 |
&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) |
|| fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) { |
rc_error(fc_state->C, "Loops are nested too deep."); |
return; |
} |
if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) { |
if (fc_state->PredicateReg == -1) { |
if (reserve_predicate_reg(fc_state) == -1) { |
return; |
} |
} |
/* Initialize the predicate bit to true. */ |
new_inst->U.I.Opcode = RC_ME_PRED_SEQ; |
build_pred_dst(&new_inst->U.I.DstReg, fc_state); |
new_inst->U.I.SrcReg[0].Index = 0; |
new_inst->U.I.SrcReg[0].File = RC_FILE_NONE; |
new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; |
} else { |
fc_state->PredStack[fc_state->LoopDepth] = |
fc_state->PredicateReg; |
/* Copy the the current predicate value to this loop's |
* predicate register */ |
/* Use the old predicate value for src0 */ |
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); |
/* Reserve this loop's predicate register */ |
if (reserve_predicate_reg(fc_state) == -1) { |
return; |
} |
/* Copy the old predicate value to the new register */ |
new_inst->U.I.Opcode = RC_OPCODE_ADD; |
build_pred_dst(&new_inst->U.I.DstReg, fc_state); |
new_inst->U.I.SrcReg[1].Index = 0; |
new_inst->U.I.SrcReg[1].File = RC_FILE_NONE; |
new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000; |
} |
} |
static void lower_brk( |
struct rc_instruction * inst, |
struct vert_fc_state * fc_state) |
{ |
if (fc_state->LoopDepth == 1) { |
inst->U.I.Opcode = RC_OPCODE_RCP; |
inst->U.I.DstReg.Pred = RC_PRED_INV; |
inst->U.I.SrcReg[0].Index = 0; |
inst->U.I.SrcReg[0].File = RC_FILE_NONE; |
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; |
} else { |
inst->U.I.Opcode = RC_ME_PRED_SET_CLR; |
inst->U.I.DstReg.Pred = RC_PRED_SET; |
} |
build_pred_dst(&inst->U.I.DstReg, fc_state); |
} |
static void lower_endloop( |
struct rc_instruction * inst, |
struct vert_fc_state * fc_state) |
{ |
struct rc_instruction * new_inst = |
rc_insert_new_instruction(fc_state->C, inst); |
new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE; |
build_pred_dst(&new_inst->U.I.DstReg, fc_state); |
/* Restore the previous predicate register. */ |
fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1]; |
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); |
} |
static void lower_if( |
struct rc_instruction * inst, |
struct vert_fc_state * fc_state) |
{ |
/* Reserve a temporary to use as our predicate stack counter, if we |
* don't already have one. */ |
if (fc_state->PredicateReg == -1) { |
/* If we are inside a loop, the Predicate Register should |
* have already been defined. */ |
assert(fc_state->LoopDepth == 0); |
if (reserve_predicate_reg(fc_state) == -1) { |
return; |
} |
} |
if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) { |
fc_state->InCFBreak = 1; |
} |
if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) |
|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) { |
if (fc_state->InCFBreak) { |
inst->U.I.Opcode = RC_ME_PRED_SEQ; |
inst->U.I.DstReg.Pred = RC_PRED_SET; |
} else { |
inst->U.I.Opcode = RC_ME_PRED_SNEQ; |
} |
} else { |
unsigned swz; |
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH; |
memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], |
sizeof(inst->U.I.SrcReg[1])); |
swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle); |
/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the |
* w component */ |
inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, |
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz); |
build_pred_src(&inst->U.I.SrcReg[0], fc_state); |
} |
build_pred_dst(&inst->U.I.DstReg, fc_state); |
} |
void rc_vert_fc(struct radeon_compiler *c, void *user) |
{ |
struct rc_instruction * inst; |
struct vert_fc_state fc_state; |
memset(&fc_state, 0, sizeof(fc_state)); |
fc_state.PredicateReg = -1; |
fc_state.C = c; |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
switch (inst->U.I.Opcode) { |
case RC_OPCODE_BGNLOOP: |
lower_bgnloop(inst, &fc_state); |
fc_state.LoopDepth++; |
break; |
case RC_OPCODE_BRK: |
lower_brk(inst, &fc_state); |
break; |
case RC_OPCODE_ENDLOOP: |
if (fc_state.BranchDepth != 0 |
|| fc_state.LoopDepth != 1) { |
lower_endloop(inst, &fc_state); |
} |
fc_state.LoopDepth--; |
/* Skip PRED_RESTORE */ |
inst = inst->Next; |
break; |
case RC_OPCODE_IF: |
lower_if(inst, &fc_state); |
fc_state.BranchDepth++; |
break; |
case RC_OPCODE_ELSE: |
inst->U.I.Opcode = RC_ME_PRED_SET_INV; |
build_pred_dst(&inst->U.I.DstReg, &fc_state); |
build_pred_src(&inst->U.I.SrcReg[0], &fc_state); |
break; |
case RC_OPCODE_ENDIF: |
if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) { |
struct rc_instruction * to_delete = inst; |
inst = inst->Prev; |
rc_remove_instruction(to_delete); |
/* XXX: Delete the endif instruction */ |
} else { |
inst->U.I.Opcode = RC_ME_PRED_SET_POP; |
build_pred_dst(&inst->U.I.DstReg, &fc_state); |
build_pred_src(&inst->U.I.SrcReg[0], &fc_state); |
} |
fc_state.InCFBreak = 0; |
fc_state.BranchDepth--; |
break; |
default: |
if (fc_state.BranchDepth || fc_state.LoopDepth) { |
inst->U.I.DstReg.Pred = RC_PRED_SET; |
} |
break; |
} |
if (c->Error) { |
return; |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/omod_two_writers.test |
---|
0,0 → 1,5 |
RCP temp[0].x, const[1].x___; |
RCP temp[0].y, const[1]._y__; |
MUL temp[1].xy, const[0].xx__, temp[0].xy__; |
MOV output[0].xy, temp[1].xy; |
= |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/r300_compiler_tests.c |
---|
0,0 → 1,44 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include "r300_compiler_tests.h" |
#include <stdlib.h> |
/**
 * Run every r300 compiler test suite.
 *
 * All three suites are always run; the process exits with EXIT_SUCCESS only
 * if the AND of their return values is non-zero.
 */
int main(int argc, char ** argv)
{
	unsigned all_ok = 1;

	all_ok &= radeon_compiler_optimize_run_tests();
	all_ok &= radeon_compiler_regalloc_run_tests();
	all_ok &= radeon_compiler_util_run_tests();

	return all_ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/r300_compiler_tests.h |
---|
0,0 → 1,30 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/* Entry points of the individual test suites. main() in
 * r300_compiler_tests.c ANDs the three return values together and exits
 * with EXIT_SUCCESS only if the result is non-zero. */
unsigned radeon_compiler_optimize_run_tests(void); |
unsigned radeon_compiler_regalloc_run_tests(void); |
unsigned radeon_compiler_util_run_tests(void); |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c |
---|
0,0 → 1,88 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
#include "radeon_compiler.h" |
#include "radeon_dataflow.h" |
#include "r300_compiler_tests.h" |
#include "rc_test_helpers.h" |
#include "unit_test.h" |
static unsigned test_rc_optimize( |
struct test_result * result, |
struct radeon_compiler * c, |
const char * filename) |
{ |
struct rc_test_file test_file; |
test_begin(result); |
if (!load_program(c, &test_file, filename)) { |
fprintf(stderr, "Failed to load program\n"); |
return 0; |
} |
rc_optimize(c, NULL); |
return 1; |
} |
static void test_runner_rc_optimize(struct test_result * result) |
{ |
unsigned pass = 1; |
struct radeon_compiler c; |
struct rc_instruction *inst; |
struct rc_instruction *inst_list[3]; |
unsigned inst_count = 0; |
float const0[4] = {2.0f, 0.0f, 0.0f, 0.0f}; |
init_compiler(&c, RC_FRAGMENT_PROGRAM, 1, 0); |
rc_constants_add_immediate_vec4(&c.Program.Constants, const0); |
test_rc_optimize(result, &c, "omod_two_writers.test"); |
for(inst = c.Program.Instructions.Next; |
inst != &c.Program.Instructions; |
inst = inst->Next, inst_count++) { |
inst_list[inst_count] = inst; |
} |
if (inst_list[0]->U.I.Omod != RC_OMOD_MUL_2 || |
inst_list[1]->U.I.Omod != RC_OMOD_MUL_2 || |
inst_list[2]->U.I.Opcode != RC_OPCODE_MOV) { |
pass = 0; |
} |
test_check(result, pass); |
} |
unsigned radeon_compiler_optimize_run_tests() |
{ |
static struct test tests[] = { |
{"rc_optimize() => peephole_mul_omod()", test_runner_rc_optimize}, |
{NULL, NULL} |
}; |
return run_tests(tests); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/radeon_compiler_regalloc_tests.c |
---|
0,0 → 1,99 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
#include "radeon_program_pair.h" |
#include "r300_compiler_tests.h" |
#include "rc_test_helpers.h" |
#include "unit_test.h" |
/**
 * Stand-in for the driver's AllocateHwInputs hook: maps inputs 0 through 9
 * to the hardware register with the same number.
 *
 * @param c Fragment program compiler (not used here).
 * @param allocate Callback invoked once per input as
 * allocate(mydata, input, hwreg).
 * @param mydata Opaque pointer handed back to @p allocate.
 */
static void dummy_allocate_hw_inputs(
	struct r300_fragment_program_compiler * c,
	void (*allocate)(void * data, unsigned input, unsigned hwreg),
	void * mydata)
{
	unsigned reg = 0;

	while (reg < 10) {
		allocate(mydata, reg, reg);
		reg++;
	}
}
static void test_runner_rc_regalloc( |
struct test_result *result, |
struct radeon_compiler *c, |
const char *filename) |
{ |
struct rc_test_file test_file; |
unsigned optimizations = 1; |
unsigned do_full_regalloc = 1; |
struct rc_instruction *inst; |
unsigned pass = 1; |
test_begin(result); |
if (!load_program(c, &test_file, filename)) { |
fprintf(stderr, "Failed to load program\n"); |
} |
rc_pair_translate(c, NULL); |
rc_pair_schedule(c, &optimizations); |
rc_pair_remove_dead_sources(c, NULL); |
rc_pair_regalloc(c, &do_full_regalloc); |
for(inst = c->Program.Instructions.Next; |
inst != &c->Program.Instructions; |
inst = inst->Next) { |
if (inst->Type == RC_INSTRUCTION_NORMAL && |
inst->U.I.Opcode != RC_OPCODE_BEGIN_TEX) { |
if (GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 0) |
!= RC_SWIZZLE_X) { |
pass = 0; |
} |
} |
} |
test_check(result, pass); |
} |
static void tex_1d_swizzle(struct test_result *result) |
{ |
struct radeon_compiler c; |
init_compiler(&c, RC_FRAGMENT_PROGRAM, 0, 0); |
struct r300_fragment_program_compiler *cc = |
(struct r300_fragment_program_compiler*)&c; |
cc->AllocateHwInputs = dummy_allocate_hw_inputs; |
test_runner_rc_regalloc(result, &c, "regalloc_tex_1d_swizzle.test"); |
} |
unsigned radeon_compiler_regalloc_run_tests() |
{ |
static struct test tests[] = { |
{"rc_pair_regalloc() => TEX 1D Swizzle - r300", tex_1d_swizzle }, |
{NULL, NULL} |
}; |
return run_tests(tests); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c |
---|
0,0 → 1,104 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include <stdlib.h> |
#include <string.h> |
#include <sys/types.h> |
#include "radeon_compiler_util.h" |
#include "radeon_program.h" |
#include "r300_compiler_tests.h" |
#include "rc_test_helpers.h" |
#include "unit_test.h" |
static void test_rc_inst_can_use_presub( |
struct test_result * result, |
int expected, |
const char * add_str, |
const char * replace_str) |
{ |
struct rc_instruction add_inst, replace_inst; |
int ret; |
test_begin(result); |
init_rc_normal_instruction(&add_inst, add_str); |
init_rc_normal_instruction(&replace_inst, replace_str); |
ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, |
&replace_inst.U.I.SrcReg[0], |
&add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); |
test_check(result, ret == expected); |
} |
/**
 * Run the rc_inst_can_use_presub() regression cases, in order.
 *
 * All cases relate to https://bugs.freedesktop.org/show_bug.cgi?id=36527
 */
static void test_runner_rc_inst_can_use_presub(struct test_result * result)
{
	static const struct {
		int expected;
		const char * add_str;
		const char * replace_str;
	} cases[] = {
		/* The source being replaced has the same register file and
		 * register index as another source register in the CMP
		 * instruction. A previous version of the function ignored
		 * all registers that shared the same file and index as the
		 * replacement register when counting the number of source
		 * selects. */
		{0,
		 "ADD temp[0].z, temp[6].__x_, const[1].__x_;",
		 "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"},

		/* A random case that should fail. */
		{0,
		 "ADD temp[3], temp[1], temp[2];",
		 "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"},

		/* The arguments of the ADD instruction share the same
		 * register file and index. Normally only one source select
		 * would be needed for these two arguments, but since they
		 * will be part of a presubtract operation the two source
		 * selects that the presubtract instruction expects (src0
		 * and src1) must be used. */
		{0,
		 "ADD temp[3].x, temp[0].x___, temp[0].x___;",
		 "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"},
	};
	unsigned int n;

	for (n = 0; n < sizeof(cases) / sizeof(cases[0]); n++) {
		test_rc_inst_can_use_presub(result, cases[n].expected,
				cases[n].add_str, cases[n].replace_str);
	}
}
unsigned radeon_compiler_util_run_tests() |
{ |
static struct test tests[] = { |
{"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, |
{NULL, NULL} |
}; |
return run_tests(tests); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c |
---|
0,0 → 1,607 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
#include <errno.h> |
#include <regex.h> |
#include <stdlib.h> |
#include <stdio.h> |
#include <string.h> |
#include <sys/types.h> |
#include "r500_fragprog.h" |
#include "r300_fragprog_swizzle.h" |
#include "radeon_compiler.h" |
#include "radeon_compiler_util.h" |
#include "radeon_opcodes.h" |
#include "radeon_program.h" |
#include "radeon_regalloc.h" |
#include "radeon_swizzle.h" |
#include "util/u_math.h" |
#include "rc_test_helpers.h" |
/* This file contains some helper functions for filling out the rc_instruction |
* data structures. These functions take a string as input based on the format |
* output by rc_program_print(). |
*/ |
/* Set to a non-zero value to make DBG() print to stderr. */
#define VERBOSE 0 |
/* printf-style debug output; prints nothing while VERBOSE is 0. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) |
/* Size of the buffer that regex_helper() hands to regerror(). */
#define REGEX_ERR_BUF_SIZE 50 |
/* One token cut out of a larger string: String points at the token's first
 * character inside the searched string and Length is its size in bytes, so
 * the token is NOT NUL-terminated at Length. */
struct match_info { |
const char * String; |
int Length; |
}; |
/**
 * Check whether a string consists only of spaces and newlines.
 *
 * @param str NUL-terminated string to test.
 * @return 1 if @p str is one or more space/newline characters and nothing
 * else, 0 otherwise (including for the empty string, or if the regular
 * expression cannot be compiled).
 */
static int is_whitespace(const char *str)
{
	regex_t regex;
	int matched;

	if (regcomp(&regex, "^[ \n]+$", REG_EXTENDED)) {
		fprintf(stderr, "Failed to compile whitespace regex\n");
		return 0;
	}

	matched = regexec(&regex, str, 0, NULL, 0) != REG_NOMATCH;

	/* regcomp() allocates; release the pattern before returning. */
	regfree(&regex);
	return matched;
}
/**
 * Length in bytes of the sub-match stored at @p index of @p matches
 * (end offset minus start offset).
 */
static int match_length(regmatch_t * matches, int index)
{
	const regmatch_t * m = &matches[index];

	return m->rm_eo - m->rm_so;
}
static int regex_helper( |
const char * regex_str, |
const char * search_str, |
regmatch_t * matches, |
int num_matches) |
{ |
char err_buf[REGEX_ERR_BUF_SIZE]; |
regex_t regex; |
int err_code; |
unsigned int i; |
err_code = regcomp(®ex, regex_str, REG_EXTENDED); |
if (err_code) { |
regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); |
fprintf(stderr, "Failed to compile regex: %s\n", err_buf); |
return 0; |
} |
err_code = regexec(®ex, search_str, num_matches, matches, 0); |
DBG("Search string: '%s'\n", search_str); |
for (i = 0; i < num_matches; i++) { |
DBG("Match %u start = %d end = %d\n", i, |
matches[i].rm_so, matches[i].rm_eo); |
} |
if (err_code) { |
regerror(err_code, ®ex, err_buf, REGEX_ERR_BUF_SIZE); |
fprintf(stderr, "Failed to match regex: %s\n", err_buf); |
return 0; |
} |
return 1; |
} |
/* Size of the regmatch_t array used by init_rc_normal_src(): entry 0 for the
 * whole match plus one entry for each of the five capture groups. */
#define REGEX_SRC_MATCHES 6 |
/* The pieces of a source register string, in the order of the capture
 * groups in init_rc_normal_src(): leading '-' characters, '|' characters,
 * register file name, register index digits, and the swizzle including its
 * leading '.'. */
struct src_tokens { |
struct match_info Negate; |
struct match_info Abs; |
struct match_info File; |
struct match_info Index; |
struct match_info Swizzle; |
}; |
/** |
* Initialize the source register at index src_index for the instruction based |
* on src_str. |
* |
* NOTE: Warning in init_rc_normal_instruction() applies to this function as |
* well. |
* |
* @param src_str A string that represents the source register. The format for |
* this string is the same that is output by rc_program_print. |
* @return 1 On success, 0 on failure |
*/ |
int init_rc_normal_src( |
struct rc_instruction * inst, |
unsigned int src_index, |
const char * src_str) |
{ |
const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[*([[:digit:]]*)\\]*(\\.*[[:lower:]_]*)"; |
regmatch_t matches[REGEX_SRC_MATCHES]; |
struct src_tokens tokens; |
struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; |
unsigned int i; |
/* Execute the regex */ |
if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { |
fprintf(stderr, "Failed to execute regex for src register.\n"); |
return 0; |
} |
/* Create Tokens */ |
tokens.Negate.String = src_str + matches[1].rm_so; |
tokens.Negate.Length = match_length(matches, 1); |
tokens.Abs.String = src_str + matches[2].rm_so; |
tokens.Abs.Length = match_length(matches, 2); |
tokens.File.String = src_str + matches[3].rm_so; |
tokens.File.Length = match_length(matches, 3); |
tokens.Index.String = src_str + matches[4].rm_so; |
tokens.Index.Length = match_length(matches, 4); |
tokens.Swizzle.String = src_str + matches[5].rm_so; |
tokens.Swizzle.Length = match_length(matches, 5); |
/* Negate */ |
if (tokens.Negate.Length > 0) { |
src_reg->Negate = RC_MASK_XYZW; |
} |
/* Abs */ |
if (tokens.Abs.Length > 0) { |
src_reg->Abs = 1; |
} |
/* File */ |
if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { |
src_reg->File = RC_FILE_TEMPORARY; |
} else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { |
src_reg->File = RC_FILE_INPUT; |
} else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { |
src_reg->File = RC_FILE_CONSTANT; |
} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { |
src_reg->File = RC_FILE_NONE; |
} |
/* Index */ |
errno = 0; |
src_reg->Index = strtol(tokens.Index.String, NULL, 10); |
if (errno > 0) { |
fprintf(stderr, "Could not convert src register index.\n"); |
return 0; |
} |
/* Swizzle */ |
if (tokens.Swizzle.Length == 0) { |
src_reg->Swizzle = RC_SWIZZLE_XYZW; |
} else { |
int str_index = 1; |
src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); |
if (tokens.Swizzle.String[0] != '.') { |
fprintf(stderr, "First char of swizzle is not valid.\n"); |
return 0; |
} |
for (i = 0; i < 4 && str_index < tokens.Swizzle.Length; |
i++, str_index++) { |
if (tokens.Swizzle.String[str_index] == '-') { |
src_reg->Negate |= (1 << i); |
str_index++; |
} |
switch(tokens.Swizzle.String[str_index]) { |
case 'x': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); |
break; |
case 'y': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); |
break; |
case 'z': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); |
break; |
case 'w': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); |
break; |
case '1': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); |
break; |
case '0': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); |
break; |
case 'H': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); |
break; |
case '_': |
SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); |
break; |
default: |
fprintf(stderr, "Unknown src register swizzle: %c\n", |
tokens.Swizzle.String[str_index]); |
return 0; |
} |
} |
} |
DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", |
src_reg->File, src_reg->Index, src_reg->Swizzle, |
src_reg->Negate, src_reg->Abs); |
return 1; |
} |
/* Size of the regmatch_t array used by init_rc_normal_dst(): entry 0 for the
 * whole match plus one entry for each of the three capture groups. */
#define REGEX_DST_MATCHES 4 |
/* The pieces of a destination register string, in the order of the capture
 * groups in init_rc_normal_dst(): register file name, register index
 * digits, and the write mask including its leading '.'. */
struct dst_tokens { |
struct match_info File; |
struct match_info Index; |
struct match_info WriteMask; |
}; |
/** |
* Initialize the destination for the instruction based on dst_str. |
* |
* NOTE: Warning in init_rc_normal_instruction() applies to this function as |
* well. |
* |
* @param dst_str A string that represents the destination register. The format |
* for this string is the same that is output by rc_program_print. |
* @return 1 On success, 0 on failure |
*/ |
int init_rc_normal_dst( |
struct rc_instruction * inst, |
const char * dst_str) |
{ |
const char * regex_str = "([[:lower:]]*)\\[*([[:digit:]]*)\\]*(\\.*[[:lower:]]*)"; |
regmatch_t matches[REGEX_DST_MATCHES]; |
struct dst_tokens tokens; |
unsigned int i; |
/* Execute the regex */ |
if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { |
fprintf(stderr, "Failed to execute regex for dst register.\n"); |
return 0; |
} |
/* Create Tokens */ |
tokens.File.String = dst_str + matches[1].rm_so; |
tokens.File.Length = match_length(matches, 1); |
tokens.Index.String = dst_str + matches[2].rm_so; |
tokens.Index.Length = match_length(matches, 2); |
tokens.WriteMask.String = dst_str + matches[3].rm_so; |
tokens.WriteMask.Length = match_length(matches, 3); |
/* File Type */ |
if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { |
inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
} else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { |
inst->U.I.DstReg.File = RC_FILE_OUTPUT; |
} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { |
inst->U.I.DstReg.File = RC_FILE_NONE; |
return 1; |
} else { |
fprintf(stderr, "Unknown dst register file type.\n"); |
return 0; |
} |
/* File Index */ |
errno = 0; |
inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); |
if (errno > 0) { |
fprintf(stderr, "Could not convert dst register index\n"); |
return 0; |
} |
/* WriteMask */ |
if (tokens.WriteMask.Length == 0) { |
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
} else { |
inst->U.I.DstReg.WriteMask = 0; |
/* The first character should be '.' */ |
if (tokens.WriteMask.String[0] != '.') { |
fprintf(stderr, "1st char of writemask is not valid.\n"); |
return 0; |
} |
for (i = 1; i < tokens.WriteMask.Length; i++) { |
switch(tokens.WriteMask.String[i]) { |
case 'x': |
inst->U.I.DstReg.WriteMask |= RC_MASK_X; |
break; |
case 'y': |
inst->U.I.DstReg.WriteMask |= RC_MASK_Y; |
break; |
case 'z': |
inst->U.I.DstReg.WriteMask |= RC_MASK_Z; |
break; |
case 'w': |
inst->U.I.DstReg.WriteMask |= RC_MASK_W; |
break; |
default: |
fprintf(stderr, "Unknown swizzle in writemask: %c\n", |
tokens.WriteMask.String[i]); |
return 0; |
} |
} |
} |
DBG("Dst Reg File=%u Index=%d Writemask=%d\n", |
inst->U.I.DstReg.File, |
inst->U.I.DstReg.Index, |
inst->U.I.DstReg.WriteMask); |
return 1; |
} |
/* Size of the regmatch_t array used by parse_rc_normal_instruction(): entry
 * 0 for the whole match plus one entry for each of the six capture groups
 * (opcode, _SAT suffix and four operand slots). */
#define REGEX_INST_MATCHES 7 |
/* Not referenced in the visible part of this file. */
#define REGEX_CONST_MATCHES 5 |
/* The pieces of an instruction string: opcode name, optional _SAT suffix,
 * destination operand and up to three source operands. */
struct inst_tokens { |
struct match_info Opcode; |
struct match_info Sat; |
struct match_info Dst; |
struct match_info Srcs[3]; |
}; |
/** |
* Initialize a normal instruction based on inst_str. |
* |
* WARNING: This function might not be able to handle every kind of format that |
* rc_program_print() can output. If you are having problems with a |
* particular string, you may need to add support for it to this functions. |
* |
* @param inst_str A string that represents the source register. The format for |
* this string is the same that is output by rc_program_print. |
* @return 1 On success, 0 on failure |
*/ |
int parse_rc_normal_instruction( |
struct rc_instruction * inst, |
const char * inst_str) |
{ |
const char * regex_str = "[[:digit:]: ]*([[:upper:][:digit:]]+)(_SAT)*[ ]*([^,;]*)[, ]*([^,;]*)[, ]*([^,;]*)[, ]*([^;]*)"; |
int i; |
regmatch_t matches[REGEX_INST_MATCHES]; |
struct inst_tokens tokens; |
/* Execute the regex */ |
if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { |
return 0; |
} |
memset(&tokens, 0, sizeof(tokens)); |
/* Create Tokens */ |
tokens.Opcode.String = inst_str + matches[1].rm_so; |
tokens.Opcode.Length = match_length(matches, 1); |
if (matches[2].rm_so > -1) { |
tokens.Sat.String = inst_str + matches[2].rm_so; |
tokens.Sat.Length = match_length(matches, 2); |
} |
/* Fill out the rest of the instruction. */ |
inst->Type = RC_INSTRUCTION_NORMAL; |
for (i = 0; i < MAX_RC_OPCODE; i++) { |
const struct rc_opcode_info * info = rc_get_opcode_info(i); |
unsigned int first_src = 3; |
unsigned int j; |
if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { |
continue; |
} |
inst->U.I.Opcode = info->Opcode; |
if (info->HasDstReg) { |
char * dst_str; |
tokens.Dst.String = inst_str + matches[3].rm_so; |
tokens.Dst.Length = match_length(matches, 3); |
first_src++; |
dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); |
strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); |
dst_str[tokens.Dst.Length] = '\0'; |
init_rc_normal_dst(inst, dst_str); |
free(dst_str); |
} |
for (j = 0; j < info->NumSrcRegs; j++) { |
char * src_str; |
tokens.Srcs[j].String = |
inst_str + matches[first_src + j].rm_so; |
tokens.Srcs[j].Length = |
match_length(matches, first_src + j); |
src_str = malloc(sizeof(char) * |
(tokens.Srcs[j].Length + 1)); |
strncpy(src_str, tokens.Srcs[j].String, |
tokens.Srcs[j].Length); |
src_str[tokens.Srcs[j].Length] = '\0'; |
init_rc_normal_src(inst, j, src_str); |
} |
if (info->HasTexture) { |
/* XXX: Will this always be XYZW ? */ |
inst->U.I.TexSwizzle = RC_SWIZZLE_XYZW; |
} |
break; |
} |
return 1; |
} |
#define INDEX_TOKEN_LEN 4
#define FLOAT_TOKEN_LEN 50

/**
 * Parse a constant definition.
 *
 * Two spellings are accepted:
 *   const[N] {a, b, c, d}
 *   const[N] = { a b c d }
 *
 * @param index Receives the constant index N.
 * @param data Receives the four float components; must have room for 4.
 * @param const_str The string to parse.
 * @return 1 if the index and all four components were read, 0 otherwise.
 * On failure *index and data may have been partially written.
 */
int parse_constant(unsigned *index, float *data, const char *const_str)
{
	/* %u matches the unsigned destination (the index is never signed). */
	int matched = sscanf(const_str, "const[%u] {%f, %f, %f, %f}", index,
			&data[0], &data[1], &data[2], &data[3]);

	if (matched != 5) {
		/* Fall back to the "= { a b c d }" form, which separates the
		 * components with whitespace instead of commas. */
		matched = sscanf(const_str, "const[%u] = {%f %f %f %f", index,
				&data[0], &data[1], &data[2], &data[3]);
	}
	return matched == 5;
}
int init_rc_normal_instruction( |
struct rc_instruction * inst, |
const char * inst_str) |
{ |
/* Initialize inst */ |
memset(inst, 0, sizeof(struct rc_instruction)); |
return parse_rc_normal_instruction(inst, inst_str); |
} |
void add_instruction(struct radeon_compiler *c, const char * inst_string) |
{ |
struct rc_instruction * new_inst = |
rc_insert_new_instruction(c, c->Program.Instructions.Prev); |
parse_rc_normal_instruction(new_inst, inst_string); |
} |
int add_constant(struct radeon_compiler *c, const char *const_str) |
{ |
float data[4]; |
unsigned index; |
struct rc_constant_list *constants; |
struct rc_constant constant; |
if (!parse_constant(&index, data, const_str)) { |
return 0; |
} |
constants = &c->Program.Constants; |
if (constants->_Reserved < index) { |
struct rc_constant * newlist; |
constants->_Reserved = index + 100; |
newlist = malloc(sizeof(struct rc_constant) * constants->_Reserved); |
if (constants->Constants) { |
memcpy(newlist, constants->Constants, |
sizeof(struct rc_constant) * |
constants->_Reserved); |
free(constants->Constants); |
} |
constants->Constants = newlist; |
} |
memset(&constant, 0, sizeof(constant)); |
constant.Type = RC_CONSTANT_IMMEDIATE; |
constant.Size = 4; |
memcpy(constant.u.Immediate, data, sizeof(float) * 4); |
constants->Constants[index] = constant; |
constants->Count = MAX2(constants->Count, index + 1); |
return 1; |
} |
void init_compiler( |
struct radeon_compiler *c, |
enum rc_program_type program_type, |
unsigned is_r500, |
unsigned is_r400) |
{ |
struct rc_regalloc_state *rs = malloc(sizeof(struct rc_regalloc_state)); |
rc_init_regalloc_state(rs); |
rc_init(c, rs); |
c->is_r500 = is_r500; |
c->max_temp_regs = is_r500 ? 128 : (is_r400 ? 64 : 32); |
c->max_constants = is_r500 ? 256 : 32; |
c->max_alu_insts = (is_r500 || is_r400) ? 512 : 64; |
c->max_tex_insts = (is_r500 || is_r400) ? 512 : 32; |
if (program_type == RC_FRAGMENT_PROGRAM) { |
c->has_half_swizzles = 1; |
c->has_presub = 1; |
c->has_omod = 1; |
c->SwizzleCaps = |
is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; |
} else { |
c->SwizzleCaps = &r300_vertprog_swizzle_caps; |
} |
} |
#define MAX_LINE_LENGTH 100 |
#define MAX_PATH_LENGTH 100 |
unsigned load_program( |
struct radeon_compiler *c, |
struct rc_test_file *test, |
const char *filename) |
{ |
char line[MAX_LINE_LENGTH]; |
char path[MAX_PATH_LENGTH]; |
FILE *file; |
unsigned *count; |
char **string_store; |
unsigned i = 0; |
snprintf(path, MAX_PATH_LENGTH, "compiler/tests/%s", filename); |
file = fopen(path, "r"); |
if (!file) { |
return 0; |
} |
memset(test, 0, sizeof(struct rc_test_file)); |
count = &test->num_input_lines; |
while (fgets(line, MAX_LINE_LENGTH, file)){ |
if (line[MAX_LINE_LENGTH - 2] == '\n') { |
fprintf(stderr, "Error line cannot be longer than 100 " |
"characters:\n%s\n", line); |
return 0; |
} |
// Comment |
if (line[0] == '#' || is_whitespace(line)) { |
continue; |
} |
if (line[0] == '=') { |
count = &test->num_expected_lines; |
continue; |
} |
(*count)++; |
} |
test->input = malloc(sizeof(char *) * test->num_input_lines); |
test->expected = malloc(sizeof(char *) * test->num_expected_lines); |
rewind(file); |
string_store = test->input; |
while(fgets(line, MAX_LINE_LENGTH, file)) { |
// Comment |
char * dst; |
if (line[0] == '#' || is_whitespace(line)) { |
continue; |
} |
if (line[0] == '=') { |
i = 0; |
string_store = test->expected; |
continue; |
} |
dst = string_store[i++] = malloc((strlen(line) + 1) * |
sizeof (char)); |
strcpy(dst, line); |
} |
for (i = 0; i < test->num_input_lines; i++) { |
if (test->input[i][0] == 'c') { |
add_constant(c, test->input[i]); |
continue; |
} |
// XXX: Parse immediates from the file. |
add_instruction(c, test->input[i]); |
} |
return 1; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h |
---|
0,0 → 1,71 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Author: Tom Stellard <thomas.stellard@amd.com> |
*/ |
/* Include guard: this header defines struct rc_test_file, so including it
 * twice in one translation unit would otherwise be a redefinition error. */
#ifndef RC_TEST_HELPERS_H
#define RC_TEST_HELPERS_H

#include "radeon_compiler.h"

/* Lines of a test file, split into the section before the '=' separator
 * (input) and the section after it (expected). */
struct rc_test_file {
	unsigned num_input_lines;
	char **input;
	unsigned num_expected_lines;
	char **expected;
};

/* Fill in source operand src_index of inst from its textual form. */
int init_rc_normal_src(
	struct rc_instruction * inst,
	unsigned int src_index,
	const char * src_str);

/* Fill in the destination operand of inst from its textual form.
 * Returns 1 on success, 0 on failure. */
int init_rc_normal_dst(
	struct rc_instruction * inst,
	const char * dst_str);

/* Fill in inst from a whole instruction string.
 * Returns 1 on success, 0 on failure. */
int parse_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str);

/* Parse a constant definition into an index and four floats.
 * Returns 1 when all five values were read. */
int parse_constant(unsigned *index, float *data, const char *const_str);

/* Zero inst, then parse inst_str into it. */
int init_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str);

/* Append a new instruction parsed from inst_string to the program of c. */
void add_instruction(struct radeon_compiler *c, const char * inst_string);

/* Parse const_str and store it as an immediate constant of c.
 * Returns 1 on success, 0 on failure. */
int add_constant(struct radeon_compiler *c, const char *const_str);

/* Initialize c with the limits and swizzle caps of the selected chip
 * generation and program type. */
void init_compiler(
	struct radeon_compiler *c,
	enum rc_program_type program_type,
	unsigned is_r500,
	unsigned is_r400);

/* Load compiler/tests/<filename> into test and add its input lines to c.
 * Returns 1 on success, 0 on failure. */
unsigned load_program(
	struct radeon_compiler *c,
	struct rc_test_file *test,
	const char *filename);

#endif /* RC_TEST_HELPERS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/regalloc_tex_1d_swizzle.test |
---|
0,0 → 1,15 |
const[0] = { 0.0000 2.0000 1.0000 0.0000 } |
0: TEX temp[8].xyz, input[1].xy__, 2D[0]; |
1: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; |
2: TEX temp[12].xyz, input[1].xy__, 2D[1]; |
3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; |
4: MAD temp[15].xyz, temp[12].xyz_, const[0].yyy_, -none.111_; |
5: MAD temp[16].xyz, temp[10].xyz_, const[0].yyy_, -none.111_; |
6: MUL temp[17].xyz, temp[8].xyz_, input[0].xyz_; |
7: MOV output[0].w, none.___0; |
8: MOV temp[0].x, temp[14].w___; |
9: TEX temp[18].x, temp[0].x___, 1D[3]; |
10: DP3 temp[20].w, temp[16].xyz_, temp[15].xyz_; |
11: MUL temp[21].xyz, temp[17].xyz_, temp[18].xxx_; |
12: MUL output[0].xyz, temp[21].xyz_, temp[20].www_; |
= |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/unit_test.c |
---|
0,0 → 1,67 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
#include <stdlib.h> |
#include <stdio.h> |
#include <string.h> |
#include "unit_test.h" |
/**
 * Run every test in an array and print a summary line for each.
 *
 * @param tests Array of tests, terminated by an entry whose name is NULL.
 * The result field of every entry is reset before its test function runs.
 * @return 1 if every test passed all of its subtests, 0 otherwise.
 */
unsigned run_tests(struct test tests[])
{
	int i;
	unsigned pass = 1;
	for (i = 0; tests[i].name; i++) {
		printf("Test %s\n", tests[i].name);
		memset(&tests[i].result, 0, sizeof(tests[i].result));
		tests[i].test_func(&tests[i].result);
		/* The counters are unsigned, so print them with %u. */
		printf("Test %s (%u/%u) pass\n", tests[i].name,
			tests[i].result.pass, tests[i].result.test_count);
		if (tests[i].result.pass != tests[i].result.test_count) {
			pass = 0;
		}
	}
	return pass;
}
void test_begin(struct test_result * result) |
{ |
result->test_count++; |
} |
void test_check(struct test_result * result, int cond) |
{ |
printf("Subtest %u -> ", result->test_count); |
if (cond) { |
result->pass++; |
printf("Pass"); |
} else { |
result->fail++; |
printf("Fail"); |
} |
printf("\n"); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/unit_test.h |
---|
0,0 → 1,43 |
/* |
* Copyright 2011 Tom Stellard <tstellar@gmail.com> |
* |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining |
* a copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sublicense, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial |
* portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
*/ |
/* Include guard: the structs below must not be defined twice in one
 * translation unit. */
#ifndef UNIT_TEST_H
#define UNIT_TEST_H

/* Counters for the subtests of one test. */
struct test_result {
	unsigned int test_count;	/* subtests started (test_begin) */
	unsigned int pass;		/* subtests that passed (test_check) */
	unsigned int fail;		/* subtests that failed (test_check) */
};

/* One named test.  An array of these is terminated by a NULL name. */
struct test {
	const char * name;
	void (*test_func)(struct test_result * result);
	struct test_result result;
};

/* Run all tests; returns 1 if every subtest of every test passed. */
unsigned run_tests(struct test tests[]);

/* Start a subtest. */
void test_begin(struct test_result * result);

/* Record the outcome of the current subtest; non-zero cond is a pass. */
void test_check(struct test_result * result, int cond);

#endif /* UNIT_TEST_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_blit.c |
---|
0,0 → 1,863 |
/* |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_context.h" |
#include "r300_emit.h" |
#include "r300_texture.h" |
#include "r300_reg.h" |
#include "util/u_format.h" |
#include "util/u_half.h" |
#include "util/u_pack_color.h" |
#include "util/u_surface.h" |
/* Flags telling r300_blitter_begin which pieces of state to save or
 * suspend.  The first four are individual bits; the rest are the
 * combinations used by the blitter entry points in this file. */
enum r300_blitter_op /* bitmask */
{
    R300_STOP_QUERY         = 1 << 0,
    R300_SAVE_TEXTURES      = 1 << 1,
    R300_SAVE_FRAMEBUFFER   = 1 << 2,
    R300_IGNORE_RENDER_COND = 1 << 3,

    R300_CLEAR         = R300_STOP_QUERY,

    R300_CLEAR_SURFACE = R300_STOP_QUERY | R300_SAVE_FRAMEBUFFER,

    R300_COPY          = R300_STOP_QUERY | R300_SAVE_FRAMEBUFFER |
                         R300_SAVE_TEXTURES | R300_IGNORE_RENDER_COND,

    R300_BLIT          = R300_STOP_QUERY | R300_SAVE_FRAMEBUFFER |
                         R300_SAVE_TEXTURES | R300_IGNORE_RENDER_COND,

    R300_DECOMPRESS    = R300_STOP_QUERY | R300_IGNORE_RENDER_COND,
};
static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op) |
{ |
if ((op & R300_STOP_QUERY) && r300->query_current) { |
r300->blitter_saved_query = r300->query_current; |
r300_stop_query(r300); |
} |
/* Yeah we have to save all those states to ensure the blitter operation |
* is really transparent. The states will be restored by the blitter once |
* copying is done. */ |
util_blitter_save_blend(r300->blitter, r300->blend_state.state); |
util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); |
util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref)); |
util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); |
util_blitter_save_fragment_shader(r300->blitter, r300->fs.state); |
util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); |
util_blitter_save_viewport(r300->blitter, &r300->viewport); |
util_blitter_save_scissor(r300->blitter, r300->scissor_state.state); |
util_blitter_save_sample_mask(r300->blitter, *(unsigned*)r300->sample_mask.state); |
util_blitter_save_vertex_buffer_slot(r300->blitter, r300->vertex_buffer); |
util_blitter_save_vertex_elements(r300->blitter, r300->velems); |
if (op & R300_SAVE_FRAMEBUFFER) { |
util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); |
} |
if (op & R300_SAVE_TEXTURES) { |
struct r300_textures_state* state = |
(struct r300_textures_state*)r300->textures_state.state; |
util_blitter_save_fragment_sampler_states( |
r300->blitter, state->sampler_state_count, |
(void**)state->sampler_states); |
util_blitter_save_fragment_sampler_views( |
r300->blitter, state->sampler_view_count, |
(struct pipe_sampler_view**)state->sampler_views); |
} |
if (op & R300_IGNORE_RENDER_COND) { |
/* Save the flag. */ |
r300->blitter_saved_skip_rendering = r300->skip_rendering+1; |
r300->skip_rendering = FALSE; |
} else { |
r300->blitter_saved_skip_rendering = 0; |
} |
} |
static void r300_blitter_end(struct r300_context *r300) |
{ |
if (r300->blitter_saved_query) { |
r300_resume_query(r300, r300->blitter_saved_query); |
r300->blitter_saved_query = NULL; |
} |
if (r300->blitter_saved_skip_rendering) { |
/* Restore the flag. */ |
r300->skip_rendering = r300->blitter_saved_skip_rendering-1; |
} |
} |
static uint32_t r300_depth_clear_cb_value(enum pipe_format format, |
const float* rgba) |
{ |
union util_color uc; |
util_pack_color(rgba, format, &uc); |
if (util_format_get_blocksizebits(format) == 32) |
return uc.ui; |
else |
return uc.us | (uc.us << 16); |
} |
static boolean r300_cbzb_clear_allowed(struct r300_context *r300, |
unsigned clear_buffers) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
/* Only color clear allowed, and only one colorbuffer. */ |
if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) |
return FALSE; |
return r300_surface(fb->cbufs[0])->cbzb_allowed; |
} |
static boolean r300_fast_zclear_allowed(struct r300_context *r300, |
unsigned clear_buffers) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level] != 0; |
} |
static boolean r300_hiz_clear_allowed(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level] != 0; |
} |
static uint32_t r300_depth_clear_value(enum pipe_format format, |
double depth, unsigned stencil) |
{ |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
case PIPE_FORMAT_X8Z24_UNORM: |
return util_pack_z(format, depth); |
case PIPE_FORMAT_S8_UINT_Z24_UNORM: |
return util_pack_z_stencil(format, depth, stencil); |
default: |
assert(0); |
return 0; |
} |
} |
/* Convert a depth value to the HiZ clear dword: the depth is clamped to
 * [0, 1], scaled to an 8-bit value and replicated into all four bytes. */
static uint32_t r300_hiz_clear_value(double depth)
{
    uint32_t byte = (uint32_t)(CLAMP(depth, 0, 1) * 255.5);
    uint32_t packed;

    assert(byte <= 255);

    packed = byte;
    packed |= byte << 8;
    packed |= byte << 16;
    packed |= byte << 24;
    return packed;
}
static void r300_set_clear_color(struct r300_context *r300, |
const union pipe_color_union *color) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
union util_color uc; |
memset(&uc, 0, sizeof(uc)); |
util_pack_color(color->f, fb->cbufs[0]->format, &uc); |
if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT || |
fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT) { |
/* (0,1,2,3) maps to (B,G,R,A) */ |
r300->color_clear_value_gb = uc.h[0] | ((uint32_t)uc.h[1] << 16); |
r300->color_clear_value_ar = uc.h[2] | ((uint32_t)uc.h[3] << 16); |
} else { |
r300->color_clear_value = uc.ui; |
} |
} |
DEBUG_GET_ONCE_BOOL_OPTION(hyperz, "RADEON_HYPERZ", FALSE)

/* Clear the currently bound buffers selected by "buffers".
 *
 * Depth/stencil is cleared through ZMASK/HiZ and color through CMASK when
 * those paths are available; whatever remains in "buffers" afterwards is
 * cleared with the blitter. */
static void r300_clear(struct pipe_context* pipe,
                       unsigned buffers,
                       const union pipe_color_union *color,
                       double depth,
                       unsigned stencil)
{
    /* Notes about Zbuffer compression:
     *
     * 1) The zbuffer must be micro-tiled and whole microtiles must be
     *    written if compression is enabled. If microtiling is disabled,
     *    it locks up.
     *
     * 2) There is ZMASK RAM which contains a compressed zbuffer.
     *    Each dword of the Z Mask contains compression information
     *    for 16 4x4 pixel tiles, that is 2 bits for each tile.
     *    On chips with 2 Z pipes, every other dword maps to a different
     *    pipe. On newer chipsets, there is a new compression mode
     *    with 8x8 pixel tiles per 2 bits.
     *
     * 3) The FASTFILL bit has nothing to do with filling. It only tells hw
     *    it should look in the ZMASK RAM first before fetching from a real
     *    zbuffer.
     *
     * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
     *    during zbuffer reads instead of the value that is actually stored
     *    in the zbuffer memory. A pixel is in a cleared state when its ZMASK
     *    is equal to 0. Therefore, if you clear ZMASK with zeros, you may
     *    leave the zbuffer memory uninitialized, but then you must enable
     *    compression, so that the ZMASK RAM is actually used.
     *
     * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
     *    during zbuffer updates. A special decompressing operation should be
     *    used to fully decompress a zbuffer, which basically just stores all
     *    compressed tiles in ZMASK to the zbuffer memory.
     *
     * 6) For a 16-bit zbuffer, compression causes a hang with one or
     *    two samples and should not be used.
     *
     * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
     *    to avoid needless decompression.
     *
     * 8) Fastfill must not be used if reading of compressed Z data is disabled
     *    and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
     *    i.e. it cannot be used to compress the zbuffer.
     *
     * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
     *
     * - Marek
     */

    struct r300_context* r300 = r300_context(pipe);
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_hyperz_state *hyperz =
        (struct r300_hyperz_state*)r300->hyperz_state.state;
    uint32_t clear_width = fb->width;
    uint32_t clear_height = fb->height;
    /* Depth clear value to put back after a CBZB clear. */
    uint32_t saved_dcv = hyperz->zb_depthclearvalue;

    /* Use fast Z clear.
     * The zbuffer must be in micro-tiled mode, otherwise it locks up. */
    if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
        boolean zmask_clear = FALSE;
        boolean hiz_clear = FALSE;

        /* If both depth and stencil are present, they must be cleared
         * together; otherwise neither fast path is considered. */
        if (fb->zsbuf->texture->format != PIPE_FORMAT_S8_UINT_Z24_UNORM ||
            (buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
            zmask_clear = r300_fast_zclear_allowed(r300, buffers);
            hiz_clear = r300_hiz_clear_allowed(r300);
        }

        /* If we need Hyper-Z. */
        if (zmask_clear || hiz_clear) {
            /* Try to obtain the access to Hyper-Z buffers if we don't have one. */
            if (!r300->hyperz_enabled &&
                (r300->screen->caps.is_r500 || debug_get_option_hyperz())) {
                r300->hyperz_enabled =
                    r300->rws->cs_request_feature(r300->cs,
                                                  RADEON_FID_R300_HYPERZ_ACCESS,
                                                  TRUE);
                if (r300->hyperz_enabled) {
                    /* Need to emit HyperZ buffer regs for the first time. */
                    r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
                }
            }

            /* Setup Hyper-Z clears. */
            if (r300->hyperz_enabled) {
                if (zmask_clear) {
                    hyperz->zb_depthclearvalue =
                        r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
                    saved_dcv = hyperz->zb_depthclearvalue;

                    r300_mark_atom_dirty(r300, &r300->zmask_clear);
                    r300_mark_atom_dirty(r300, &r300->gpu_flush);
                    /* The blitter no longer has to clear depth/stencil. */
                    buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
                }

                if (hiz_clear) {
                    r300->hiz_clear_value = r300_hiz_clear_value(depth);
                    r300_mark_atom_dirty(r300, &r300->hiz_clear);
                    r300_mark_atom_dirty(r300, &r300->gpu_flush);
                }
                r300->num_z_clears++;
            }
        }
    }

    /* Use fast color clear for an AA colorbuffer.
     * The CMASK is shared between all colorbuffers, so we use it
     * if there is only one colorbuffer bound. */
    if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 &&
        r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) {
        /* Try to obtain the access to the CMASK if we don't have one. */
        if (!r300->cmask_access) {
            r300->cmask_access =
                r300->rws->cs_request_feature(r300->cs,
                                              RADEON_FID_R300_CMASK_ACCESS,
                                              TRUE);
        }

        /* Setup the clear. */
        if (r300->cmask_access) {
            /* Pair the resource with the CMASK to avoid other resources
             * accessing it. */
            if (!r300->screen->cmask_resource) {
                pipe_mutex_lock(r300->screen->cmask_mutex);
                /* Double checking (first unlocked, then locked). */
                if (!r300->screen->cmask_resource) {
                    /* Don't reference this, so that the texture can be
                     * destroyed while set in cmask_resource.
                     * Then in texture_destroy, we set cmask_resource to NULL. */
                    r300->screen->cmask_resource = fb->cbufs[0]->texture;
                }
                pipe_mutex_unlock(r300->screen->cmask_mutex);
            }

            if (r300->screen->cmask_resource == fb->cbufs[0]->texture) {
                r300_set_clear_color(r300, color);
                r300_mark_atom_dirty(r300, &r300->cmask_clear);
                r300_mark_atom_dirty(r300, &r300->gpu_flush);
                /* The blitter no longer has to clear color. */
                buffers &= ~PIPE_CLEAR_COLOR;
            }
        }
    }
    /* Enable CBZB clear. */
    else if (r300_cbzb_clear_allowed(r300, buffers)) {
        struct r300_surface *cb = r300_surface(fb->cbufs[0]);

        hyperz->zb_depthclearvalue =
            r300_depth_clear_cb_value(cb->base.format, color->f);

        clear_width = cb->cbzb_width;
        clear_height = cb->cbzb_height;

        r300->cbzb_clear = TRUE;
        r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
    }

    /* Clear. */
    if (buffers) {
        /* Clear using the blitter. */
        r300_blitter_begin(r300, R300_CLEAR);
        util_blitter_clear(r300->blitter,
                           clear_width,
                           clear_height,
                           buffers, color, depth, stencil);
        r300_blitter_end(r300);
    } else if (r300->zmask_clear.dirty ||
               r300->hiz_clear.dirty ||
               r300->cmask_clear.dirty) {
        /* Just clear zmask and hiz now, this does not use the standard draw
         * procedure. */
        /* Calculate zmask_clear and hiz_clear atom sizes. */
        unsigned dwords = r300->gpu_flush.size;

        if (r300->zmask_clear.dirty) {
            dwords += r300->zmask_clear.size;
        }
        if (r300->hiz_clear.dirty) {
            dwords += r300->hiz_clear.size;
        }
        if (r300->cmask_clear.dirty) {
            dwords += r300->cmask_clear.size;
        }
        dwords += r300_get_num_cs_end_dwords(r300);

        /* Reserve CS space. */
        if (dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
            r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
        }

        /* Emit clear packets. */
        r300_emit_gpu_flush(r300, r300->gpu_flush.size, r300->gpu_flush.state);
        r300->gpu_flush.dirty = FALSE;

        if (r300->zmask_clear.dirty) {
            r300_emit_zmask_clear(r300, r300->zmask_clear.size,
                                  r300->zmask_clear.state);
            r300->zmask_clear.dirty = FALSE;
        }
        if (r300->hiz_clear.dirty) {
            r300_emit_hiz_clear(r300, r300->hiz_clear.size,
                                r300->hiz_clear.state);
            r300->hiz_clear.dirty = FALSE;
        }
        if (r300->cmask_clear.dirty) {
            r300_emit_cmask_clear(r300, r300->cmask_clear.size,
                                  r300->cmask_clear.state);
            r300->cmask_clear.dirty = FALSE;
        }
    } else {
        /* Nothing left for the blitter and no fast clear pending. */
        assert(0);
    }

    /* Disable CBZB clear. */
    if (r300->cbzb_clear) {
        r300->cbzb_clear = FALSE;
        hyperz->zb_depthclearvalue = saved_dcv;
        r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
    }

    /* Enable fastfill and/or hiz.
     *
     * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
     * looks if zmask/hiz is in use and programs hardware accordingly. */
    if (r300->zmask_in_use || r300->hiz_in_use) {
        r300_mark_atom_dirty(r300, &r300->hyperz_state);
    }
}
/* Clear a region of a color surface to a constant value. */ |
static void r300_clear_render_target(struct pipe_context *pipe, |
struct pipe_surface *dst, |
const union pipe_color_union *color, |
unsigned dstx, unsigned dsty, |
unsigned width, unsigned height) |
{ |
struct r300_context *r300 = r300_context(pipe); |
r300_blitter_begin(r300, R300_CLEAR_SURFACE); |
util_blitter_clear_render_target(r300->blitter, dst, color, |
dstx, dsty, width, height); |
r300_blitter_end(r300); |
} |
/* Clear a region of a depth stencil surface. */ |
static void r300_clear_depth_stencil(struct pipe_context *pipe, |
struct pipe_surface *dst, |
unsigned clear_flags, |
double depth, |
unsigned stencil, |
unsigned dstx, unsigned dsty, |
unsigned width, unsigned height) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
if (r300->zmask_in_use && !r300->locked_zbuffer) { |
if (fb->zsbuf->texture == dst->texture) { |
r300_decompress_zmask(r300); |
} |
} |
/* XXX Do not decompress ZMask of the currently-set zbuffer. */ |
r300_blitter_begin(r300, R300_CLEAR_SURFACE); |
util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, |
dstx, dsty, width, height); |
r300_blitter_end(r300); |
} |
void r300_decompress_zmask(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
if (!r300->zmask_in_use || r300->locked_zbuffer) |
return; |
r300->zmask_decompress = TRUE; |
r300_mark_atom_dirty(r300, &r300->hyperz_state); |
r300_blitter_begin(r300, R300_DECOMPRESS); |
util_blitter_custom_clear_depth(r300->blitter, fb->width, fb->height, 0, |
r300->dsa_decompress_zmask); |
r300_blitter_end(r300); |
r300->zmask_decompress = FALSE; |
r300->zmask_in_use = FALSE; |
r300_mark_atom_dirty(r300, &r300->hyperz_state); |
} |
void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state fb; |
memset(&fb, 0, sizeof(fb)); |
fb.width = r300->locked_zbuffer->width; |
fb.height = r300->locked_zbuffer->height; |
fb.zsbuf = r300->locked_zbuffer; |
r300->context.set_framebuffer_state(&r300->context, &fb); |
r300_decompress_zmask(r300); |
} |
void r300_decompress_zmask_locked(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state saved_fb; |
memset(&saved_fb, 0, sizeof(saved_fb)); |
util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); |
r300_decompress_zmask_locked_unsafe(r300); |
r300->context.set_framebuffer_state(&r300->context, &saved_fb); |
util_unreference_framebuffer_state(&saved_fb); |
pipe_surface_reference(&r300->locked_zbuffer, NULL); |
} |
bool r300_is_blit_supported(enum pipe_format format) |
{ |
const struct util_format_description *desc = |
util_format_description(format); |
return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || |
desc->layout == UTIL_FORMAT_LAYOUT_S3TC || |
desc->layout == UTIL_FORMAT_LAYOUT_RGTC; |
} |
/* Copy a block of pixels from one surface to another. */ |
static void r300_resource_copy_region(struct pipe_context *pipe, |
struct pipe_resource *dst, |
unsigned dst_level, |
unsigned dstx, unsigned dsty, unsigned dstz, |
struct pipe_resource *src, |
unsigned src_level, |
const struct pipe_box *src_box) |
{ |
struct pipe_screen *screen = pipe->screen; |
struct r300_context *r300 = r300_context(pipe); |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
unsigned src_width0 = r300_resource(src)->tex.width0; |
unsigned src_height0 = r300_resource(src)->tex.height0; |
unsigned dst_width0 = r300_resource(dst)->tex.width0; |
unsigned dst_height0 = r300_resource(dst)->tex.height0; |
unsigned layout; |
struct pipe_box box, dstbox; |
struct pipe_sampler_view src_templ, *src_view; |
struct pipe_surface dst_templ, *dst_view; |
/* Fallback for buffers. */ |
if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) || |
!r300_is_blit_supported(dst->format)) { |
util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, |
src, src_level, src_box); |
return; |
} |
/* Can't read MSAA textures. */ |
if (src->nr_samples > 1 || dst->nr_samples > 1) { |
return; |
} |
/* The code below changes the texture format so that the copy can be done |
* on hardware. E.g. depth-stencil surfaces are copied as RGBA |
* colorbuffers. */ |
util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); |
util_blitter_default_src_texture(&src_templ, src, src_level); |
layout = util_format_description(dst_templ.format)->layout; |
/* Handle non-renderable plain formats. */ |
if (layout == UTIL_FORMAT_LAYOUT_PLAIN && |
(!screen->is_format_supported(screen, src_templ.format, src->target, |
src->nr_samples, |
PIPE_BIND_SAMPLER_VIEW) || |
!screen->is_format_supported(screen, dst_templ.format, dst->target, |
dst->nr_samples, |
PIPE_BIND_RENDER_TARGET))) { |
switch (util_format_get_blocksize(dst_templ.format)) { |
case 1: |
dst_templ.format = PIPE_FORMAT_I8_UNORM; |
break; |
case 2: |
dst_templ.format = PIPE_FORMAT_B4G4R4A4_UNORM; |
break; |
case 4: |
dst_templ.format = PIPE_FORMAT_B8G8R8A8_UNORM; |
break; |
case 8: |
dst_templ.format = PIPE_FORMAT_R16G16B16A16_UNORM; |
break; |
default: |
debug_printf("r300: copy_region: Unhandled format: %s. Falling back to software.\n" |
"r300: copy_region: Software fallback doesn't work for tiled textures.\n", |
util_format_short_name(dst_templ.format)); |
} |
src_templ.format = dst_templ.format; |
} |
/* Handle compressed formats. */ |
if (layout == UTIL_FORMAT_LAYOUT_S3TC || |
layout == UTIL_FORMAT_LAYOUT_RGTC) { |
assert(src_templ.format == dst_templ.format); |
box = *src_box; |
src_box = &box; |
dst_width0 = align(dst_width0, 4); |
dst_height0 = align(dst_height0, 4); |
src_width0 = align(src_width0, 4); |
src_height0 = align(src_height0, 4); |
box.width = align(box.width, 4); |
box.height = align(box.height, 4); |
switch (util_format_get_blocksize(dst_templ.format)) { |
case 8: |
/* one 4x4 pixel block has 8 bytes. |
* we set 1 pixel = 4 bytes ===> 1 block corrensponds to 2 pixels. */ |
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; |
dst_width0 = dst_width0 / 2; |
src_width0 = src_width0 / 2; |
dstx /= 2; |
box.x /= 2; |
box.width /= 2; |
break; |
case 16: |
/* one 4x4 pixel block has 16 bytes. |
* we set 1 pixel = 4 bytes ===> 1 block corresponds to 4 pixels. */ |
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; |
break; |
} |
src_templ.format = dst_templ.format; |
dst_height0 = dst_height0 / 4; |
src_height0 = src_height0 / 4; |
dsty /= 4; |
box.y /= 4; |
box.height /= 4; |
} |
/* Fallback for textures. */ |
if (!screen->is_format_supported(screen, dst_templ.format, |
dst->target, dst->nr_samples, |
PIPE_BIND_RENDER_TARGET) || |
!screen->is_format_supported(screen, src_templ.format, |
src->target, src->nr_samples, |
PIPE_BIND_SAMPLER_VIEW)) { |
assert(0 && "this shouldn't happen, update r300_is_blit_supported"); |
util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, |
src, src_level, src_box); |
return; |
} |
/* Decompress ZMASK. */ |
if (r300->zmask_in_use && !r300->locked_zbuffer) { |
if (fb->zsbuf->texture == src || |
fb->zsbuf->texture == dst) { |
r300_decompress_zmask(r300); |
} |
} |
dst_view = r300_create_surface_custom(pipe, dst, &dst_templ, dst_width0, dst_height0); |
src_view = r300_create_sampler_view_custom(pipe, src, &src_templ, src_width0, src_height0); |
u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height), |
abs(src_box->depth), &dstbox); |
r300_blitter_begin(r300, R300_COPY); |
util_blitter_blit_generic(r300->blitter, dst_view, &dstbox, |
src_view, src_box, src_width0, src_height0, |
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, |
FALSE); |
r300_blitter_end(r300); |
pipe_surface_reference(&dst_view, NULL); |
pipe_sampler_view_reference(&src_view, NULL); |
} |
static boolean r300_is_simple_msaa_resolve(const struct pipe_blit_info *info) |
{ |
unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); |
unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); |
return info->dst.resource->format == info->src.resource->format && |
info->dst.resource->format == info->dst.format && |
info->src.resource->format == info->src.format && |
!info->scissor_enable && |
info->mask == PIPE_MASK_RGBA && |
dst_width == info->src.resource->width0 && |
dst_height == info->src.resource->height0 && |
info->dst.box.x == 0 && |
info->dst.box.y == 0 && |
info->dst.box.width == dst_width && |
info->dst.box.height == dst_height && |
info->src.box.x == 0 && |
info->src.box.y == 0 && |
info->src.box.width == dst_width && |
info->src.box.height == dst_height && |
(r300_resource(info->dst.resource)->tex.microtile != RADEON_LAYOUT_LINEAR || |
r300_resource(info->dst.resource)->tex.macrotile[info->dst.level] != RADEON_LAYOUT_LINEAR); |
} |
static void r300_simple_msaa_resolve(struct pipe_context *pipe, |
struct pipe_resource *dst, |
unsigned dst_level, |
unsigned dst_layer, |
struct pipe_resource *src, |
enum pipe_format format) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct r300_surface *srcsurf, *dstsurf; |
struct pipe_surface surf_tmpl; |
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; |
memset(&surf_tmpl, 0, sizeof(surf_tmpl)); |
surf_tmpl.format = format; |
srcsurf = r300_surface(pipe->create_surface(pipe, src, &surf_tmpl)); |
surf_tmpl.format = format; |
surf_tmpl.u.tex.level = dst_level; |
surf_tmpl.u.tex.first_layer = |
surf_tmpl.u.tex.last_layer = dst_layer; |
dstsurf = r300_surface(pipe->create_surface(pipe, dst, &surf_tmpl)); |
/* COLORPITCH should contain the tiling info of the resolve buffer. |
* The tiling of the AA buffer isn't programmable anyway. */ |
srcsurf->pitch &= ~(R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3)); |
srcsurf->pitch |= dstsurf->pitch & (R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3)); |
/* Enable AA resolve. */ |
aa->dest = dstsurf; |
r300->aa_state.size = 8; |
r300_mark_atom_dirty(r300, &r300->aa_state); |
/* Resolve the surface. */ |
r300_blitter_begin(r300, R300_CLEAR_SURFACE); |
util_blitter_custom_color(r300->blitter, &srcsurf->base, NULL); |
r300_blitter_end(r300); |
/* Disable AA resolve. */ |
aa->dest = NULL; |
r300->aa_state.size = 4; |
r300_mark_atom_dirty(r300, &r300->aa_state); |
pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); |
pipe_surface_reference((struct pipe_surface**)&dstsurf, NULL); |
} |
static void r300_msaa_resolve(struct pipe_context *pipe, |
const struct pipe_blit_info *info) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct pipe_screen *screen = pipe->screen; |
struct pipe_resource *tmp, templ; |
struct pipe_blit_info blit; |
assert(info->src.level == 0); |
assert(info->src.box.z == 0); |
assert(info->src.box.depth == 1); |
assert(info->dst.box.depth == 1); |
if (r300_is_simple_msaa_resolve(info)) { |
r300_simple_msaa_resolve(pipe, info->dst.resource, info->dst.level, |
info->dst.box.z, info->src.resource, |
info->src.format); |
return; |
} |
/* resolve into a temporary texture, then blit */ |
memset(&templ, 0, sizeof(templ)); |
templ.target = PIPE_TEXTURE_2D; |
templ.format = info->src.resource->format; |
templ.width0 = info->src.resource->width0; |
templ.height0 = info->src.resource->height0; |
templ.depth0 = 1; |
templ.array_size = 1; |
templ.usage = PIPE_USAGE_STATIC; |
templ.flags = R300_RESOURCE_FORCE_MICROTILING; |
tmp = screen->resource_create(screen, &templ); |
/* resolve */ |
r300_simple_msaa_resolve(pipe, tmp, 0, 0, info->src.resource, |
info->src.format); |
/* blit */ |
blit = *info; |
blit.src.resource = tmp; |
blit.src.box.z = 0; |
r300_blitter_begin(r300, R300_BLIT); |
util_blitter_blit(r300->blitter, &blit); |
r300_blitter_end(r300); |
pipe_resource_reference(&tmp, NULL); |
} |
static void r300_blit(struct pipe_context *pipe, |
const struct pipe_blit_info *blit) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct pipe_blit_info info = *blit; |
/* MSAA resolve. */ |
if (info.src.resource->nr_samples > 1 && |
info.dst.resource->nr_samples <= 1 && |
!util_format_is_depth_or_stencil(info.src.resource->format)) { |
r300_msaa_resolve(pipe, &info); |
return; |
} |
/* Can't read MSAA textures. */ |
if (info.src.resource->nr_samples > 1) { |
return; |
} |
/* Blit a combined depth-stencil resource as color. |
* S8Z24 is the only supported stencil format. */ |
if ((info.mask & PIPE_MASK_S) && |
info.src.format == PIPE_FORMAT_S8_UINT_Z24_UNORM && |
info.dst.format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { |
if (info.dst.resource->nr_samples > 1) { |
/* Cannot do that with MSAA buffers. */ |
info.mask &= ~PIPE_MASK_S; |
if (!(info.mask & PIPE_MASK_Z)) { |
return; |
} |
} else { |
/* Single-sample buffer. */ |
info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM; |
info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; |
if (info.mask & PIPE_MASK_Z) { |
info.mask = PIPE_MASK_RGBA; /* depth+stencil */ |
} else { |
info.mask = PIPE_MASK_B; /* stencil only */ |
} |
} |
} |
/* Decompress ZMASK. */ |
if (r300->zmask_in_use && !r300->locked_zbuffer) { |
if (fb->zsbuf->texture == info.src.resource || |
fb->zsbuf->texture == info.dst.resource) { |
r300_decompress_zmask(r300); |
} |
} |
r300_blitter_begin(r300, R300_BLIT); |
util_blitter_blit(r300->blitter, &info); |
r300_blitter_end(r300); |
} |
void r300_init_blit_functions(struct r300_context *r300) |
{ |
r300->context.clear = r300_clear; |
r300->context.clear_render_target = r300_clear_render_target; |
r300->context.clear_depth_stencil = r300_clear_depth_stencil; |
r300->context.resource_copy_region = r300_resource_copy_region; |
r300->context.blit = r300_blit; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_cb.h |
---|
0,0 → 1,151 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
/** |
* This file contains macros for building command buffers in memory. |
* |
* Use NEW_CB for buffers with a varying size and it will also allocate |
* the buffer. |
* Use BEGIN_CB for arrays with a static size. |
* |
* Example: |
* |
* uint32_t cb[3]; |
* CB_LOCALS; |
* |
* BEGIN_CB(cb, 3); |
* OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); |
* OUT_CB(blend_color_red_alpha); |
* OUT_CB(blend_color_green_blue); |
* END_CB; |
* |
* And later: |
* |
* CS_LOCALS; |
* WRITE_CS_TABLE(cb, 3); |
* |
* Or using a little slower variant: |
* |
* CS_LOCALS; |
* BEGIN_CS(cb, 3); |
* OUT_CS_TABLE(cb, 3); |
* END_CS; |
*/ |
#ifndef R300_CB_H |
#define R300_CB_H |
#include "r300_reg.h" |
/* Yes, I know macros are ugly. However, they are much prettier than the code |
* that they neatly hide away, and don't have the cost of function setup, so |
* we're going to use them. */ |
/** |
* Command buffer setup. |
*/ |
#ifdef DEBUG

/* Declare the write cursor (cs_ptr) and the counter of dwords still expected
 * (cs_count) used by all the other CB macros. */
#define CB_LOCALS \
    int cs_count = 0; \
    uint32_t *cs_ptr = NULL; \
    (void) cs_count; (void) cs_ptr

/* Start writing `size` dwords into the existing array `ptr`. */
#define BEGIN_CB(ptr, size) do { \
    assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
    cs_count = (size); \
    cs_ptr = (ptr); \
} while (0)

/* Allocate `size` dwords with malloc, store the pointer in `ptr` and start
 * writing there. The allocation result is not checked here. */
#define NEW_CB(ptr, size) \
    do { \
    assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
    cs_count = (size); \
    cs_ptr = (ptr) = malloc((size) * sizeof(uint32_t)); \
} while (0)

/* Warn when the number of dwords written differs from the declared size. */
#define END_CB do { \
    if (cs_count != 0) \
        debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
                     cs_count, __FUNCTION__, __FILE__, __LINE__); \
} while (0)

/* Account for x dwords written. The argument is parenthesized so that any
 * expression can be passed. */
#define CB_USED_DW(x) cs_count -= (x)

#else

/* Release build: only the write cursor exists, no size bookkeeping. */
#define CB_LOCALS \
    uint32_t *cs_ptr = NULL; (void) cs_ptr

#define NEW_CB(ptr, size) \
    cs_ptr = (ptr) = malloc((size) * sizeof(uint32_t))

#define BEGIN_CB(ptr, size) cs_ptr = (ptr)
#define END_CB
#define CB_USED_DW(x)

#endif


/**
 * Storing pure DWORDs.
 */

/* Append one dword. */
#define OUT_CB(value) do { \
    *cs_ptr = (value); \
    cs_ptr++; \
    CB_USED_DW(1); \
} while (0)

/* Append `count` dwords copied from `values`. Both arguments are
 * parenthesized: with a bare `count`, an argument such as `a + b` would give
 * a wrong byte size in the memcpy. */
#define OUT_CB_TABLE(values, count) do { \
    memcpy(cs_ptr, (values), (count) * sizeof(uint32_t)); \
    cs_ptr += (count); \
    CB_USED_DW(count); \
} while (0)

/* Append a float as its 32-bit pattern. No trailing semicolon in the macro,
 * so it can be used as a single statement in if/else like the others. */
#define OUT_CB_32F(value) \
    OUT_CB(fui(value))

/* Append a one-register packet0 header followed by the register value. */
#define OUT_CB_REG(register, value) do { \
    assert(register); \
    OUT_CB(CP_PACKET0(register, 0)); \
    OUT_CB(value); \
} while (0)

/* Note: This expects count to be the number of registers,
 * not the actual packet0 count! */
#define OUT_CB_REG_SEQ(register, count) do { \
    assert(register); \
    OUT_CB(CP_PACKET0(register, (count) - 1)); \
} while (0)

/* Same as OUT_CB_REG_SEQ, with RADEON_ONE_REG_WR set in the header. */
#define OUT_CB_ONE_REG(register, count) do { \
    assert(register); \
    OUT_CB(CP_PACKET0(register, (count) - 1) | RADEON_ONE_REG_WR); \
} while (0)

/* Append a packet3 header. */
#define OUT_CB_PKT3(op, count) \
    OUT_CB(CP_PACKET3(op, count))
#endif /* R300_CB_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_chipset.c |
---|
0,0 → 1,175 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2011 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_chipset.h" |
#include "../../winsys/radeon/drm/radeon_winsys.h" |
#include "util/u_debug.h" |
#include "util/u_memory.h" |
#include "os/os_process.h" |
#include <stdio.h> |
#include <errno.h> |
/* r300_chipset: A file all to itself for deducing the various properties of |
* Radeons. */ |
static void r300_apply_hyperz_blacklist(struct r300_capabilities* caps) |
{ |
static const char *list[] = { |
"X", /* the DDX or indirect rendering */ |
"Xorg", /* (alternative name) */ |
"check_gl_texture_size", /* compiz */ |
"Compiz", |
"gnome-session-check-accelerated-helper", |
"gnome-shell", |
"kwin_opengl_test", |
"kwin", |
"firefox", |
}; |
int i; |
char proc_name[128]; |
if (!os_get_process_name(proc_name, sizeof(proc_name))) |
return; |
for (i = 0; i < Elements(list); i++) { |
if (strcmp(list[i], proc_name) == 0) { |
caps->zmask_ram = 0; |
caps->hiz_ram = 0; |
break; |
} |
} |
} |
/* Parse a PCI ID and fill an r300_capabilities struct with information.
 *
 * The chip family comes from the table in pci_ids/r300_pci_ids.h. An ID that
 * is not in the table prints a warning to stderr and aborts the process. */
void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps)
{
    /* PCI ID -> family. */
    switch (pci_id) {
#define CHIPSET(pci_id, name, chipfamily) \
        case pci_id: \
            caps->family = CHIP_##chipfamily; \
            break;
#include "pci_ids/r300_pci_ids.h"
#undef CHIPSET

    default:
        fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...",
                pci_id);
        abort();
    }

    /* Defaults; the per-family table below overrides what differs. */
    caps->zmask_ram = 0;
    caps->hiz_ram = 0;
    caps->num_vert_fpus = 0;
    caps->high_second_pipe = FALSE;

    switch (caps->family) {
    case CHIP_R300:
    case CHIP_R350:
        caps->num_vert_fpus = 4;
        caps->high_second_pipe = TRUE;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        caps->hiz_ram = R300_HIZ_LIMIT;
        break;

    case CHIP_RV350:
    case CHIP_RV370:
        /* No HiZ RAM on these. */
        caps->num_vert_fpus = 2;
        caps->high_second_pipe = TRUE;
        caps->zmask_ram = RV3xx_ZMASK_SIZE;
        break;

    case CHIP_RV380:
        caps->num_vert_fpus = 2;
        caps->high_second_pipe = TRUE;
        caps->zmask_ram = RV3xx_ZMASK_SIZE;
        caps->hiz_ram = R300_HIZ_LIMIT;
        break;

    case CHIP_RS400:
    case CHIP_RS600:
    case CHIP_RS690:
    case CHIP_RS740:
        /* Defaults only: no vertex FPUs, no ZMASK, no HiZ. */
        break;

    case CHIP_RC410:
    case CHIP_RS480:
        caps->zmask_ram = RV3xx_ZMASK_SIZE;
        break;

    case CHIP_R420:
    case CHIP_R423:
    case CHIP_R430:
    case CHIP_R480:
    case CHIP_R481:
    case CHIP_RV410:
        caps->num_vert_fpus = 6;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        caps->hiz_ram = R300_HIZ_LIMIT;
        break;

    case CHIP_R520:
        caps->num_vert_fpus = 8;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        caps->hiz_ram = R300_HIZ_LIMIT;
        break;

    case CHIP_RV515:
        caps->num_vert_fpus = 2;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        caps->hiz_ram = R300_HIZ_LIMIT;
        break;

    case CHIP_RV530:
        caps->num_vert_fpus = 5;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        caps->hiz_ram = RV530_HIZ_LIMIT;
        break;

    case CHIP_R580:
    case CHIP_RV560:
    case CHIP_RV570:
        caps->num_vert_fpus = 8;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        caps->hiz_ram = RV530_HIZ_LIMIT;
        break;
    }

    caps->num_tex_units = 16;

    /* Generation flags, derived from the ordering of the family enum. */
    caps->is_rv350 = caps->family >= CHIP_RV350;
    caps->is_r500 = caps->family >= CHIP_RV515;
    caps->is_r400 = caps->family >= CHIP_R420 && caps->family < CHIP_RV515;

    /* Features that follow from the generation. */
    caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
    caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4;
    caps->has_us_format = caps->family == CHIP_R520;

    /* TCL needs vertex FPUs; when they exist, the RADEON_NO_TCL debug
     * option can still turn it off. The option is only queried if the
     * hardware has TCL. */
    caps->has_tcl = caps->num_vert_fpus > 0 &&
                    !debug_get_bool_option("RADEON_NO_TCL", FALSE);

    r300_apply_hyperz_blacklist(caps);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_chipset.h |
---|
0,0 → 1,93 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_CHIPSET_H |
#define R300_CHIPSET_H |
#include "pipe/p_compiler.h" |
/* HiZ RAM limits. r300_parse_chipset() stores one of these, or 0, in
 * r300_capabilities::hiz_ram depending on the chip family. */
/* these are sizes in dwords */ |
#define R300_HIZ_LIMIT 10240 |
#define RV530_HIZ_LIMIT 15360 |
/* ZMASK RAM sizes. r300_parse_chipset() stores one of these, or 0, in
 * r300_capabilities::zmask_ram depending on the chip family.
 * NOTE(review): the "one pipe" remark below presumably explains
 * RV3xx_ZMASK_SIZE rather than PIPE_ZMASK_SIZE - confirm. */
/* rv3xx have only one pipe */ |
#define PIPE_ZMASK_SIZE 4096 |
#define RV3xx_ZMASK_SIZE 5120 |
/* The size of a compressed tile. Each compressed tile takes 2 bits |
* in the ZMASK RAM, so there are always 16 tiles per one dword. */ |
/* r300_parse_chipset() selects R300_ZCOMP_8X8 when is_rv350 is set and
 * R300_ZCOMP_4X4 otherwise. */
enum r300_zmask_compression { |
R300_ZCOMP_4X4 = 4, |
R300_ZCOMP_8X8 = 8 |
}; |
/* Structure containing all the possible information about a specific Radeon |
* in the R3xx, R4xx, and R5xx families. */ |
/* Every field is written by r300_parse_chipset(). */
struct r300_capabilities { |
/* Chipset family */ |
/* A CHIP_* value looked up from the PCI id. */
int family; |
/* The number of vertex floating-point units */ |
/* 0 for families without an entry in the per-family table. */
unsigned num_vert_fpus; |
/* The number of texture units. */ |
/* Set to 16 for every family. */
unsigned num_tex_units; |
/* Whether or not TCL is physically present */ |
/* Requires num_vert_fpus > 0, and is also cleared when the RADEON_NO_TCL
 * debug option is set. */
boolean has_tcl; |
/* Some chipsets do not have HiZ RAM - other have varying amounts. */ |
/* 0, R300_HIZ_LIMIT or RV530_HIZ_LIMIT; forced to 0 for processes on the
 * HyperZ blacklist. */
int hiz_ram; |
/* Some chipsets have zmask ram per pipe some don't. */ |
/* 0, PIPE_ZMASK_SIZE or RV3xx_ZMASK_SIZE; forced to 0 for processes on the
 * HyperZ blacklist. */
int zmask_ram; |
/* Compression mode for ZMASK. */ |
enum r300_zmask_compression z_compress; |
/* Whether or not this is RV350 or newer, including all r400 and r500 |
* chipsets. The differences compared to the oldest r300 chips are: |
* - Blend LTE/GTE thresholds |
* - Better MACRO_SWITCH in texture tiling |
* - Half float vertex |
* - More HyperZ optimizations */ |
/* Computed as family >= CHIP_RV350. */
boolean is_rv350; |
/* Whether or not this is R400. The differences compared to their rv350 |
* cousins are: |
* - Extended fragment shader registers |
* - 3DC texture compression (RGTC2) */ |
/* Computed as CHIP_R420 <= family < CHIP_RV515. */
boolean is_r400; |
/* Whether or not this is an RV515 or newer; R500s have many differences |
* that require extra consideration, compared to their rv350 cousins: |
* - Extra bit of width and height on texture sizes |
* - Blend color is split across two registers |
* - Universal Shader (US) block used for fragment shaders |
* - FP16 blending and multisampling |
* - Full RGTC texture compression |
* - 24-bit depth textures |
* - Stencil back-face reference value |
* - Ability to render up to 2^24 - 1 vertices with signed index offset */ |
/* Computed as family >= CHIP_RV515. */
boolean is_r500; |
/* Whether or not the second pixel pipe is accessed with the high bit */ |
boolean high_second_pipe; |
/* DXTC texture swizzling. */ |
/* Set when is_r400 or is_r500 is set. */
boolean dxtc_swizzle; |
/* Whether R500_US_FORMAT0_0 exists (R520-only and depends on DRM). */ |
/* r300_parse_chipset() only checks family == CHIP_R520 here. */
boolean has_us_format; |
}; |
/* Fill every field of *caps for the chip identified by pci_id.
 * The implementation prints a warning to stderr and calls abort() when the
 * PCI id is not in its table, so it does not return in that case. */
void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps); |
#endif /* R300_CHIPSET_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_context.c |
---|
0,0 → 1,507 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "draw/draw_context.h" |
#include "util/u_memory.h" |
#include "util/u_sampler.h" |
#include "util/u_simple_list.h" |
#include "util/u_upload_mgr.h" |
#include "os/os_time.h" |
#include "vl/vl_decoder.h" |
#include "vl/vl_video_buffer.h" |
#include "r300_cb.h" |
#include "r300_context.h" |
#include "r300_emit.h" |
#include "r300_screen.h" |
#include "r300_screen_buffer.h" |
#include "compiler/radeon_regalloc.h" |
static void r300_release_referenced_objects(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct r300_textures_state *textures = |
(struct r300_textures_state*)r300->textures_state.state; |
unsigned i; |
/* Framebuffer state. */ |
util_unreference_framebuffer_state(fb); |
/* Textures. */ |
for (i = 0; i < textures->sampler_view_count; i++) |
pipe_sampler_view_reference( |
(struct pipe_sampler_view**)&textures->sampler_views[i], NULL); |
/* The special dummy texture for texkill. */ |
if (r300->texkill_sampler) { |
pipe_sampler_view_reference( |
(struct pipe_sampler_view**)&r300->texkill_sampler, |
NULL); |
} |
/* Manually-created vertex buffers. */ |
pipe_resource_reference(&r300->dummy_vb.buffer, NULL); |
pb_reference(&r300->vbo, NULL); |
r300->context.delete_depth_stencil_alpha_state(&r300->context, |
r300->dsa_decompress_zmask); |
} |
static void r300_destroy_context(struct pipe_context* context) |
{ |
struct r300_context* r300 = r300_context(context); |
if (r300->cs && r300->hyperz_enabled) { |
r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); |
} |
if (r300->cs && r300->cmask_access) { |
r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE); |
} |
if (r300->blitter) |
util_blitter_destroy(r300->blitter); |
if (r300->draw) |
draw_destroy(r300->draw); |
if (r300->uploader) |
u_upload_destroy(r300->uploader); |
/* XXX: This function assumes r300->query_list was initialized */ |
r300_release_referenced_objects(r300); |
if (r300->cs) |
r300->rws->cs_destroy(r300->cs); |
rc_destroy_regalloc_state(&r300->fs_regalloc_state); |
/* XXX: No way to tell if this was initialized or not? */ |
util_slab_destroy(&r300->pool_transfers); |
/* Free the structs allocated in r300_setup_atoms() */ |
if (r300->aa_state.state) { |
FREE(r300->aa_state.state); |
FREE(r300->blend_color_state.state); |
FREE(r300->clip_state.state); |
FREE(r300->fb_state.state); |
FREE(r300->gpu_flush.state); |
FREE(r300->hyperz_state.state); |
FREE(r300->invariant_state.state); |
FREE(r300->rs_block_state.state); |
FREE(r300->sample_mask.state); |
FREE(r300->scissor_state.state); |
FREE(r300->textures_state.state); |
FREE(r300->vap_invariant_state.state); |
FREE(r300->viewport_state.state); |
FREE(r300->ztop_state.state); |
FREE(r300->fs_constants.state); |
FREE(r300->vs_constants.state); |
if (!r300->screen->caps.has_tcl) { |
FREE(r300->vertex_stream_state.state); |
} |
} |
FREE(r300); |
} |
static void r300_flush_callback(void *data, unsigned flags) |
{ |
struct r300_context* const cs_context_copy = data; |
r300_flush(&cs_context_copy->context, flags, NULL); |
} |
/* Initialize the atom r300-><atomname>: its name is the stringified
 * identifier, its emit callback is r300_emit_<atomname>, its size is
 * `atomsize`, and it starts clean with no state attached.
 * Expects a variable named `r300` in the calling scope. */
#define R300_INIT_ATOM(atomname, atomsize) \ |
do { \ |
r300->atomname.name = #atomname; \ |
r300->atomname.state = NULL; \ |
r300->atomname.size = atomsize; \ |
r300->atomname.emit = r300_emit_##atomname; \ |
r300->atomname.dirty = FALSE; \ |
} while (0) |
/* Allocate a zeroed `struct statetype` as the state of r300-><atomname>.
 * WARNING: on allocation failure this macro executes `return FALSE` in the
 * calling function, so it can only be used inside functions returning
 * boolean. Expects a variable named `r300` in the calling scope. */
#define R300_ALLOC_ATOM(atomname, statetype) \ |
do { \ |
r300->atomname.state = CALLOC_STRUCT(statetype); \ |
if (r300->atomname.state == NULL) \ |
return FALSE; \ |
} while (0) |
static boolean r300_setup_atoms(struct r300_context* r300) |
{ |
boolean is_rv350 = r300->screen->caps.is_rv350; |
boolean is_r500 = r300->screen->caps.is_r500; |
boolean has_tcl = r300->screen->caps.has_tcl; |
boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6; |
/* Create the actual atom list. |
* |
* Some atoms never change size, others change every emit - those have |
* the size of 0 here. |
* |
* NOTE: The framebuffer state is split into these atoms: |
* - gpu_flush (unpipelined regs) |
* - aa_state (unpipelined regs) |
* - fb_state (unpipelined regs) |
* - hyperz_state (unpipelined regs followed by pipelined ones) |
* - fb_state_pipelined (pipelined regs) |
* The motivation behind this is to be able to emit a strict |
* subset of the regs, and to have reasonable register ordering. */ |
/* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */ |
R300_INIT_ATOM(gpu_flush, 9); |
R300_INIT_ATOM(aa_state, 4); |
R300_INIT_ATOM(fb_state, 0); |
R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8); |
/* ZB (unpipelined), SC. */ |
R300_INIT_ATOM(ztop_state, 2); |
/* ZB, FG. */ |
R300_INIT_ATOM(dsa_state, is_r500 ? (drm_2_6_0 ? 10 : 8) : 6); |
/* RB3D. */ |
R300_INIT_ATOM(blend_state, 8); |
R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); |
/* SC. */ |
R300_INIT_ATOM(sample_mask, 2); |
R300_INIT_ATOM(scissor_state, 3); |
/* GB, FG, GA, SU, SC, RB3D. */ |
R300_INIT_ATOM(invariant_state, 14 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); |
/* VAP. */ |
R300_INIT_ATOM(viewport_state, 9); |
R300_INIT_ATOM(pvs_flush, 2); |
R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); |
R300_INIT_ATOM(vertex_stream_state, 0); |
R300_INIT_ATOM(vs_state, 0); |
R300_INIT_ATOM(vs_constants, 0); |
R300_INIT_ATOM(clip_state, has_tcl ? 3 + (6 * 4) : 0); |
/* VAP, RS, GA, GB, SU, SC. */ |
R300_INIT_ATOM(rs_block_state, 0); |
R300_INIT_ATOM(rs_state, 0); |
/* SC, US. */ |
R300_INIT_ATOM(fb_state_pipelined, 8); |
/* US. */ |
R300_INIT_ATOM(fs, 0); |
R300_INIT_ATOM(fs_rc_constant_state, 0); |
R300_INIT_ATOM(fs_constants, 0); |
/* TX. */ |
R300_INIT_ATOM(texture_cache_inval, 2); |
R300_INIT_ATOM(textures_state, 0); |
/* Clear commands */ |
R300_INIT_ATOM(hiz_clear, r300->screen->caps.hiz_ram > 0 ? 4 : 0); |
R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 4 : 0); |
R300_INIT_ATOM(cmask_clear, 4); |
/* ZB (unpipelined), SU. */ |
R300_INIT_ATOM(query_start, 4); |
/* Replace emission functions for r500. */ |
if (is_r500) { |
r300->fs.emit = r500_emit_fs; |
r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state; |
r300->fs_constants.emit = r500_emit_fs_constants; |
} |
/* Some non-CSO atoms need explicit space to store the state locally. */ |
R300_ALLOC_ATOM(aa_state, r300_aa_state); |
R300_ALLOC_ATOM(blend_color_state, r300_blend_color_state); |
R300_ALLOC_ATOM(clip_state, r300_clip_state); |
R300_ALLOC_ATOM(hyperz_state, r300_hyperz_state); |
R300_ALLOC_ATOM(invariant_state, r300_invariant_state); |
R300_ALLOC_ATOM(textures_state, r300_textures_state); |
R300_ALLOC_ATOM(vap_invariant_state, r300_vap_invariant_state); |
R300_ALLOC_ATOM(viewport_state, r300_viewport_state); |
R300_ALLOC_ATOM(ztop_state, r300_ztop_state); |
R300_ALLOC_ATOM(fb_state, pipe_framebuffer_state); |
R300_ALLOC_ATOM(gpu_flush, pipe_framebuffer_state); |
r300->sample_mask.state = malloc(4); |
R300_ALLOC_ATOM(scissor_state, pipe_scissor_state); |
R300_ALLOC_ATOM(rs_block_state, r300_rs_block); |
R300_ALLOC_ATOM(fs_constants, r300_constant_buffer); |
R300_ALLOC_ATOM(vs_constants, r300_constant_buffer); |
if (!r300->screen->caps.has_tcl) { |
R300_ALLOC_ATOM(vertex_stream_state, r300_vertex_stream_state); |
} |
/* Some non-CSO atoms don't use the state pointer. */ |
r300->fb_state_pipelined.allow_null_state = TRUE; |
r300->fs_rc_constant_state.allow_null_state = TRUE; |
r300->pvs_flush.allow_null_state = TRUE; |
r300->query_start.allow_null_state = TRUE; |
r300->texture_cache_inval.allow_null_state = TRUE; |
/* Some states must be marked as dirty here to properly set up |
* hardware in the first command stream. */ |
r300_mark_atom_dirty(r300, &r300->invariant_state); |
r300_mark_atom_dirty(r300, &r300->pvs_flush); |
r300_mark_atom_dirty(r300, &r300->vap_invariant_state); |
r300_mark_atom_dirty(r300, &r300->texture_cache_inval); |
r300_mark_atom_dirty(r300, &r300->textures_state); |
return TRUE; |
} |
/* Not every state tracker calls every driver function before the first draw |
* call and we must initialize the command buffers somehow. */ |
static void r300_init_states(struct pipe_context *pipe) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct pipe_blend_color bc = {{0}}; |
struct pipe_clip_state cs = {{{0}}}; |
struct pipe_scissor_state ss = {0}; |
struct r300_gpu_flush *gpuflush = |
(struct r300_gpu_flush*)r300->gpu_flush.state; |
struct r300_vap_invariant_state *vap_invariant = |
(struct r300_vap_invariant_state*)r300->vap_invariant_state.state; |
struct r300_invariant_state *invariant = |
(struct r300_invariant_state*)r300->invariant_state.state; |
CB_LOCALS; |
pipe->set_blend_color(pipe, &bc); |
pipe->set_clip_state(pipe, &cs); |
pipe->set_scissor_states(pipe, 0, 1, &ss); |
pipe->set_sample_mask(pipe, ~0); |
/* Initialize the GPU flush. */ |
{ |
BEGIN_CB(gpuflush->cb_flush_clean, 6); |
/* Flush and free renderbuffer caches. */ |
OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT, |
R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | |
R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); |
OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, |
R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | |
R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); |
/* Wait until the GPU is idle. |
* This fixes random pixels sometimes appearing probably caused |
* by incomplete rendering. */ |
OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); |
END_CB; |
} |
/* Initialize the VAP invariant state. */ |
{ |
BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size); |
OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); |
OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); |
OUT_CB_32F(1.0); |
OUT_CB_32F(1.0); |
OUT_CB_32F(1.0); |
OUT_CB_32F(1.0); |
OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); |
if (r300->screen->caps.is_r500) { |
OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); |
} |
END_CB; |
} |
/* Initialize the invariant state. */ |
{ |
BEGIN_CB(invariant->cb, r300->invariant_state.size); |
OUT_CB_REG(R300_GB_SELECT, 0); |
OUT_CB_REG(R300_FG_FOG_BLEND, 0); |
OUT_CB_REG(R300_GA_OFFSET, 0); |
OUT_CB_REG(R300_SU_TEX_WRAP, 0); |
OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); |
OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); |
OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); |
if (r300->screen->caps.is_rv350) { |
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); |
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); |
} |
if (r300->screen->caps.is_r500) { |
OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0); |
OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0); |
} |
END_CB; |
} |
/* Initialize the hyperz state. */ |
{ |
struct r300_hyperz_state *hyperz = |
(struct r300_hyperz_state*)r300->hyperz_state.state; |
BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size); |
OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, |
R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); |
OUT_CB_REG(R300_ZB_BW_CNTL, 0); |
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); |
OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); |
if (r300->screen->caps.is_r500 || |
(r300->screen->caps.is_rv350 && |
r300->screen->info.drm_minor >= 6)) { |
OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); |
} |
END_CB; |
} |
} |
struct pipe_context* r300_create_context(struct pipe_screen* screen, |
void *priv) |
{ |
struct r300_context* r300 = CALLOC_STRUCT(r300_context); |
struct r300_screen* r300screen = r300_screen(screen); |
struct radeon_winsys *rws = r300screen->rws; |
if (!r300) |
return NULL; |
r300->rws = rws; |
r300->screen = r300screen; |
r300->context.screen = screen; |
r300->context.priv = priv; |
r300->context.destroy = r300_destroy_context; |
util_slab_create(&r300->pool_transfers, |
sizeof(struct pipe_transfer), 64, |
UTIL_SLAB_SINGLETHREADED); |
r300->cs = rws->cs_create(rws, RING_GFX, NULL); |
if (r300->cs == NULL) |
goto fail; |
if (!r300screen->caps.has_tcl) { |
/* Create a Draw. This is used for SW TCL. */ |
r300->draw = draw_create(&r300->context); |
if (r300->draw == NULL) |
goto fail; |
/* Enable our renderer. */ |
draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); |
/* Disable converting points/lines to triangles. */ |
draw_wide_line_threshold(r300->draw, 10000000.f); |
draw_wide_point_threshold(r300->draw, 10000000.f); |
draw_wide_point_sprites(r300->draw, FALSE); |
draw_enable_line_stipple(r300->draw, TRUE); |
draw_enable_point_sprites(r300->draw, FALSE); |
} |
if (!r300_setup_atoms(r300)) |
goto fail; |
r300_init_blit_functions(r300); |
r300_init_flush_functions(r300); |
r300_init_query_functions(r300); |
r300_init_state_functions(r300); |
r300_init_resource_functions(r300); |
r300_init_render_functions(r300); |
r300_init_states(&r300->context); |
r300->context.create_video_decoder = vl_create_decoder; |
r300->context.create_video_buffer = vl_video_buffer_create; |
r300->uploader = u_upload_create(&r300->context, 256 * 1024, 4, |
PIPE_BIND_CUSTOM); |
r300->blitter = util_blitter_create(&r300->context); |
if (r300->blitter == NULL) |
goto fail; |
r300->blitter->draw_rectangle = r300_blitter_draw_rectangle; |
rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300); |
/* The KIL opcode needs the first texture unit to be enabled |
* on r3xx-r4xx. In order to calm down the CS checker, we bind this |
* dummy texture there. */ |
if (!r300->screen->caps.is_r500) { |
struct pipe_resource *tex; |
struct pipe_resource rtempl = {{0}}; |
struct pipe_sampler_view vtempl = {{0}}; |
rtempl.target = PIPE_TEXTURE_2D; |
rtempl.format = PIPE_FORMAT_I8_UNORM; |
rtempl.usage = PIPE_USAGE_IMMUTABLE; |
rtempl.width0 = 1; |
rtempl.height0 = 1; |
rtempl.depth0 = 1; |
tex = screen->resource_create(screen, &rtempl); |
u_sampler_view_default_template(&vtempl, tex, tex->format); |
r300->texkill_sampler = (struct r300_sampler_view*) |
r300->context.create_sampler_view(&r300->context, tex, &vtempl); |
pipe_resource_reference(&tex, NULL); |
} |
if (r300screen->caps.has_tcl) { |
struct pipe_resource vb; |
memset(&vb, 0, sizeof(vb)); |
vb.target = PIPE_BUFFER; |
vb.format = PIPE_FORMAT_R8_UNORM; |
vb.usage = PIPE_USAGE_STATIC; |
vb.width0 = sizeof(float) * 16; |
vb.height0 = 1; |
vb.depth0 = 1; |
r300->dummy_vb.buffer = screen->resource_create(screen, &vb); |
r300->context.set_vertex_buffers(&r300->context, 0, 1, &r300->dummy_vb); |
} |
{ |
struct pipe_depth_stencil_alpha_state dsa; |
memset(&dsa, 0, sizeof(dsa)); |
dsa.depth.writemask = 1; |
r300->dsa_decompress_zmask = |
r300->context.create_depth_stencil_alpha_state(&r300->context, |
&dsa); |
} |
r300->hyperz_time_of_last_flush = os_time_get(); |
/* Register allocator state */ |
rc_init_regalloc_state(&r300->fs_regalloc_state); |
/* Print driver info. */ |
#ifdef DEBUG |
{ |
#else |
if (DBG_ON(r300, DBG_INFO)) { |
#endif |
fprintf(stderr, |
"r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" |
"r300: GART size: %d MB, VRAM size: %d MB\n" |
"r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n", |
r300->screen->info.drm_major, |
r300->screen->info.drm_minor, |
r300->screen->info.drm_patchlevel, |
screen->get_name(screen), |
r300->screen->info.pci_id, |
r300->screen->info.r300_num_gb_pipes, |
r300->screen->info.r300_num_z_pipes, |
r300->screen->info.gart_size >> 20, |
r300->screen->info.vram_size >> 20, |
"YES", /* XXX really? */ |
r300->screen->caps.zmask_ram ? "YES" : "NO", |
r300->screen->caps.hiz_ram ? "YES" : "NO"); |
} |
return &r300->context; |
fail: |
r300_destroy_context(&r300->context); |
return NULL; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_context.h |
---|
0,0 → 1,785 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_CONTEXT_H |
#define R300_CONTEXT_H |
#define R300_BUFFER_ALIGNMENT 64 |
#include "draw/draw_vertex.h" |
#include "util/u_blitter.h" |
#include "pipe/p_context.h" |
#include "util/u_inlines.h" |
#include "util/u_transfer.h" |
#include "r300_defines.h" |
#include "r300_screen.h" |
#include "compiler/radeon_regalloc.h" |
#include "../../winsys/radeon/drm/radeon_winsys.h" |
struct u_upload_mgr; |
struct r300_context; |
struct r300_fragment_shader; |
struct r300_vertex_shader; |
struct r300_stencilref_context; |
enum colormask_swizzle { /* Swizzle kinds; r300_blend_state::cb_clamp has one row per kind. */ |
COLORMASK_BGRA, |
COLORMASK_RGBA, |
COLORMASK_RRRR, |
COLORMASK_AAAA, |
COLORMASK_GRRG, |
COLORMASK_ARRA, |
COLORMASK_BGRX, |
COLORMASK_RGBX, |
COLORMASK_NUM_SWIZZLES /* Count of the kinds above; used as an array size. */ |
}; |
struct r300_atom { /* One group of driver state; struct r300_context embeds one per group. */ |
/* Name, for debugging. */ |
const char* name; |
/* Opaque state. The concrete struct differs per atom and users cast it back |
* (see r300_init_states). */ |
void* state; |
/* Emit the state to the context. |
* NOTE(review): the unsigned and void* arguments are presumably this atom's |
* size and state -- confirm against the code that calls emit. */ |
void (*emit)(struct r300_context*, unsigned, void*); |
/* Upper bound on number of dwords to emit. */ |
unsigned size; |
/* Whether this atom should be emitted. Set by r300_mark_atom_dirty(). */ |
boolean dirty; |
/* Whether this atom may be emitted with state == NULL. */ |
boolean allow_null_state; |
}; |
struct r300_aa_state { /* State stored in the aa_state atom. */ |
struct r300_surface *dest; |
uint32_t aa_config; |
}; |
struct r300_blend_state { /* Gallium blend state plus 8-dword buffers; NOTE(review): presumably pre-built command buffers -- confirm. */ |
struct pipe_blend_state state; |
uint32_t cb_clamp[COLORMASK_NUM_SWIZZLES][8]; /* One 8-dword buffer per colormask_swizzle kind. */ |
uint32_t cb_noclamp[8]; |
uint32_t cb_noclamp_noalpha[8]; |
uint32_t cb_no_readwrite[8]; |
}; |
struct r300_blend_color_state { /* State stored in the blend_color_state atom. */ |
struct pipe_blend_color state; |
uint32_t cb[3]; |
}; |
struct r300_clip_state { /* State stored in the clip_state atom. */ |
uint32_t cb[29]; |
}; |
struct r300_dsa_state { /* Gallium depth/stencil/alpha state plus the matching register dwords. */ |
struct pipe_depth_stencil_alpha_state dsa; |
/* This is actually a command buffer with named dwords: the eight uint32_t |
* members from cb_begin to alpha_value, the same length as |
* cb_zb_no_readwrite below. NOTE(review): cb_begin, cb_reg and cb_reg1 are |
* presumably the packet headers between the register values -- confirm. */ |
uint32_t cb_begin; |
uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */ |
uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ |
uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ |
uint32_t cb_reg; |
uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ |
uint32_t cb_reg1; |
uint32_t alpha_value; /* R500_FG_ALPHA_VALUE: 0x4be0 */ |
/* Same, but without ZB reads and writes. */ |
uint32_t cb_zb_no_readwrite[8]; /* ZB not bound */ |
/* Emitted separately: */ |
uint32_t alpha_function; |
/* Whether a two-sided stencil is enabled. */ |
boolean two_sided; |
/* Whether a fallback should be used for a two-sided stencil ref value. */ |
boolean two_sided_stencil_ref; |
}; |
struct r300_hyperz_state { /* State stored in the hyperz_state atom. */ |
int flush; |
/* This is actually a command buffer with named dwords. r300_init_states |
* fills it starting at &cb_flush_begin. */ |
uint32_t cb_flush_begin; |
uint32_t zb_zcache_ctlstat; /* R300_ZB_ZCACHE_CTLSTAT */ |
uint32_t cb_begin; |
uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ |
uint32_t cb_reg1; |
uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ |
uint32_t cb_reg2; |
uint32_t sc_hyperz; /* R300_SC_HYPERZ */ |
uint32_t cb_reg3; |
uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */ |
}; |
struct r300_gpu_flush { /* State of the gpu_flush atom, as used by r300_init_states. */ |
uint32_t cb_flush_clean[6]; /* Filled with 6 dwords (cache flushes + wait-until-idle) by r300_init_states. */ |
}; |
#define RS_STATE_MAIN_SIZE 27 /* Number of uint32_t entries in r300_rs_state::cb_main. */ |
struct r300_rs_state { /* Rasterizer state: two Gallium state copies plus command buffers. */ |
/* Original rasterizer state. */ |
struct pipe_rasterizer_state rs; |
/* Draw-specific rasterizer state. */ |
struct pipe_rasterizer_state rs_draw; |
/* Command buffers. */ |
uint32_t cb_main[RS_STATE_MAIN_SIZE]; |
uint32_t cb_poly_offset_zb16[5]; |
uint32_t cb_poly_offset_zb24[5]; |
/* The index to cb_main where the cull_mode register value resides. */ |
unsigned cull_mode_index; |
/* Whether polygon offset is enabled. */ |
boolean polygon_offset_enable; |
/* This is emitted in the draw function. */ |
uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ |
}; |
struct r300_rs_block { /* State stored in the rs_block_state atom. */ |
uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ |
uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ |
uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ |
uint32_t gb_enable; |
uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ |
uint32_t count; /* R300_RS_COUNT */ |
uint32_t inst_count; /* R300_RS_INST_COUNT */ |
uint32_t inst[8]; /* R300_RS_INST_[0-7] */ |
}; |
struct r300_sampler_state { /* Gallium sampler state plus the TX_FILTER register values. */ |
struct pipe_sampler_state state; |
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ |
uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ |
/* Min/max LOD must be clamped to [0, last_level], thus |
* it's dependent on a currently bound texture */ |
unsigned min_lod, max_lod; |
}; |
struct r300_texture_format_state { /* Texture format register values; embedded in r300_sampler_view and r300_texture_sampler_state. */ |
uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ |
uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ |
uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */ |
uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */ |
uint32_t us_format0; /* R500_US_FORMAT0_0: 0x4640 (through 15) */ |
}; |
struct r300_sampler_view { /* Driver sampler view; pipe_sampler_view pointers are cast to it (see r300_create_context). */ |
struct pipe_sampler_view base; /* First member, so the casts to and from pipe_sampler_view work. */ |
/* For resource_copy_region. */ |
unsigned width0_override; |
unsigned height0_override; |
/* Swizzles in the UTIL_FORMAT_SWIZZLE_* representation, |
* derived from base. */ |
unsigned char swizzle[4]; |
/* Copy of r300_texture::texture_format_state with format-specific bits |
* added. */ |
struct r300_texture_format_state format; |
/* The texture cache region for this texture. */ |
uint32_t texcache_region; |
}; |
struct r300_texture_sampler_state { /* Register values for one texture unit; r300_textures_state::regs holds 16 of these. */ |
struct r300_texture_format_state format; |
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ |
uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ |
uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ |
}; |
struct r300_textures_state { /* State stored in the textures_state atom. */ |
/* Textures. */ |
struct r300_sampler_view *sampler_views[16]; |
int sampler_view_count; |
/* Sampler states. */ |
struct r300_sampler_state *sampler_states[16]; |
int sampler_state_count; |
/* This is the merge of the texture and sampler states. */ |
unsigned count; |
uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */ |
struct r300_texture_sampler_state regs[16]; |
}; |
struct r300_vertex_stream_state { /* Embedded in r300_vertex_element_state; also the vertex_stream_state atom's state when the screen has no TCL. */ |
/* R300_VAP_PROG_STREAM_CNTL_[0-7] */ |
uint32_t vap_prog_stream_cntl[8]; |
/* R300_VAP_PROG_STREAM_CNTL_EXT_[0-7] */ |
uint32_t vap_prog_stream_cntl_ext[8]; |
unsigned count; |
}; |
struct r300_invariant_state { /* State stored in the invariant_state atom. */ |
uint32_t cb[24]; /* Filled by r300_init_states. */ |
}; |
struct r300_vap_invariant_state { /* State stored in the vap_invariant_state atom. */ |
uint32_t cb[11]; /* Filled by r300_init_states. */ |
}; |
struct r300_viewport_state { /* State stored in the viewport_state atom. */ |
float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ |
float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ |
float yscale; /* R300_VAP_VPORT_YSCALE: 0x20a0 */ |
float yoffset; /* R300_VAP_VPORT_YOFFSET: 0x20a4 */ |
float zscale; /* R300_VAP_VPORT_ZSCALE: 0x20a8 */ |
float zoffset; /* R300_VAP_VPORT_ZOFFSET: 0x20ac */ |
uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */ |
}; |
struct r300_ztop_state { /* State stored in the ztop_state atom. */ |
uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ |
}; |
/* The next several objects are not pure Radeon state; they inherit from |
* various Gallium classes. */ |
struct r300_constant_buffer { /* State stored in the fs_constants and vs_constants atoms. */ |
/* Buffer of constants */ |
uint32_t *ptr; |
/* Remapping table. */ |
unsigned *remap_table; |
/* const buffer base */ |
uint32_t buffer_base; |
}; |
/* Query object. |
* |
* This is not a subclass of pipe_query because pipe_query is never |
* actually fully defined. So, rather than have it as a member, and do |
* subclass-style casting, we treat pipe_query as an opaque, and just |
* trust that our state tracker does not ever mess up query objects. |
*/ |
struct r300_query { /* Driver query object; r300_query() casts a pipe_query pointer to it. */ |
/* The kind of query. Currently only OQ is supported. */ |
unsigned type; |
/* The number of pipes where query results are stored. */ |
unsigned num_pipes; |
/* How many results have been written, in dwords. It's incremented |
* after end_query and flush. */ |
unsigned num_results; |
/* if begin has been emitted */ |
boolean begin_emitted; |
/* The buffer where query results are stored. */ |
struct pb_buffer *buf; |
struct radeon_winsys_cs_handle *cs_buf; |
}; |
struct r300_surface { /* Driver surface object; r300_surface() casts a pipe_surface pointer to it. */ |
struct pipe_surface base; /* First member, so the cast from pipe_surface works. */ |
/* Winsys buffer backing the texture. */ |
struct pb_buffer *buf; |
struct radeon_winsys_cs_handle *cs_buf; |
enum radeon_bo_domain domain; |
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ |
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ |
uint32_t pitch_zmask; /* ZMASK_PITCH */ |
uint32_t pitch_hiz; /* HIZ_PITCH */ |
uint32_t pitch_cmask; /* CMASK_PITCH */ |
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ |
/* Parameters dedicated to the CBZB clear. */ |
uint32_t cbzb_width; /* Aligned width. */ |
uint32_t cbzb_height; /* Half of the height. */ |
uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ |
uint32_t cbzb_pitch; /* DEPTHPITCH. */ |
uint32_t cbzb_format; /* ZB_FORMAT. */ |
/* Whether the CBZB clear is allowed on the surface. */ |
boolean cbzb_allowed; |
unsigned colormask_swizzle; /* NOTE(review): presumably an enum colormask_swizzle value -- confirm. */ |
}; |
struct r300_texture_desc { /* Texture layout description; embedded in r300_resource as the tex member. */ |
/* Width, height, and depth. |
* Most of the time, these are equal to pipe_texture::width0, height0, |
* and depth0. However, NPOT 3D textures must have dimensions aligned |
* to POT, and this is the only case when these variables differ from |
* pipe_texture. */ |
unsigned width0, height0, depth0; |
/* Buffer tiling. |
* Macrotiling is specified per-level because small mipmaps cannot |
* be macrotiled. */ |
enum radeon_bo_layout microtile; |
enum radeon_bo_layout macrotile[R300_MAX_TEXTURE_LEVELS]; |
/* Offsets into the buffer. */ |
unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; |
/* Strides for each mip-level. */ |
unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS]; |
/* Size of one zslice or face or 2D image based on the texture target. */ |
unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS]; |
/* Total size of this texture, in bytes, |
* derived from the texture properties. */ |
unsigned size_in_bytes; |
/** |
* If non-zero, override the natural texture layout with |
* a custom stride (in bytes). |
* |
* \note Mipmapping fails for textures with a non-natural layout! |
* |
* \sa r300_texture_get_stride |
*/ |
unsigned stride_in_bytes_override; |
/* Whether this texture has non-power-of-two dimensions. |
* It can be either a regular texture or a rectangle one. */ |
boolean is_npot; |
/* This flag says that hardware must use the stride for addressing |
* instead of the width. */ |
boolean uses_stride_addressing; |
/* Whether CBZB fast color clear is allowed on the miplevel. */ |
boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; |
/* Zbuffer compression info for each miplevel. */ |
boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; |
/* If zero, then disable Z compression/HiZ. */ |
unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; |
unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; |
/* Zmask/HiZ strides for each miplevel. */ |
unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; |
unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; |
/* CMASK info for AA buffers (no mipmapping). */ |
unsigned cmask_dwords; |
unsigned cmask_stride_in_pixels; |
}; |
struct r300_resource /* Driver resource object; r300_resource() casts a pipe_resource pointer to it. */ |
{ |
struct u_resource b; /* First member, so the cast in r300_resource() works. */ |
/* Winsys buffer backing this resource. */ |
struct pb_buffer *buf; |
struct radeon_winsys_cs_handle *cs_buf; |
enum radeon_bo_domain domain; |
/* Constant buffers and SWTCL vertex and index buffers are in user |
* memory. */ |
uint8_t *malloced_buffer; |
/* Texture description (addressing, layout, special features). */ |
struct r300_texture_desc tex; |
/* This is the level tiling flags were last time set for. |
* It's used to prevent redundant tiling-flags changes from happening.*/ |
unsigned surface_level; |
}; |
struct r300_vertex_element_state { /* Vertex element state; r300_context::velems points to one. */ |
unsigned count; |
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; |
unsigned format_size[PIPE_MAX_ATTRIBS]; |
/* The size of the vertex, in dwords. */ |
unsigned vertex_size_dwords; |
struct r300_vertex_stream_state vertex_stream; |
}; |
enum r300_hiz_func { /* HiZ function; stored in r300_context::hiz_func. */ |
HIZ_FUNC_NONE, |
/* The function, when determined, is set in stone |
* until the next HiZ clear. */ |
/* MAX is written to the HiZ buffer. |
* Used for LESS, LEQUAL. */ |
HIZ_FUNC_MAX, |
/* MIN is written to the HiZ buffer. |
* Used for GREATER, GEQUAL. */ |
HIZ_FUNC_MIN, |
}; |
/* For deferred fragment shader state validation. Stored in |
* r300_context::fs_status. */ |
enum r300_fs_validity_status { |
FRAGMENT_SHADER_VALID, /* No need to change/validate the FS. */ |
FRAGMENT_SHADER_MAYBE_DIRTY,/* Validate the FS if external state was changed. */ |
FRAGMENT_SHADER_DIRTY /* Always validate the FS (if the FS was changed) */ |
}; |
struct r300_context { /* Driver context; r300_context() casts a pipe_context pointer to it. */ |
/* Parent class. Must stay the first member: r300_context() casts a |
* pipe_context pointer directly to this struct. */ |
struct pipe_context context; |
/* The interface to the windowing system, etc. */ |
struct radeon_winsys *rws; |
/* The command stream. */ |
struct radeon_winsys_cs *cs; |
/* Screen. */ |
struct r300_screen *screen; |
/* Draw module. Used mostly for SW TCL. */ |
struct draw_context* draw; |
/* Vertex buffer for SW TCL. */ |
struct pb_buffer *vbo; |
struct radeon_winsys_cs_handle *vbo_cs; |
/* Offset into the SW TCL VBO. */ |
size_t draw_vbo_offset; |
/* Accelerated blit support. */ |
struct blitter_context* blitter; |
/* Stencil two-sided reference value fallback. */ |
struct r300_stencilref_context *stencilref_fallback; |
/* The KIL opcode needs the first texture unit to be enabled |
* on r3xx-r4xx. In order to calm down the CS checker, we bind this |
* dummy texture there. */ |
struct r300_sampler_view *texkill_sampler; |
/* When no vertex buffer is set, this one is used instead to prevent |
* hardlocks. */ |
struct pipe_vertex_buffer dummy_vb; |
/* The currently active query. */ |
struct r300_query *query_current; |
/* The saved query for blitter operations. */ |
struct r300_query *blitter_saved_query; |
/* Query list. */ |
struct r300_query query_list; |
/* Various CSO state objects. */ |
/* Each atom is emitted in the order it appears here, which can affect |
* performance and stability if not handled with care. |
* foreach_atom walks from gpu_flush to query_start by pointer increment, |
* so every atom must sit between those two members with nothing else |
* in between. */ |
/* GPU flush. */ |
struct r300_atom gpu_flush; |
/* Clears must be emitted immediately after the flush. */ |
/* HiZ clear */ |
struct r300_atom hiz_clear; |
/* zmask clear */ |
struct r300_atom zmask_clear; |
/* cmask clear */ |
struct r300_atom cmask_clear; |
/* Anti-aliasing (MSAA) state. */ |
struct r300_atom aa_state; |
/* Framebuffer state. */ |
struct r300_atom fb_state; |
/* HyperZ state (various SC/ZB bits). */ |
struct r300_atom hyperz_state; |
/* ZTOP state. */ |
struct r300_atom ztop_state; |
/* Depth, stencil, and alpha state. */ |
struct r300_atom dsa_state; |
/* Blend state. */ |
struct r300_atom blend_state; |
/* Blend color state. */ |
struct r300_atom blend_color_state; |
/* Scissor state. */ |
struct r300_atom scissor_state; |
/* Sample mask. */ |
struct r300_atom sample_mask; |
/* Invariant state. This must be emitted to get the engine started. */ |
struct r300_atom invariant_state; |
/* Viewport state. */ |
struct r300_atom viewport_state; |
/* PVS flush. */ |
struct r300_atom pvs_flush; |
/* VAP invariant state. */ |
struct r300_atom vap_invariant_state; |
/* Vertex stream formatting state. */ |
struct r300_atom vertex_stream_state; |
/* Vertex shader. */ |
struct r300_atom vs_state; |
/* User clip planes. */ |
struct r300_atom clip_state; |
/* RS block state + VAP (vertex shader) output mapping state. */ |
struct r300_atom rs_block_state; |
/* Rasterizer state. */ |
struct r300_atom rs_state; |
/* Framebuffer state (pipelined regs). */ |
struct r300_atom fb_state_pipelined; |
/* Fragment shader. */ |
struct r300_atom fs; |
/* Fragment shader RC_CONSTANT_STATE variables. */ |
struct r300_atom fs_rc_constant_state; |
/* Fragment shader constant buffer. */ |
struct r300_atom fs_constants; |
/* Vertex shader constant buffer. */ |
struct r300_atom vs_constants; |
/* Texture cache invalidate. */ |
struct r300_atom texture_cache_inval; |
/* Textures state. */ |
struct r300_atom textures_state; |
/* Occlusion query. */ |
struct r300_atom query_start; |
/* The pointers to the first and the last atom. last_dirty points one past |
* the last dirty atom (see r300_mark_atom_dirty). */ |
struct r300_atom *first_dirty, *last_dirty; |
/* Vertex elements for Gallium. */ |
struct r300_vertex_element_state *velems; |
/* Vertex info for Draw. */ |
struct vertex_info vertex_info; |
struct pipe_stencil_ref stencil_ref; |
struct pipe_viewport_state viewport; |
/* Stream locations for SWTCL. */ |
int stream_loc_notcl[16]; |
/* Flag indicating whether or not the HW is dirty. */ |
uint32_t dirty_hw; |
/* Whether polygon offset is enabled. */ |
boolean polygon_offset_enabled; |
/* Z buffer bit depth. */ |
uint32_t zbuffer_bpp; |
/* Whether rendering is conditional and should be skipped. */ |
boolean skip_rendering; |
/* The flag above saved by blitter. */ |
unsigned char blitter_saved_skip_rendering; |
/* Point sprites texcoord index, 1 bit per texcoord */ |
int sprite_coord_enable; |
/* Whether two-sided color selection is enabled (AKA light_twoside). */ |
boolean two_sided_color; |
boolean flatshade; |
/* Whether fast color clear is enabled. */ |
boolean cbzb_clear; |
/* Whether fragment shader needs to be validated. */ |
enum r300_fs_validity_status fs_status; |
/* Framebuffer multi-write. */ |
boolean fb_multiwrite; |
unsigned num_samples; |
boolean msaa_enable; |
boolean alpha_to_one; |
boolean alpha_to_coverage; |
void *dsa_decompress_zmask; /* DSA CSO with only depth writes enabled; created in r300_create_context. */ |
struct pipe_index_buffer index_buffer; |
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; |
unsigned nr_vertex_buffers; |
struct u_upload_mgr *uploader; |
struct util_slab_mempool pool_transfers; |
/* Stat counter. */ |
uint64_t flush_counter; |
/* const tracking for VS */ |
int vs_const_base; |
/* Vertex array state info */ |
boolean vertex_arrays_dirty; |
boolean vertex_arrays_indexed; |
int vertex_arrays_offset; |
int vertex_arrays_instance_id; |
boolean instancing_enabled; |
/* Hyper-Z stats. */ |
boolean hyperz_enabled; /* Whether it owns Hyper-Z access. */ |
int64_t hyperz_time_of_last_flush; /* Time of the last flush with Z clear. */ |
unsigned num_z_clears; /* Since the last flush. */ |
/* ZMask state. */ |
boolean zmask_in_use; /* Whether ZMASK is enabled. */ |
boolean zmask_decompress; /* Whether ZMASK is being decompressed. */ |
struct pipe_surface *locked_zbuffer; /* Unbound zbuffer which still has data in ZMASK. */ |
/* HiZ state. */ |
boolean hiz_in_use; /* Whether HIZ is enabled. */ |
enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */ |
uint32_t hiz_clear_value; /* HiZ clear value. */ |
/* CMASK state. */ |
boolean cmask_access; |
boolean cmask_in_use; |
uint32_t color_clear_value; /* RGBA8 or RGBA1010102 */ |
uint32_t color_clear_value_ar; /* RGBA16F */ |
uint32_t color_clear_value_gb; /* RGBA16F */ |
/* Compiler state. */ |
struct rc_regalloc_state fs_regalloc_state; /* Register allocator info for |
* fragment shaders. */ |
}; |
/* Visit every atom embedded in the context, from gpu_flush up to and
 * including query_start, by stepping a struct r300_atom pointer through the
 * members in between. `atom` must be an assignable pointer lvalue.
 * Both arguments are parenthesized so that expressions such as `&ctx` or
 * `*iter` expand correctly. */
#define foreach_atom(r300, atom) \
    for ((atom) = &(r300)->gpu_flush; \
         (atom) != (&(r300)->query_start)+1; \
         (atom)++)

/* Visit the atoms in the half-open range [first_dirty, last_dirty).
 * The loop body does not run when both pointers are equal (e.g. both NULL).
 * Atoms inside the range are not necessarily dirty; check atom->dirty. */
#define foreach_dirty_atom(r300, atom) \
    for ((atom) = (r300)->first_dirty; \
         (atom) != (r300)->last_dirty; \
         (atom)++)
/* Convenience cast wrappers. */ |
/* Reinterpret an opaque pipe_query pointer as the driver's r300_query. */
static INLINE struct r300_query* r300_query(struct pipe_query* q)
{
    struct r300_query *query = (struct r300_query*)q;

    return query;
}
/* Reinterpret a pipe_surface pointer as the driver's r300_surface. */
static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
{
    struct r300_surface *surface = (struct r300_surface*)surf;

    return surface;
}
/* Reinterpret a pipe_resource pointer as the driver's r300_resource. */
static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
{
    struct r300_resource *resource = (struct r300_resource*)tex;

    return resource;
}
/* Reinterpret a pipe_context pointer as the driver's r300_context. */
static INLINE struct r300_context* r300_context(struct pipe_context* context)
{
    struct r300_context *r300 = (struct r300_context*)context;

    return r300;
}
static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) |
{ |
return (struct r300_fragment_shader*)r300->fs.state; |
} |
static INLINE void r300_mark_atom_dirty(struct r300_context *r300, |
struct r300_atom *atom) |
{ |
atom->dirty = TRUE; |
if (!r300->first_dirty) { |
r300->first_dirty = atom; |
r300->last_dirty = atom+1; |
} else { |
if (atom < r300->first_dirty) |
r300->first_dirty = atom; |
else if (atom+1 > r300->last_dirty) |
r300->last_dirty = atom+1; |
} |
} |
struct pipe_context* r300_create_context(struct pipe_screen* screen, |
void *priv); |
/* Context initialization. */ |
struct draw_stage* r300_draw_stage(struct r300_context* r300); |
void r300_init_blit_functions(struct r300_context *r300); |
void r300_init_flush_functions(struct r300_context* r300); |
void r300_init_query_functions(struct r300_context* r300); |
void r300_init_render_functions(struct r300_context *r300); |
void r300_init_state_functions(struct r300_context* r300); |
void r300_init_resource_functions(struct r300_context* r300); |
/* r300_blit.c */ |
void r300_decompress_zmask(struct r300_context *r300); |
void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); |
void r300_decompress_zmask_locked(struct r300_context *r300); |
bool r300_is_blit_supported(enum pipe_format format); |
/* r300_flush.c */ |
void r300_flush(struct pipe_context *pipe, |
unsigned flags, |
struct pipe_fence_handle **fence); |
/* r300_hyperz.c */ |
void r300_update_hyperz_state(struct r300_context* r300); |
/* r300_query.c */ |
void r300_resume_query(struct r300_context *r300, |
struct r300_query *query); |
void r300_stop_query(struct r300_context *r300); |
/* r300_render_translate.c */ |
void r300_translate_index_buffer(struct r300_context *r300, |
struct pipe_index_buffer *ib, |
struct pipe_resource **out_index_buffer, |
unsigned *index_size, unsigned index_offset, |
unsigned *start, unsigned count); |
/* r300_render_stencilref.c */ |
void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); |
/* r300_render.c */ |
void r500_emit_index_bias(struct r300_context *r300, int index_bias); |
void r300_blitter_draw_rectangle(struct blitter_context *blitter, |
int x1, int y1, int x2, int y2, |
float depth, |
enum blitter_attrib_type type, |
const union pipe_color_union *attrib); |
/* r300_state.c */ |
enum r300_fb_state_change { /* Reason code passed to r300_mark_fb_state_dirty(). */ |
R300_CHANGED_FB_STATE = 0, |
R300_CHANGED_HYPERZ_FLAG, |
R300_CHANGED_MULTIWRITE, |
R300_CHANGED_CMASK_ENABLE, |
}; |
void r300_mark_fb_state_dirty(struct r300_context *r300, |
enum r300_fb_state_change change); |
void r300_mark_fs_code_dirty(struct r300_context *r300); |
struct pipe_sampler_view * |
r300_create_sampler_view_custom(struct pipe_context *pipe, |
struct pipe_resource *texture, |
const struct pipe_sampler_view *templ, |
unsigned width0_override, |
unsigned height0_override); |
/* r300_state_derived.c */ |
void r300_update_derived_state(struct r300_context* r300); |
/* r300_debug.c */ |
void r500_dump_rs_block(struct r300_rs_block *rs); |
/* Report whether the given debug flags are enabled, by asking the screen
 * that owns the context. */
static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
{
    boolean enabled = SCREEN_DBG_ON(ctx->screen, flags);

    return enabled;
}
/* printf-style debug output to stderr, printed only when CTX_DBG_ON() reports
 * the given flags as enabled for this context. */
static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
                           const char * fmt, ...)
{
    va_list args;

    if (!CTX_DBG_ON(ctx, flags))
        return;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
#define DBG_ON CTX_DBG_ON /* Short alias for CTX_DBG_ON(). */ |
#define DBG CTX_DBG /* Short alias for CTX_DBG(). */ |
#endif /* R300_CONTEXT_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_cs.h |
---|
0,0 → 1,127 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
/** |
* This file contains macros for immediate command submission. |
*/ |
#ifndef R300_CS_H |
#define R300_CS_H |
#include "r300_reg.h" |
#include "r300_context.h" |
/* Yes, I know macros are ugly. However, they are much prettier than the code |
* that they neatly hide away, and don't have the cost of function setup, so |
* we're going to use them. */ |
/** |
* Command submission setup. |
*/ |
/* Declares the locals the CS macros in this header operate on:
 *   cs_copy   - the context's command stream (context->cs)
 *   cs_winsys - the context's winsys (context->rws)
 *   cs_count  - dword budget tracked by BEGIN_CS/END_CS in DEBUG builds
 * The (void) casts keep unused-variable warnings quiet in functions that
 * do not touch cs_count or cs_winsys. */
#define CS_LOCALS(context)                              \
    struct radeon_winsys_cs *cs_copy = (context)->cs;   \
    struct radeon_winsys *cs_winsys = (context)->rws;   \
    int cs_count = 0;                                   \
    (void) cs_count;                                    \
    (void) cs_winsys;
#ifdef DEBUG

/* Open an emission of `size` dwords: asserts that the command buffer still
 * has room and records the budget in cs_count.  The argument is
 * parenthesized so that expressions containing low-precedence operators
 * (e.g. a conditional expression) are evaluated as a unit. */
#define BEGIN_CS(size) do { \
    assert((size) <= (RADEON_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
    cs_count = (size); \
} while (0)

/* Close an emission: warns if the number of dwords written since BEGIN_CS
 * does not match the announced size, then resets the budget. */
#define END_CS do { \
    if (cs_count != 0) \
        debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
                     cs_count, __FUNCTION__, __FILE__, __LINE__); \
    cs_count = 0; \
} while (0)

/* Account for x dwords written to the command stream. */
#define CS_USED_DW(x) cs_count -= (x)

#else

/* Non-debug builds: the bookkeeping compiles away entirely. */
#define BEGIN_CS(size)
#define END_CS
#define CS_USED_DW(x)

#endif
/**
 * Writing pure DWORDs.
 */

/* Append a single dword to the command stream and account for it. */
#define OUT_CS(value) do {                      \
    cs_copy->buf[cs_copy->cdw++] = (value);     \
    CS_USED_DW(1);                              \
} while (0)

/* Append a float; the value is passed through fui() first. */
#define OUT_CS_32F(value) \
    OUT_CS(fui(value))

/* Write one register: a packet0 header followed by the value. */
#define OUT_CS_REG(register, value) do {        \
    OUT_CS(CP_PACKET0(register, 0));            \
    OUT_CS(value);                              \
} while (0)

/* Packet0 header for a run of `count` registers.
 * Note: count is the number of registers, not the raw packet0 count
 * (which is count - 1)! */
#define OUT_CS_REG_SEQ(register, count) \
    OUT_CS(CP_PACKET0((register), ((count) - 1)))

/* Same as OUT_CS_REG_SEQ, with RADEON_ONE_REG_WR set in the header. */
#define OUT_CS_ONE_REG(register, count) \
    OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR)

/* Packet3 header. */
#define OUT_CS_PKT3(op, count) \
    OUT_CS(CP_PACKET3(op, count))

/* Copy `count` dwords from `values` into the command stream and account
 * for them. */
#define OUT_CS_TABLE(values, count) do {                            \
    memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4);     \
    cs_copy->cdw += (count);                                        \
    CS_USED_DW(count);                                              \
} while (0)
/**
 * Writing relocations.
 */

/* Emit a relocation for r's cs_buf through the winsys and account for the
 * two dwords it is budgeted at.  Both r and r->cs_buf must be non-NULL. */
#define OUT_CS_RELOC(r) do {                                \
    assert((r));                                            \
    assert((r)->cs_buf);                                    \
    cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf);        \
    CS_USED_DW(2);                                          \
} while (0)
/**
 * Command buffer emission.
 */

/* Copy a prebuilt table of `count` dwords straight into the command
 * stream.  Must be used outside a BEGIN_CS/END_CS pair: it asserts that no
 * dword budget is pending and does not touch cs_count. */
#define WRITE_CS_TABLE(values, count) do {                          \
    assert(cs_count == 0);                                          \
    memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4);     \
    cs_copy->cdw += (count);                                        \
} while (0)
#endif /* R300_CS_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_debug.c |
---|
0,0 → 1,143 |
/* |
* Copyright 2009 Nicolai Haehnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_context.h" |
#include "util/u_debug.h" |
#include <stdio.h> |
static const struct debug_named_value debug_options[] = { |
{ "info", DBG_INFO, "Print hardware info (printed by default on debug builds"}, |
{ "fp", DBG_FP, "Log fragment program compilation" }, |
{ "vp", DBG_VP, "Log vertex program compilation" }, |
{ "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, |
{ "draw", DBG_DRAW, "Log draw calls" }, |
{ "swtcl", DBG_SWTCL, "Log SWTCL-specific info" }, |
{ "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" }, |
{ "psc", DBG_PSC, "Log vertex stream registers" }, |
{ "tex", DBG_TEX, "Log basic info about textures" }, |
{ "texalloc", DBG_TEXALLOC, "Log texture mipmap tree info" }, |
{ "rs", DBG_RS, "Log rasterizer" }, |
{ "fb", DBG_FB, "Log framebuffer" }, |
{ "cbzb", DBG_CBZB, "Log fast color clear info" }, |
{ "hyperz", DBG_HYPERZ, "Log HyperZ info" }, |
{ "scissor", DBG_SCISSOR, "Log scissor info" }, |
{ "msaa", DBG_MSAA, "Log MSAA resources"}, |
{ "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" }, |
{ "notiling", DBG_NO_TILING, "Disable tiling" }, |
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode" }, |
{ "noopt", DBG_NO_OPT, "Disable shader optimizations" }, |
{ "nocbzb", DBG_NO_CBZB, "Disable fast color clear" }, |
{ "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" }, |
{ "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" }, |
{ "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" }, |
/* must be last */ |
DEBUG_NAMED_VALUE_END |
}; |
void r300_init_debug(struct r300_screen * screen) |
{ |
screen->debug = debug_get_flags_option("RADEON_DEBUG", debug_options, 0); |
} |
void r500_dump_rs_block(struct r300_rs_block *rs) |
{ |
unsigned count, ip, it_count, ic_count, i, j; |
unsigned tex_ptr; |
unsigned col_ptr, col_fmt; |
count = rs->inst_count & 0xf; |
count++; |
it_count = rs->count & 0x7f; |
ic_count = (rs->count >> 7) & 0xf; |
fprintf(stderr, "RS Block: %d texcoords (linear), %d colors (perspective)\n", |
it_count, ic_count); |
fprintf(stderr, "%d instructions\n", count); |
for (i = 0; i < count; i++) { |
if (rs->inst[i] & 0x10) { |
ip = rs->inst[i] & 0xf; |
fprintf(stderr, "texture: ip %d to psf %d\n", |
ip, (rs->inst[i] >> 5) & 0x7f); |
tex_ptr = rs->ip[ip] & 0xffffff; |
fprintf(stderr, " : "); |
j = 3; |
do { |
if ((tex_ptr & 0x3f) == 63) { |
fprintf(stderr, "1.0"); |
} else if ((tex_ptr & 0x3f) == 62) { |
fprintf(stderr, "0.0"); |
} else { |
fprintf(stderr, "[%d]", tex_ptr & 0x3f); |
} |
} while (j-- && fprintf(stderr, "/")); |
fprintf(stderr, "\n"); |
} |
if (rs->inst[i] & 0x10000) { |
ip = (rs->inst[i] >> 12) & 0xf; |
fprintf(stderr, "color: ip %d to psf %d\n", |
ip, (rs->inst[i] >> 18) & 0x7f); |
col_ptr = (rs->ip[ip] >> 24) & 0x7; |
col_fmt = (rs->ip[ip] >> 27) & 0xf; |
fprintf(stderr, " : offset %d ", col_ptr); |
switch (col_fmt) { |
case 0: |
fprintf(stderr, "(R/G/B/A)"); |
break; |
case 1: |
fprintf(stderr, "(R/G/B/0)"); |
break; |
case 2: |
fprintf(stderr, "(R/G/B/1)"); |
break; |
case 4: |
fprintf(stderr, "(0/0/0/A)"); |
break; |
case 5: |
fprintf(stderr, "(0/0/0/0)"); |
break; |
case 6: |
fprintf(stderr, "(0/0/0/1)"); |
break; |
case 8: |
fprintf(stderr, "(1/1/1/A)"); |
break; |
case 9: |
fprintf(stderr, "(1/1/1/0)"); |
break; |
case 10: |
fprintf(stderr, "(1/1/1/1)"); |
break; |
} |
fprintf(stderr, "\n"); |
} |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_defines.h |
---|
0,0 → 1,36 |
/* |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_DEFINES_H |
#define R300_DEFINES_H |
#include "pipe/p_defines.h" |
/* Driver-wide limits. */
#define R300_MAX_TEXTURE_LEVELS         13
#define R300_MAX_DRAW_VBO_SIZE          (1024 * 1024)

/* Driver-private resource flags, allocated upwards from
 * PIPE_RESOURCE_FLAG_DRV_PRIV. */
#define R300_RESOURCE_FLAG_TRANSFER     (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R300_RESOURCE_FORCE_MICROTILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)

/* NOTE(review): presumably a sentinel for "no hardware format" - confirm
 * against the users of this constant. */
#define R300_INVALID_FORMAT             0xffff
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_emit.c |
---|
0,0 → 1,1439 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
/* r300_emit: Functions for emitting state. */ |
#include "util/u_format.h" |
#include "util/u_math.h" |
#include "util/u_mm.h" |
#include "r300_context.h" |
#include "r300_cb.h" |
#include "r300_cs.h" |
#include "r300_emit.h" |
#include "r300_fs.h" |
#include "r300_screen.h" |
#include "r300_screen_buffer.h" |
#include "r300_vs.h" |
void r300_emit_blend_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_blend_state* blend = (struct r300_blend_state*)state; |
struct pipe_framebuffer_state* fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
CS_LOCALS(r300); |
if (fb->nr_cbufs) { |
if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) { |
WRITE_CS_TABLE(blend->cb_noclamp, size); |
} else if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT) { |
WRITE_CS_TABLE(blend->cb_noclamp_noalpha, size); |
} else { |
unsigned swz = r300_surface(fb->cbufs[0])->colormask_swizzle; |
WRITE_CS_TABLE(blend->cb_clamp[swz], size); |
} |
} else { |
WRITE_CS_TABLE(blend->cb_no_readwrite, size); |
} |
} |
void r300_emit_blend_color_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; |
CS_LOCALS(r300); |
WRITE_CS_TABLE(bc->cb, size); |
} |
void r300_emit_clip_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_clip_state* clip = (struct r300_clip_state*)state; |
CS_LOCALS(r300); |
WRITE_CS_TABLE(clip->cb, size); |
} |
void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) |
{ |
struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; |
struct pipe_framebuffer_state* fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
boolean is_r500 = r300->screen->caps.is_r500; |
CS_LOCALS(r300); |
uint32_t alpha_func = dsa->alpha_function; |
/* Choose the alpha ref value between 8-bit (FG_ALPHA_FUNC.AM_VAL) and |
* 16-bit (FG_ALPHA_VALUE). */ |
if (is_r500 && (alpha_func & R300_FG_ALPHA_FUNC_ENABLE)) { |
if (fb->nr_cbufs && |
(fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT || |
fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT)) { |
alpha_func |= R500_FG_ALPHA_FUNC_FP16_ENABLE; |
} else { |
alpha_func |= R500_FG_ALPHA_FUNC_8BIT; |
} |
} |
/* Setup alpha-to-coverage. */ |
if (r300->alpha_to_coverage && r300->msaa_enable) { |
/* Always set 3/6, it improves precision even for 2x and 4x MSAA. */ |
alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE | |
R300_FG_ALPHA_FUNC_CFG_3_OF_6; |
} |
BEGIN_CS(size); |
OUT_CS_REG(R300_FG_ALPHA_FUNC, alpha_func); |
OUT_CS_TABLE(fb->zsbuf ? &dsa->cb_begin : dsa->cb_zb_no_readwrite, size-2); |
END_CS; |
} |
static void get_rc_constant_state( |
float vec[4], |
struct r300_context * r300, |
struct rc_constant * constant) |
{ |
struct r300_textures_state* texstate = r300->textures_state.state; |
struct r300_resource *tex; |
assert(constant->Type == RC_CONSTANT_STATE); |
/* vec should either be (0, 0, 0, 1), which should be a relatively safe |
* RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE |
* state factors. */ |
switch (constant->u.State[0]) { |
/* Factor for converting rectangle coords to |
* normalized coords. Should only show up on non-r500. */ |
case RC_STATE_R300_TEXRECT_FACTOR: |
tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); |
vec[0] = 1.0 / tex->tex.width0; |
vec[1] = 1.0 / tex->tex.height0; |
vec[2] = 0; |
vec[3] = 1; |
break; |
case RC_STATE_R300_TEXSCALE_FACTOR: |
tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); |
/* Add a small number to the texture size to work around rounding errors in hw. */ |
vec[0] = tex->b.b.width0 / (tex->tex.width0 + 0.001f); |
vec[1] = tex->b.b.height0 / (tex->tex.height0 + 0.001f); |
vec[2] = tex->b.b.depth0 / (tex->tex.depth0 + 0.001f); |
vec[3] = 1; |
break; |
case RC_STATE_R300_VIEWPORT_SCALE: |
vec[0] = r300->viewport.scale[0]; |
vec[1] = r300->viewport.scale[1]; |
vec[2] = r300->viewport.scale[2]; |
vec[3] = 1; |
break; |
case RC_STATE_R300_VIEWPORT_OFFSET: |
vec[0] = r300->viewport.translate[0]; |
vec[1] = r300->viewport.translate[1]; |
vec[2] = r300->viewport.translate[2]; |
vec[3] = 1; |
break; |
default: |
fprintf(stderr, "r300: Implementation error: " |
"Unknown RC_CONSTANT type %d\n", constant->u.State[0]); |
vec[0] = 0; |
vec[1] = 0; |
vec[2] = 0; |
vec[3] = 1; |
} |
} |
/* Convert a normal single-precision float into the 7.16 format
 * used by the R300 fragment shader.
 *
 * Layout of the 24-bit result:
 *   bit  23     sign
 *   bits 22:16  exponent, bias 63
 *   bits 15:0   the 16 most significant mantissa bits of the input
 *
 * Zero (of either sign) packs to 0.  Values outside the representable
 * exponent range are saturated instead of being allowed to corrupt
 * neighbouring fields: magnitudes too small for the 7-bit exponent pack to
 * 0, magnitudes too large pack to the largest encoding with the input's
 * sign.
 */
uint32_t pack_float24(float f)
{
    union {
        float fl;
        uint32_t u;
    } u;
    int exponent;
    uint32_t float24 = 0;

    if (f == 0.0)
        return 0;

    u.fl = f;

    /* Handle -ve: frexpf() returns a mantissa carrying the sign of f. */
    if (frexpf(f, &exponent) < 0)
        float24 |= (1u << 23);

    /* Handle exponent, bias of 63.  frexpf() normalizes the mantissa to
     * [0.5, 1), so its exponent is one above the IEEE one: add 62. */
    exponent += 62;

    /* Underflow: shifting a negative exponent would be undefined. */
    if (exponent < 0)
        return 0;

    /* Overflow: an exponent above 7 bits would spill into the sign bit. */
    if (exponent > 0x7f)
        return float24 | 0x7fffff;

    float24 |= ((uint32_t)exponent << 16);

    /* Kill 7 LSB of mantissa */
    float24 |= (u.u & 0x7FFFFF) >> 7;

    return float24;
}
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) |
{ |
struct r300_fragment_shader *fs = r300_fs(r300); |
CS_LOCALS(r300); |
WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size); |
} |
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state) |
{ |
struct r300_fragment_shader *fs = r300_fs(r300); |
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; |
unsigned count = fs->shader->externals_count; |
unsigned i, j; |
CS_LOCALS(r300); |
if (count == 0) |
return; |
BEGIN_CS(size); |
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); |
if (buf->remap_table){ |
for (i = 0; i < count; i++) { |
float *data = (float*)&buf->ptr[buf->remap_table[i]*4]; |
for (j = 0; j < 4; j++) |
OUT_CS(pack_float24(data[j])); |
} |
} else { |
for (i = 0; i < count; i++) |
for (j = 0; j < 4; j++) |
OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j])); |
} |
END_CS; |
} |
void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state) |
{ |
struct r300_fragment_shader *fs = r300_fs(r300); |
struct rc_constant_list *constants = &fs->shader->code.constants; |
unsigned i; |
unsigned count = fs->shader->rc_state_count; |
unsigned first = fs->shader->externals_count; |
unsigned end = constants->Count; |
unsigned j; |
CS_LOCALS(r300); |
if (count == 0) |
return; |
BEGIN_CS(size); |
for(i = first; i < end; ++i) { |
if (constants->Constants[i].Type == RC_CONSTANT_STATE) { |
float data[4]; |
get_rc_constant_state(data, r300, &constants->Constants[i]); |
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); |
for (j = 0; j < 4; j++) |
OUT_CS(pack_float24(data[j])); |
} |
} |
END_CS; |
} |
void r500_emit_fs(struct r300_context* r300, unsigned size, void *state) |
{ |
struct r300_fragment_shader *fs = r300_fs(r300); |
CS_LOCALS(r300); |
WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size); |
} |
void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state) |
{ |
struct r300_fragment_shader *fs = r300_fs(r300); |
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; |
unsigned count = fs->shader->externals_count; |
CS_LOCALS(r300); |
if (count == 0) |
return; |
BEGIN_CS(size); |
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); |
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); |
if (buf->remap_table){ |
for (unsigned i = 0; i < count; i++) { |
uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; |
OUT_CS_TABLE(data, 4); |
} |
} else { |
OUT_CS_TABLE(buf->ptr, count * 4); |
} |
END_CS; |
} |
void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state) |
{ |
struct r300_fragment_shader *fs = r300_fs(r300); |
struct rc_constant_list *constants = &fs->shader->code.constants; |
unsigned i; |
unsigned count = fs->shader->rc_state_count; |
unsigned first = fs->shader->externals_count; |
unsigned end = constants->Count; |
CS_LOCALS(r300); |
if (count == 0) |
return; |
BEGIN_CS(size); |
for(i = first; i < end; ++i) { |
if (constants->Constants[i].Type == RC_CONSTANT_STATE) { |
float data[4]; |
get_rc_constant_state(data, r300, &constants->Constants[i]); |
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, |
R500_GA_US_VECTOR_INDEX_TYPE_CONST | |
(i & R500_GA_US_VECTOR_INDEX_MASK)); |
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); |
OUT_CS_TABLE(data, 4); |
} |
} |
END_CS; |
} |
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) |
{ |
struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; |
struct pipe_framebuffer_state* fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
uint32_t height = fb->height; |
uint32_t width = fb->width; |
CS_LOCALS(r300); |
if (r300->cbzb_clear) { |
struct r300_surface *surf = r300_surface(fb->cbufs[0]); |
height = surf->cbzb_height; |
width = surf->cbzb_width; |
} |
DBG(r300, DBG_SCISSOR, |
"r300: Scissor width: %i, height: %i, CBZB clear: %s\n", |
width, height, r300->cbzb_clear ? "YES" : "NO"); |
BEGIN_CS(size); |
/* Set up scissors. |
* By writing to the SC registers, SC & US assert idle. */ |
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); |
if (r300->screen->caps.is_r500) { |
OUT_CS(0); |
OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | |
((height - 1) << R300_SCISSORS_Y_SHIFT)); |
} else { |
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | |
(1440 << R300_SCISSORS_Y_SHIFT)); |
OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | |
((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); |
} |
/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ |
OUT_CS_TABLE(gpuflush->cb_flush_clean, 6); |
END_CS; |
} |
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) |
{ |
struct r300_aa_state *aa = (struct r300_aa_state*)state; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); |
if (aa->dest) { |
OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 3); |
OUT_CS(aa->dest->offset); |
OUT_CS(aa->dest->pitch & R300_RB3D_AARESOLVE_PITCH_MASK); |
OUT_CS(R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | |
R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE); |
OUT_CS_RELOC(aa->dest); |
} else { |
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0); |
} |
END_CS; |
} |
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) |
{ |
struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; |
struct r300_surface* surf; |
unsigned i; |
uint32_t rb3d_cctl = 0; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
if (r300->screen->caps.is_r500) { |
rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; |
} |
/* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */ |
if (fb->nr_cbufs && r300->fb_multiwrite) { |
rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); |
} |
if (r300->cmask_in_use) { |
rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE | |
R300_RB3D_CCTL_CMASK_ENABLE; |
} |
OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); |
/* Set up colorbuffers. */ |
for (i = 0; i < fb->nr_cbufs; i++) { |
surf = r300_surface(fb->cbufs[i]); |
OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); |
OUT_CS_RELOC(surf); |
OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); |
OUT_CS_RELOC(surf); |
if (r300->cmask_in_use && i == 0) { |
OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0); |
OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask); |
OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value); |
if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) { |
OUT_CS_REG_SEQ(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2); |
OUT_CS(r300->color_clear_value_ar); |
OUT_CS(r300->color_clear_value_gb); |
} |
} |
} |
/* Set up the ZB part of the CBZB clear. */ |
if (r300->cbzb_clear) { |
surf = r300_surface(fb->cbufs[0]); |
OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); |
OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset); |
OUT_CS_RELOC(surf); |
OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch); |
OUT_CS_RELOC(surf); |
DBG(r300, DBG_CBZB, |
"CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, |
surf->cbzb_pitch); |
} |
/* Set up a zbuffer. */ |
else if (fb->zsbuf) { |
surf = r300_surface(fb->zsbuf); |
OUT_CS_REG(R300_ZB_FORMAT, surf->format); |
OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset); |
OUT_CS_RELOC(surf); |
OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); |
OUT_CS_RELOC(surf); |
if (r300->hyperz_enabled) { |
/* HiZ RAM. */ |
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); |
OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); |
/* Z Mask RAM. (compressed zbuffer) */ |
OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); |
OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); |
} |
} |
END_CS; |
} |
void r300_emit_hyperz_state(struct r300_context *r300, |
unsigned size, void *state) |
{ |
struct r300_hyperz_state *z = state; |
CS_LOCALS(r300); |
if (z->flush) |
WRITE_CS_TABLE(&z->cb_flush_begin, size); |
else |
WRITE_CS_TABLE(&z->cb_begin, size - 2); |
} |
void r300_emit_hyperz_end(struct r300_context *r300) |
{ |
struct r300_hyperz_state z = |
*(struct r300_hyperz_state*)r300->hyperz_state.state; |
z.flush = 1; |
z.zb_bw_cntl = 0; |
z.zb_depthclearvalue = 0; |
z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; |
z.gb_z_peq_config = 0; |
r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); |
} |
/* Pack eight values, masked to 4 bits each, into one dword; the first
 * argument lands in the lowest nibble. */
#define R300_NIBBLES(x0, y0, x1, y1, x2, y2, d0y, d0x) \
    (((x0) & 0xf) | (((y0) & 0xf) << 4) | \
    (((x1) & 0xf) << 8) | (((y1) & 0xf) << 12) | \
    (((x2) & 0xf) << 16) | (((y2) & 0xf) << 20) | \
    (((d0y) & 0xf) << 24) | (((d0x) & 0xf) << 28))

/* Build the value of one of the two MSPOS registers.
 *
 * index  0 selects MSPOS0, anything else MSPOS1.
 * p      twelve entries: (X,Y) pairs for six samples.
 *
 * MSPOS0 holds samples 0,1,2 as (X,Y) nibble pairs followed by a (Y,X)
 * pair with the minimum distance from the pixel edge:
 *     X0, Y0, X1, Y1, X2, Y2, D0_Y, D0_X
 * D0_X is quirky: it is the distance from the left edge of the pixel quad
 * to the first sample in subpixels.  Values below eight are used as they
 * are, but "7" has to be written for a distance of "8"; the hardware
 * turns 7 into 8 internally.
 *
 * MSPOS1 holds samples 3,4,5 the same way, followed by a single minimum
 * distance (not sure if X or Y):
 *     X3, Y3, X4, Y4, X5, Y5, D1
 */
static unsigned r300_get_mspos(int index, unsigned *p)
{
    unsigned min_x = 11, min_y = 11, min_any = 11;
    unsigned s, k;

    if (index != 0) {
        /* Minimum over every coordinate, X and Y alike. */
        for (k = 0; k < 12; k++) {
            if (p[k] < min_any)
                min_any = p[k];
        }
        return R300_NIBBLES(p[6], p[7], p[8], p[9], p[10], p[11], min_any, 0);
    }

    /* Minimum X and minimum Y over all six samples. */
    for (s = 0; s < 6; s++) {
        if (p[2 * s] < min_x)
            min_x = p[2 * s];
        if (p[2 * s + 1] < min_y)
            min_y = p[2 * s + 1];
    }

    /* The D0_X quirk described above. */
    if (min_x == 8)
        min_x = 7;

    return R300_NIBBLES(p[0], p[1], p[2], p[3], p[4], p[5], min_y, min_x);
}
void r300_emit_fb_state_pipelined(struct r300_context *r300, |
unsigned size, void *state) |
{ |
/* The sample coordinates are in the range [0,11], because |
* GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision. |
* |
* Some sample coordinates reach to neighboring pixels and should not be used. |
* (e.g. Y=11) |
* |
* The unused samples must be set to the positions of other valid samples. */ |
static unsigned sample_locs_1x[12] = { |
6,6, 6,6, 6,6, 6,6, 6,6, 6,6 |
}; |
static unsigned sample_locs_2x[12] = { |
3,9, 9,3, 9,3, 9,3, 9,3, 9,3 |
}; |
static unsigned sample_locs_4x[12] = { |
4,4, 8,8, 2,10, 10,2, 10,2, 10,2 |
}; |
static unsigned sample_locs_6x[12] = { |
3,1, 7,3, 11,5, 1,7, 5,9, 9,10 |
}; |
struct pipe_framebuffer_state* fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
unsigned i, num_cbufs = fb->nr_cbufs; |
unsigned mspos0, mspos1; |
CS_LOCALS(r300); |
/* If we use the multiwrite feature, the colorbuffers 2,3,4 must be |
* marked as UNUSED in the US block. */ |
if (r300->fb_multiwrite) { |
num_cbufs = MIN2(num_cbufs, 1); |
} |
BEGIN_CS(size); |
/* Colorbuffer format in the US block. |
* (must be written after unpipelined regs) */ |
OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); |
for (i = 0; i < num_cbufs; i++) { |
OUT_CS(r300_surface(fb->cbufs[i])->format); |
} |
for (; i < 1; i++) { |
OUT_CS(R300_US_OUT_FMT_C4_8 | |
R300_C0_SEL_B | R300_C1_SEL_G | |
R300_C2_SEL_R | R300_C3_SEL_A); |
} |
for (; i < 4; i++) { |
OUT_CS(R300_US_OUT_FMT_UNUSED); |
} |
/* Set sample positions. It depends on the framebuffer sample count. |
* These are pipelined regs and as such cannot be moved to the AA state. |
*/ |
switch (r300->num_samples) { |
default: |
mspos0 = r300_get_mspos(0, sample_locs_1x); |
mspos1 = r300_get_mspos(1, sample_locs_1x); |
break; |
case 2: |
mspos0 = r300_get_mspos(0, sample_locs_2x); |
mspos1 = r300_get_mspos(1, sample_locs_2x); |
break; |
case 4: |
mspos0 = r300_get_mspos(0, sample_locs_4x); |
mspos1 = r300_get_mspos(1, sample_locs_4x); |
break; |
case 6: |
mspos0 = r300_get_mspos(0, sample_locs_6x); |
mspos1 = r300_get_mspos(1, sample_locs_6x); |
break; |
} |
OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); |
OUT_CS(mspos0); |
OUT_CS(mspos1); |
END_CS; |
} |
void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) |
{ |
struct r300_query *query = r300->query_current; |
CS_LOCALS(r300); |
if (!query) |
return; |
BEGIN_CS(size); |
if (r300->screen->caps.family == CHIP_RV530) { |
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); |
} else { |
OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); |
} |
OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); |
END_CS; |
query->begin_emitted = TRUE; |
} |
static void r300_emit_query_end_frag_pipes(struct r300_context *r300, |
struct r300_query *query) |
{ |
struct r300_capabilities* caps = &r300->screen->caps; |
uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes; |
CS_LOCALS(r300); |
assert(gb_pipes); |
BEGIN_CS(6 * gb_pipes + 2); |
/* I'm not so sure I like this switch, but it's hard to be elegant |
* when there's so many special cases... |
* |
* So here's the basic idea. For each pipe, enable writes to it only, |
* then put out the relocation for ZPASS_ADDR, taking into account a |
* 4-byte offset for each pipe. RV380 and older are special; they have |
* only two pipes, and the second pipe's enable is on bit 3, not bit 1, |
* so there's a chipset cap for that. */ |
switch (gb_pipes) { |
case 4: |
/* pipe 3 only */ |
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4); |
OUT_CS_RELOC(r300->query_current); |
case 3: |
/* pipe 2 only */ |
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4); |
OUT_CS_RELOC(r300->query_current); |
case 2: |
/* pipe 1 only */ |
/* As mentioned above, accomodate RV380 and older. */ |
OUT_CS_REG(R300_SU_REG_DEST, |
1 << (caps->high_second_pipe ? 3 : 1)); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); |
OUT_CS_RELOC(r300->query_current); |
case 1: |
/* pipe 0 only */ |
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); |
OUT_CS_RELOC(r300->query_current); |
break; |
default: |
fprintf(stderr, "r300: Implementation error: Chipset reports %d" |
" pixel pipes!\n", gb_pipes); |
abort(); |
} |
/* And, finally, reset it to normal... */ |
OUT_CS_REG(R300_SU_REG_DEST, 0xF); |
END_CS; |
} |
static void rv530_emit_query_end_single_z(struct r300_context *r300, |
struct r300_query *query) |
{ |
CS_LOCALS(r300); |
BEGIN_CS(8); |
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4); |
OUT_CS_RELOC(r300->query_current); |
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); |
END_CS; |
} |
static void rv530_emit_query_end_double_z(struct r300_context *r300, |
struct r300_query *query) |
{ |
CS_LOCALS(r300); |
BEGIN_CS(14); |
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); |
OUT_CS_RELOC(r300->query_current); |
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); |
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); |
OUT_CS_RELOC(r300->query_current); |
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); |
END_CS; |
} |
void r300_emit_query_end(struct r300_context* r300) |
{ |
struct r300_capabilities *caps = &r300->screen->caps; |
struct r300_query *query = r300->query_current; |
if (!query) |
return; |
if (query->begin_emitted == FALSE) |
return; |
if (caps->family == CHIP_RV530) { |
if (r300->screen->info.r300_num_z_pipes == 2) |
rv530_emit_query_end_double_z(r300, query); |
else |
rv530_emit_query_end_single_z(r300, query); |
} else |
r300_emit_query_end_frag_pipes(r300, query); |
query->begin_emitted = FALSE; |
query->num_results += query->num_pipes; |
/* XXX grab all the results and reset the counter. */ |
if (query->num_results >= query->buf->size / 4 - 4) { |
query->num_results = (query->buf->size / 4) / 2; |
fprintf(stderr, "r300: Rewinding OQBO...\n"); |
} |
} |
/* Emit the invariant state: `state` itself is the prebuilt command table,
 * copied verbatim (`size` dwords) into the command stream. */
void r300_emit_invariant_state(struct r300_context *r300,
                               unsigned size, void *state)
{
    void *table = state;
    CS_LOCALS(r300);

    WRITE_CS_TABLE(table, size);
}
void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) |
{ |
struct r300_rs_state* rs = state; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE); |
if (rs->polygon_offset_enable) { |
if (r300->zbuffer_bpp == 16) { |
OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); |
} else { |
OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5); |
} |
} |
END_CS; |
} |
/* Emit the RS block atom: VAP vertex state/output format registers,
 * GB_ENABLE, then the RS IP table, RS_COUNT/INST_COUNT and the RS INST
 * table. R500 uses different base registers for the IP and INST tables.
 * With DBG_RS_BLOCK enabled the tables are also dumped to stderr first. */
void r300_emit_rs_block_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_rs_block* rs = (struct r300_rs_block*)state; |
unsigned i; |
/* It's the same for both INST and IP tables */ |
unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; |
CS_LOCALS(r300); |
/* Optional debug dump; does not touch the command stream. */
if (DBG_ON(r300, DBG_RS_BLOCK)) { |
r500_dump_rs_block(rs); |
fprintf(stderr, "r300: RS emit:\n"); |
for (i = 0; i < count; i++) |
fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]); |
for (i = 0; i < count; i++) |
fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]); |
fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n", |
rs->count, rs->inst_count); |
} |
BEGIN_CS(size); |
OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); |
OUT_CS(rs->vap_vtx_state_cntl); |
OUT_CS(rs->vap_vsm_vtx_assm); |
OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); |
OUT_CS(rs->vap_out_vtx_fmt[0]); |
OUT_CS(rs->vap_out_vtx_fmt[1]); |
OUT_CS_REG_SEQ(R300_GB_ENABLE, 1); |
OUT_CS(rs->gb_enable); |
/* IP table: `count` consecutive registers starting at the chip's RS_IP_0. */
if (r300->screen->caps.is_r500) { |
OUT_CS_REG_SEQ(R500_RS_IP_0, count); |
} else { |
OUT_CS_REG_SEQ(R300_RS_IP_0, count); |
} |
OUT_CS_TABLE(rs->ip, count); |
OUT_CS_REG_SEQ(R300_RS_COUNT, 2); |
OUT_CS(rs->count); |
OUT_CS(rs->inst_count); |
/* INST table: same length as the IP table. */
if (r300->screen->caps.is_r500) { |
OUT_CS_REG_SEQ(R500_RS_INST_0, count); |
} else { |
OUT_CS_REG_SEQ(R300_RS_INST_0, count); |
} |
OUT_CS_TABLE(rs->inst, count); |
END_CS; |
} |
void r300_emit_sample_mask(struct r300_context *r300, |
unsigned size, void *state) |
{ |
unsigned mask = (*(unsigned*)state) & ((1 << 6)-1); |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_SC_SCREENDOOR, |
mask | (mask << 6) | (mask << 12) | (mask << 18)); |
END_CS; |
} |
void r300_emit_scissor_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG_SEQ(R300_SC_CLIPRECT_TL_0, 2); |
if (r300->screen->caps.is_r500) { |
OUT_CS((scissor->minx << R300_CLIPRECT_X_SHIFT) | |
(scissor->miny << R300_CLIPRECT_Y_SHIFT)); |
OUT_CS(((scissor->maxx - 1) << R300_CLIPRECT_X_SHIFT) | |
((scissor->maxy - 1) << R300_CLIPRECT_Y_SHIFT)); |
} else { |
OUT_CS(((scissor->minx + 1440) << R300_CLIPRECT_X_SHIFT) | |
((scissor->miny + 1440) << R300_CLIPRECT_Y_SHIFT)); |
OUT_CS(((scissor->maxx + 1440-1) << R300_CLIPRECT_X_SHIFT) | |
((scissor->maxy + 1440-1) << R300_CLIPRECT_Y_SHIFT)); |
} |
END_CS; |
} |
/* Emit the texture/sampler atom.
 * Writes TX_ENABLE, then for every unit whose bit is set in tx_enable its
 * filter, border colour, format and offset registers followed by a
 * relocation for the texture. US_FORMAT0 is written only when the screen
 * caps report has_us_format. */
void r300_emit_textures_state(struct r300_context *r300, |
unsigned size, void *state) |
{ |
struct r300_textures_state *allstate = (struct r300_textures_state*)state; |
struct r300_texture_sampler_state *texstate; |
struct r300_resource *tex; |
unsigned i; |
boolean has_us_format = r300->screen->caps.has_us_format; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_TX_ENABLE, allstate->tx_enable); |
for (i = 0; i < allstate->count; i++) { |
/* Skip units that are not enabled. */
if ((1 << i) & allstate->tx_enable) { |
texstate = &allstate->regs[i]; |
tex = r300_resource(allstate->sampler_views[i]->base.texture); |
/* Per-unit registers are 4 bytes apart, hence i * 4. */
OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0); |
OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1); |
OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (i * 4), |
texstate->border_color); |
OUT_CS_REG(R300_TX_FORMAT0_0 + (i * 4), texstate->format.format0); |
OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1); |
OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); |
/* The offset register write is immediately followed by the texture reloc. */
OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); |
OUT_CS_RELOC(tex); |
if (has_us_format) { |
OUT_CS_REG(R500_US_FORMAT0_0 + (i * 4), |
texstate->format.us_format0); |
} |
} |
} |
END_CS; |
} |
void r300_emit_vertex_arrays(struct r300_context* r300, int offset, |
boolean indexed, int instance_id) |
{ |
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; |
struct pipe_vertex_element *velem = r300->velems->velem; |
struct r300_resource *buf; |
int i; |
unsigned vertex_array_count = r300->velems->count; |
unsigned packet_size = (vertex_array_count * 3 + 1) / 2; |
struct pipe_vertex_buffer *vb1, *vb2; |
unsigned *hw_format_size = r300->velems->format_size; |
unsigned size1, size2, offset1, offset2, stride1, stride2; |
CS_LOCALS(r300); |
BEGIN_CS(2 + packet_size + vertex_array_count * 2); |
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); |
OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); |
if (instance_id == -1) { |
/* Non-instanced arrays. This ignores instance_divisor and instance_id. */ |
for (i = 0; i < vertex_array_count - 1; i += 2) { |
vb1 = &vbuf[velem[i].vertex_buffer_index]; |
vb2 = &vbuf[velem[i+1].vertex_buffer_index]; |
size1 = hw_format_size[i]; |
size2 = hw_format_size[i+1]; |
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); |
OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); |
OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); |
} |
if (vertex_array_count & 1) { |
vb1 = &vbuf[velem[i].vertex_buffer_index]; |
size1 = hw_format_size[i]; |
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); |
OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); |
} |
for (i = 0; i < vertex_array_count; i++) { |
buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer); |
OUT_CS_RELOC(buf); |
} |
} else { |
/* Instanced arrays. */ |
for (i = 0; i < vertex_array_count - 1; i += 2) { |
vb1 = &vbuf[velem[i].vertex_buffer_index]; |
vb2 = &vbuf[velem[i+1].vertex_buffer_index]; |
size1 = hw_format_size[i]; |
size2 = hw_format_size[i+1]; |
if (velem[i].instance_divisor) { |
stride1 = 0; |
offset1 = vb1->buffer_offset + velem[i].src_offset + |
(instance_id / velem[i].instance_divisor) * vb1->stride; |
} else { |
stride1 = vb1->stride; |
offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; |
} |
if (velem[i+1].instance_divisor) { |
stride2 = 0; |
offset2 = vb2->buffer_offset + velem[i+1].src_offset + |
(instance_id / velem[i+1].instance_divisor) * vb2->stride; |
} else { |
stride2 = vb2->stride; |
offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; |
} |
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2)); |
OUT_CS(offset1); |
OUT_CS(offset2); |
} |
if (vertex_array_count & 1) { |
vb1 = &vbuf[velem[i].vertex_buffer_index]; |
size1 = hw_format_size[i]; |
if (velem[i].instance_divisor) { |
stride1 = 0; |
offset1 = vb1->buffer_offset + velem[i].src_offset + |
(instance_id / velem[i].instance_divisor) * vb1->stride; |
} else { |
stride1 = vb1->stride; |
offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; |
} |
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); |
OUT_CS(offset1); |
} |
for (i = 0; i < vertex_array_count; i++) { |
buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer); |
OUT_CS_RELOC(buf); |
} |
} |
END_CS; |
} |
/* Emit the single vertex array used by the SWTCL path.
 * The array is r300->vbo_cs at r300->draw_vbo_offset, with
 * r300->vertex_info.size used for both the size and the stride field.
 * When `indexed` is FALSE, R300_VC_FORCE_PREFETCH is added to the count. */
void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) |
{ |
CS_LOCALS(r300); |
DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, " |
"vertex size %d\n", r300->vbo, |
r300->vertex_info.size); |
/* Set the pointer to our vertex buffer. The emitted values are this: |
* PACKET3 [3D_LOAD_VBPNTR] |
* COUNT [1] |
* FORMAT [size | stride << 8] |
* OFFSET [offset into BO] |
* VBPNTR [relocated BO] |
*/ |
BEGIN_CS(7); |
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); |
OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); |
OUT_CS(r300->vertex_info.size | |
(r300->vertex_info.size << 8)); |
OUT_CS(r300->draw_vbo_offset); |
OUT_CS(0); |
assert(r300->vbo_cs); |
/* The reloc is written through the winsys directly rather than with
 * OUT_CS_RELOC; CS_USED_DW(2) presumably accounts for the dwords it
 * wrote - confirm against r300_cs.h. */
cs_winsys->cs_write_reloc(cs_copy, r300->vbo_cs); |
CS_USED_DW(2); |
END_CS; |
} |
/* Emit the vertex stream (PSC) atom: streams->count entries of
 * VAP_PROG_STREAM_CNTL followed by the same number of
 * VAP_PROG_STREAM_CNTL_EXT entries. With DBG_PSC enabled both tables are
 * printed to stderr before anything is emitted. */
void r300_emit_vertex_stream_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_vertex_stream_state *streams = |
(struct r300_vertex_stream_state*)state; |
unsigned i; |
CS_LOCALS(r300); |
/* Debug dump only; no command stream access. */
if (DBG_ON(r300, DBG_PSC)) { |
fprintf(stderr, "r300: PSC emit:\n"); |
for (i = 0; i < streams->count; i++) { |
fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i, |
streams->vap_prog_stream_cntl[i]); |
} |
for (i = 0; i < streams->count; i++) { |
fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i, |
streams->vap_prog_stream_cntl_ext[i]); |
} |
} |
BEGIN_CS(size); |
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); |
OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); |
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); |
OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); |
END_CS; |
} |
/* Emit the PVS flush atom: a single write of 0 to
 * R300_VAP_PVS_STATE_FLUSH_REG. `state` is not used. */
void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) |
{ |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); |
END_CS; |
} |
/* Emit the VAP invariant state atom.
 * `state` and `size` are passed straight to WRITE_CS_TABLE. */
void r300_emit_vap_invariant_state(struct r300_context *r300, |
unsigned size, void *state) |
{ |
CS_LOCALS(r300); |
WRITE_CS_TABLE(state, size); |
} |
/* Emit the vertex shader atom: PVS code control registers, the shader
 * code upload, VAP_CNTL and the flow-control registers.
 * `state` is the r300_vertex_shader whose compiled code is uploaded. */
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) |
{ |
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; |
struct r300_vertex_program_code* code = &vs->code; |
struct r300_screen* r300screen = r300->screen; |
/* code->length is divided by 4 to get instructions
 * (presumably 4 dwords per instruction - confirm). */
unsigned instruction_count = code->length / 4; |
unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72; |
/* Each count is clamped to at least 1 so the divisions below are safe. */
unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1); |
unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1); |
unsigned temp_count = MAX2(code->num_temporaries, 1); |
/* Slots are limited by inputs and outputs and capped at 10;
 * controllers are limited by temporaries and capped at 5. */
unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count, |
vtx_mem_size / output_count, 10); |
unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5); |
CS_LOCALS(r300); |
BEGIN_CS(size); |
/* R300_VAP_PVS_CODE_CNTL_0 |
* R300_VAP_PVS_CONST_CNTL |
* R300_VAP_PVS_CODE_CNTL_1 |
* See the r5xx docs for instructions on how to use these. */ |
OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) | |
R300_PVS_XYZW_VALID_INST(instruction_count - 1) | |
R300_PVS_LAST_INST(instruction_count - 1)); |
OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1); |
/* Upload the code starting at vector index 0. */
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); |
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); |
OUT_CS_TABLE(code->body.d, code->length); |
OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | |
R300_PVS_NUM_CNTLRS(pvs_num_controllers) | |
R300_PVS_NUM_FPUS(r300screen->caps.num_vert_fpus) | |
R300_PVS_VF_MAX_VTX_NUM(12) | |
(r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); |
/* Emit flow control instructions. Even if there are no fc instructions, |
* we still need to write the registers to make sure they are cleared. */ |
OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); |
/* R500 writes twice as many address dwords per flow-control op. */
if (r300screen->caps.is_r500) { |
OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2); |
OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2); |
} else { |
OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS); |
OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS); |
} |
OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS); |
OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS); |
END_CS; |
} |
/* Emit the vertex shader constants atom.
 * Writes VAP_PVS_CONST_CNTL, then uploads the external constants from the
 * constant buffer (through remap_table when one is set) and finally the
 * immediates taken from the compiled shader's constant list.
 * `state` is the r300_constant_buffer; the shader is read from
 * r300->vs_state.state. */
void r300_emit_vs_constants(struct r300_context* r300, |
unsigned size, void *state) |
{ |
/* Same value as vs->externals_count below. */
unsigned count = |
((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; |
struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; |
struct r300_vertex_shader *vs = (struct r300_vertex_shader*)r300->vs_state.state; |
unsigned i; |
/* Immediates occupy constant slots [imm_first, imm_end). */
int imm_first = vs->externals_count; |
int imm_end = vs->code.constants.Count; |
int imm_count = vs->immediates_count; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, |
R300_PVS_CONST_BASE_OFFSET(buf->buffer_base) | |
R300_PVS_MAX_CONST_ADDR(MAX2(imm_end - 1, 0))); |
if (vs->externals_count) { |
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, |
(r300->screen->caps.is_r500 ? |
R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base); |
/* Four dwords per constant. */
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); |
if (buf->remap_table){ |
/* Remapped: constant i is read from slot remap_table[i]. */
for (i = 0; i < count; i++) { |
uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; |
OUT_CS_TABLE(data, 4); |
} |
} else { |
OUT_CS_TABLE(buf->ptr, count * 4); |
} |
} |
/* Emit immediates. */ |
if (imm_count) { |
/* Immediates are uploaded right after the externals. */
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, |
(r300->screen->caps.is_r500 ? |
R500_PVS_CONST_START : R300_PVS_CONST_START) + |
buf->buffer_base + imm_first); |
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4); |
for (i = imm_first; i < imm_end; i++) { |
const float *data = vs->code.constants.Constants[i].u.Immediate; |
OUT_CS_TABLE(data, 4); |
} |
} |
END_CS; |
} |
/* Emit the viewport atom: six consecutive values starting at
 * viewport->xscale go to the registers from R300_SE_VPORT_XSCALE on,
 * followed by R300_VAP_VTE_CNTL. */
void r300_emit_viewport_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); |
/* Relies on xscale being the first of six adjacent fields in the struct. */
OUT_CS_TABLE(&viewport->xscale, 6); |
OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); |
END_CS; |
} |
/* Emit a 3D_CLEAR_HIZ packet for the currently bound depth buffer.
 * The packet carries a start of 0, the HiZ dword count of the bound mip
 * level and r300->hiz_clear_value. Afterwards HiZ is flagged as in use,
 * hiz_func is reset to HIZ_FUNC_NONE and the hyperz atom is marked dirty.
 * `state` is not used; fb->zsbuf is dereferenced without a NULL check. */
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct r300_resource* tex; |
CS_LOCALS(r300); |
tex = r300_resource(fb->zsbuf->texture); |
BEGIN_CS(size); |
OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); |
OUT_CS(0); |
OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); |
OUT_CS(r300->hiz_clear_value); |
END_CS; |
/* Mark the current zbuffer's hiz ram as in use. */ |
r300->hiz_in_use = TRUE; |
r300->hiz_func = HIZ_FUNC_NONE; |
r300_mark_atom_dirty(r300, &r300->hyperz_state); |
} |
/* Emit a 3D_CLEAR_ZMASK packet for the currently bound depth buffer.
 * The packet carries a start of 0, the ZMASK dword count of the bound mip
 * level and a value of 0. Afterwards ZMASK is flagged as in use and the
 * hyperz atom is marked dirty.
 * `state` is not used; fb->zsbuf is dereferenced without a NULL check. */
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct r300_resource *tex; |
CS_LOCALS(r300); |
tex = r300_resource(fb->zsbuf->texture); |
BEGIN_CS(size); |
OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); |
OUT_CS(0); |
OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]); |
OUT_CS(0); |
END_CS; |
/* Mark the current zbuffer's zmask as in use. */ |
r300->zmask_in_use = TRUE; |
r300_mark_atom_dirty(r300, &r300->hyperz_state); |
} |
/* Emit a 3D_CLEAR_CMASK packet for colour buffer 0.
 * The packet carries a start of 0, the texture's CMASK dword count and a
 * value of 0. Afterwards CMASK is flagged as in use and the framebuffer
 * state is marked dirty with R300_CHANGED_CMASK_ENABLE.
 * `state` is not used; fb->cbufs[0] is dereferenced without a NULL check. */
void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct r300_resource *tex; |
CS_LOCALS(r300); |
tex = r300_resource(fb->cbufs[0]->texture); |
BEGIN_CS(size); |
OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2); |
OUT_CS(0); |
OUT_CS(tex->tex.cmask_dwords); |
OUT_CS(0); |
END_CS; |
/* Mark the first colour buffer's CMASK as in use. */ |
r300->cmask_in_use = TRUE; |
r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE); |
} |
/* Emit the ZTOP atom: writes ztop->z_buffer_top to R300_ZB_ZTOP. */
void r300_emit_ztop_state(struct r300_context* r300, |
unsigned size, void* state) |
{ |
struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); |
END_CS; |
} |
/* Emit the texture cache invalidation atom: a single write of 0 to
 * R300_TX_INVALTAGS. `state` is not used. */
void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state) |
{ |
CS_LOCALS(r300); |
BEGIN_CS(size); |
OUT_CS_REG(R300_TX_INVALTAGS, 0); |
END_CS; |
} |
/* Register every buffer the next draw will reference with the winsys and
 * validate the command stream.
 *
 * Buffers belonging to an atom (framebuffer, AA resolve, textures) are only
 * added when that atom is dirty. The occlusion query buffer, the SWTCL
 * vertex buffer and the index buffer are added whenever they are present.
 * HWTCL vertex buffers are added only when do_validate_vertex_buffers is
 * set and the vertex arrays are dirty.
 *
 * If cs_validate() fails, the whole list is rebuilt and validated once
 * more. Returns FALSE when the second attempt fails as well, TRUE
 * otherwise. */
boolean r300_emit_buffer_validate(struct r300_context *r300, |
boolean do_validate_vertex_buffers, |
struct pipe_resource *index_buffer) |
{ |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; |
struct r300_textures_state *texstate = |
(struct r300_textures_state*)r300->textures_state.state; |
struct r300_resource *tex; |
unsigned i; |
/* Set after the first failed validation so we retry only once. */
boolean flushed = FALSE; |
validate: |
if (r300->fb_state.dirty) { |
/* Color buffers... */ |
for (i = 0; i < fb->nr_cbufs; i++) { |
tex = r300_resource(fb->cbufs[i]->texture); |
assert(tex && tex->buf && "cbuf is marked, but NULL!"); |
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, |
RADEON_USAGE_READWRITE, |
r300_surface(fb->cbufs[i])->domain); |
} |
/* ...depth buffer... */ |
if (fb->zsbuf) { |
tex = r300_resource(fb->zsbuf->texture); |
assert(tex && tex->buf && "zsbuf is marked, but NULL!"); |
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, |
RADEON_USAGE_READWRITE, |
r300_surface(fb->zsbuf)->domain); |
} |
} |
/* The AA resolve buffer. */ |
if (r300->aa_state.dirty) { |
if (aa->dest) { |
r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf, |
RADEON_USAGE_WRITE, |
aa->dest->domain); |
} |
} |
if (r300->textures_state.dirty) { |
/* ...textures... */ |
for (i = 0; i < texstate->count; i++) { |
/* Only enabled units reference a texture. */
if (!(texstate->tx_enable & (1 << i))) { |
continue; |
} |
tex = r300_resource(texstate->sampler_views[i]->base.texture); |
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ, |
tex->domain); |
} |
} |
/* ...occlusion query buffer... */ |
if (r300->query_current) |
r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, |
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); |
/* ...vertex buffer for SWTCL path... */ |
if (r300->vbo_cs) |
r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs, |
RADEON_USAGE_READ, RADEON_DOMAIN_GTT); |
/* ...vertex buffers for HWTCL path... */ |
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { |
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; |
struct pipe_vertex_buffer *last = r300->vertex_buffer + |
r300->nr_vertex_buffers; |
struct pipe_resource *buf; |
for (; vbuf != last; vbuf++) { |
buf = vbuf->buffer; |
/* Slots without a buffer are skipped. */
if (!buf) |
continue; |
r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf, |
RADEON_USAGE_READ, |
r300_resource(buf)->domain); |
} |
} |
/* ...and index buffer for HWTCL path. */ |
if (index_buffer) |
r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, |
RADEON_USAGE_READ, |
r300_resource(index_buffer)->domain); |
/* Now do the validation (flush is called inside cs_validate on failure). */ |
if (!r300->rws->cs_validate(r300->cs)) { |
/* Ooops, an infinite loop, give up. */ |
if (flushed) |
return FALSE; |
flushed = TRUE; |
goto validate; |
} |
return TRUE; |
} |
unsigned r300_get_num_dirty_dwords(struct r300_context *r300) |
{ |
struct r300_atom* atom; |
unsigned dwords = 0; |
foreach_dirty_atom(r300, atom) { |
if (atom->dirty) { |
dwords += atom->size; |
} |
} |
/* let's reserve some more, just in case */ |
dwords += 32; |
return dwords; |
} |
unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) |
{ |
unsigned dwords = 0; |
/* Emitted in flush. */ |
dwords += 26; /* emit_query_end */ |
dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ |
if (r300->screen->caps.is_r500) |
dwords += 2; /* emit_index_bias */ |
if (r300->screen->info.drm_minor >= 6) |
dwords += 3; /* MSPOS */ |
return dwords; |
} |
/* Emit all dirty state. */ |
void r300_emit_dirty_state(struct r300_context* r300) |
{ |
struct r300_atom *atom; |
foreach_dirty_atom(r300, atom) { |
if (atom->dirty) { |
atom->emit(r300, atom->size, atom->state); |
atom->dirty = FALSE; |
} |
} |
r300->first_dirty = NULL; |
r300->last_dirty = NULL; |
r300->dirty_hw++; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_emit.h |
---|
0,0 → 1,132 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_EMIT_H |
#define R300_EMIT_H |
#include "r300_context.h" |
/* Forward declarations. */
struct rX00_fragment_program_code; |
struct r300_vertex_program_code; |
uint32_t pack_float24(float f); |
/* Vertex array setup for the HWTCL path; not an atom emit hook (different
 * signature). */
void r300_emit_vertex_arrays(struct r300_context* r300, int offset, |
boolean indexed, int instance_id); |
/* Functions below taking (r300, size, state) share the signature through
 * which atom emit hooks are invoked. */
void r300_emit_blend_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_blend_color_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_clip_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_dsa_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_hyperz_state(struct r300_context *r300, |
unsigned size, void *state); |
void r300_emit_hyperz_end(struct r300_context *r300); |
/* Fragment shader emission, R300/R400 and R500 variants. */
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); |
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); |
void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state); |
void r500_emit_fs(struct r300_context* r300, unsigned size, void *state); |
void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); |
void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state); |
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); |
void r300_emit_fb_state_pipelined(struct r300_context *r300, |
unsigned size, void *state); |
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); |
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); |
/* Occlusion queries; the end function takes only the context. */
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); |
void r300_emit_query_end(struct r300_context* r300); |
void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state); |
void r300_emit_rs_block_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_sample_mask(struct r300_context *r300, |
unsigned size, void *state); |
void r300_emit_scissor_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_textures_state(struct r300_context *r300, |
unsigned size, void *state); |
/* Vertex array setup for the SWTCL path. */
void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed); |
void r300_emit_vap_invariant_state(struct r300_context *r300, |
unsigned size, void *state); |
void r300_emit_vertex_stream_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_vs_constants(struct r300_context* r300, |
unsigned size, void *state); |
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state); |
void r300_emit_viewport_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_ztop_state(struct r300_context* r300, |
unsigned size, void* state); |
void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); |
void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); |
void r300_emit_invariant_state(struct r300_context *r300, |
unsigned size, void *state); |
/* HiZ / ZMASK / CMASK clear packets. */
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); |
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); |
void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state); |
/* Command stream space accounting. */
unsigned r300_get_num_dirty_dwords(struct r300_context *r300); |
unsigned r300_get_num_cs_end_dwords(struct r300_context *r300); |
/* Emit all dirty state. */ |
void r300_emit_dirty_state(struct r300_context* r300); |
/* Adds the buffers used by the next draw to the CS and validates it;
 * returns FALSE when validation keeps failing. */
boolean r300_emit_buffer_validate(struct r300_context *r300, |
boolean do_validate_vertex_buffers, |
struct pipe_resource *index_buffer); |
#endif /* R300_EMIT_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_flush.c |
---|
0,0 → 1,152 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "draw/draw_context.h" |
#include "draw/draw_private.h" |
#include "util/u_simple_list.h" |
#include "util/u_upload_mgr.h" |
#include "os/os_time.h" |
#include "r300_context.h" |
#include "r300_cs.h" |
#include "r300_emit.h" |
/* Finish the current command stream, flush it and mark everything dirty.
 *
 * Before the flush this emits the hyperz end, ends the active query, resets
 * the index bias to 0 on R500 and writes the two MSPOS registers when the
 * DRM minor version is at least 6. After the flush every atom that has a
 * state (or allows a NULL state) is marked dirty again, as are the vertex
 * arrays. On hardware without TCL the vertex shader, vertex constant and
 * clip atoms are then un-dirtied. */
static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags) |
{ |
struct r300_atom *atom; |
r300_emit_hyperz_end(r300); |
r300_emit_query_end(r300); |
if (r300->screen->caps.is_r500) |
r500_emit_index_bias(r300, 0); |
/* The DDX doesn't set these regs. */ |
if (r300->screen->info.drm_minor >= 6) { |
CS_LOCALS(r300); |
/* Two consecutive registers starting at GB_MSPOS0. The second constant
 * is deliberately one hex digit shorter than the first. */
OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); |
OUT_CS(0x66666666); |
OUT_CS(0x6666666); |
} |
r300->flush_counter++; |
r300->rws->cs_flush(r300->cs, flags, 0); |
r300->dirty_hw = 0; |
/* New kitchen sink, baby. */ |
foreach_atom(r300, atom) { |
if (atom->state || atom->allow_null_state) { |
r300_mark_atom_dirty(r300, atom); |
} |
} |
r300->vertex_arrays_dirty = TRUE; |
/* Unmark HWTCL state for SWTCL. */ |
if (!r300->screen->caps.has_tcl) { |
r300->vs_state.dirty = FALSE; |
r300->vs_constants.dirty = FALSE; |
r300->clip_state.dirty = FALSE; |
} |
} |
void r300_flush(struct pipe_context *pipe, |
unsigned flags, |
struct pipe_fence_handle **fence) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct pb_buffer **rfence = (struct pb_buffer**)fence; |
if (r300->screen->info.drm_minor >= 12) { |
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; |
} |
if (rfence) { |
/* Create a fence, which is a dummy BO. */ |
*rfence = r300->rws->buffer_create(r300->rws, 1, 1, TRUE, |
RADEON_DOMAIN_GTT); |
/* Add the fence as a dummy relocation. */ |
r300->rws->cs_add_reloc(r300->cs, |
r300->rws->buffer_get_cs_handle(*rfence), |
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT); |
} |
if (r300->dirty_hw) { |
r300_flush_and_cleanup(r300, flags); |
} else { |
if (rfence) { |
/* We have to create a fence object, but the command stream is empty |
* and we cannot emit an empty CS. Let's write to some reg. */ |
CS_LOCALS(r300); |
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); |
r300->rws->cs_flush(r300->cs, flags, 0); |
} else { |
/* Even if hw is not dirty, we should at least reset the CS in case |
* the space checking failed for the first draw operation. */ |
r300->rws->cs_flush(r300->cs, flags, 0); |
} |
} |
/* Update Hyper-Z status. */ |
if (r300->hyperz_enabled) { |
/* If there was a Z clear, keep Hyper-Z access. */ |
if (r300->num_z_clears) { |
r300->hyperz_time_of_last_flush = os_time_get(); |
r300->num_z_clears = 0; |
} else if (r300->hyperz_time_of_last_flush - os_time_get() > 2000000) { |
/* If there hasn't been a Z clear for 2 seconds, revoke Hyper-Z access. */ |
r300->hiz_in_use = FALSE; |
/* Decompress the Z buffer. */ |
if (r300->zmask_in_use) { |
if (r300->locked_zbuffer) { |
r300_decompress_zmask_locked(r300); |
} else { |
r300_decompress_zmask(r300); |
} |
r300_flush_and_cleanup(r300, flags); |
} |
/* Revoke Hyper-Z access, so that some other process can take it. */ |
r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, |
FALSE); |
r300->hyperz_enabled = FALSE; |
} |
} |
} |
static void r300_flush_wrapped(struct pipe_context *pipe, |
struct pipe_fence_handle **fence, |
unsigned flags) |
{ |
r300_flush(pipe, |
flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0, |
fence); |
} |
/* Install the flush hook of the pipe context. */
void r300_init_flush_functions(struct r300_context* r300) |
{ |
r300->context.flush = r300_flush_wrapped; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_fs.c |
---|
0,0 → 1,630 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Joakim Sindholt <opensource@zhasha.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "util/u_format.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "tgsi/tgsi_dump.h" |
#include "tgsi/tgsi_ureg.h" |
#include "r300_cb.h" |
#include "r300_context.h" |
#include "r300_emit.h" |
#include "r300_screen.h" |
#include "r300_fs.h" |
#include "r300_reg.h" |
#include "r300_texture.h" |
#include "r300_tgsi_to_rc.h" |
#include "compiler/radeon_compiler.h" |
/* Convert info about FS input semantics to r300_shader_semantics. */ |
void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, |
struct r300_shader_semantics* fs_inputs) |
{ |
int i; |
unsigned index; |
r300_shader_semantics_reset(fs_inputs); |
for (i = 0; i < info->num_inputs; i++) { |
index = info->input_semantic_index[i]; |
switch (info->input_semantic_name[i]) { |
case TGSI_SEMANTIC_COLOR: |
assert(index < ATTR_COLOR_COUNT); |
fs_inputs->color[index] = i; |
break; |
case TGSI_SEMANTIC_GENERIC: |
assert(index < ATTR_GENERIC_COUNT); |
fs_inputs->generic[index] = i; |
break; |
case TGSI_SEMANTIC_FOG: |
assert(index == 0); |
fs_inputs->fog = i; |
break; |
case TGSI_SEMANTIC_POSITION: |
assert(index == 0); |
fs_inputs->wpos = i; |
break; |
case TGSI_SEMANTIC_FACE: |
assert(index == 0); |
fs_inputs->face = i; |
break; |
default: |
fprintf(stderr, "r300: FP: Unknown input semantic: %i\n", |
info->input_semantic_name[i]); |
} |
} |
} |
static void find_output_registers(struct r300_fragment_program_compiler * compiler, |
struct r300_fragment_shader_code *shader) |
{ |
unsigned i, colorbuf_count = 0; |
/* Mark the outputs as not present initially */ |
compiler->OutputColor[0] = shader->info.num_outputs; |
compiler->OutputColor[1] = shader->info.num_outputs; |
compiler->OutputColor[2] = shader->info.num_outputs; |
compiler->OutputColor[3] = shader->info.num_outputs; |
compiler->OutputDepth = shader->info.num_outputs; |
/* Now see where they really are. */ |
for(i = 0; i < shader->info.num_outputs; ++i) { |
switch(shader->info.output_semantic_name[i]) { |
case TGSI_SEMANTIC_COLOR: |
compiler->OutputColor[colorbuf_count] = i; |
colorbuf_count++; |
break; |
case TGSI_SEMANTIC_POSITION: |
compiler->OutputDepth = i; |
break; |
} |
} |
} |
static void allocate_hardware_inputs( |
struct r300_fragment_program_compiler * c, |
void (*allocate)(void * data, unsigned input, unsigned hwreg), |
void * mydata) |
{ |
struct r300_shader_semantics* inputs = |
(struct r300_shader_semantics*)c->UserData; |
int i, reg = 0; |
/* Allocate input registers. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
if (inputs->color[i] != ATTR_UNUSED) { |
allocate(mydata, inputs->color[i], reg++); |
} |
} |
if (inputs->face != ATTR_UNUSED) { |
allocate(mydata, inputs->face, reg++); |
} |
for (i = 0; i < ATTR_GENERIC_COUNT; i++) { |
if (inputs->generic[i] != ATTR_UNUSED) { |
allocate(mydata, inputs->generic[i], reg++); |
} |
} |
if (inputs->fog != ATTR_UNUSED) { |
allocate(mydata, inputs->fog, reg++); |
} |
if (inputs->wpos != ATTR_UNUSED) { |
allocate(mydata, inputs->wpos, reg++); |
} |
} |
static void get_external_state( |
struct r300_context* r300, |
struct r300_fragment_program_external_state* state) |
{ |
struct r300_textures_state *texstate = r300->textures_state.state; |
unsigned i; |
state->alpha_to_one = r300->alpha_to_one && r300->msaa_enable; |
for (i = 0; i < texstate->sampler_state_count; i++) { |
struct r300_sampler_state *s = texstate->sampler_states[i]; |
struct r300_sampler_view *v = texstate->sampler_views[i]; |
struct r300_resource *t; |
if (!s || !v) { |
continue; |
} |
t = r300_resource(v->base.texture); |
if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { |
state->unit[i].compare_mode_enabled = 1; |
/* Fortunately, no need to translate this. */ |
state->unit[i].texture_compare_func = s->state.compare_func; |
} |
state->unit[i].non_normalized_coords = !s->state.normalized_coords; |
state->unit[i].convert_unorm_to_snorm = |
v->base.format == PIPE_FORMAT_RGTC1_SNORM || |
v->base.format == PIPE_FORMAT_LATC1_SNORM; |
/* Pass texture swizzling to the compiler, some lowering passes need it. */ |
if (v->base.format == PIPE_FORMAT_RGTC1_SNORM || |
v->base.format == PIPE_FORMAT_LATC1_SNORM) { |
unsigned char swizzle[4]; |
util_format_compose_swizzles( |
util_format_description(v->base.format)->swizzle, |
v->swizzle, |
swizzle); |
state->unit[i].texture_swizzle = |
RC_MAKE_SWIZZLE(swizzle[0], swizzle[1], |
swizzle[2], swizzle[3]); |
} else if (state->unit[i].compare_mode_enabled) { |
state->unit[i].texture_swizzle = |
RC_MAKE_SWIZZLE(v->swizzle[0], v->swizzle[1], |
v->swizzle[2], v->swizzle[3]); |
} |
/* XXX this should probably take into account STR, not just S. */ |
if (t->tex.is_npot) { |
switch (s->state.wrap_s) { |
case PIPE_TEX_WRAP_REPEAT: |
state->unit[i].wrap_mode = RC_WRAP_REPEAT; |
break; |
case PIPE_TEX_WRAP_MIRROR_REPEAT: |
state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT; |
break; |
case PIPE_TEX_WRAP_MIRROR_CLAMP: |
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP; |
break; |
default: |
state->unit[i].wrap_mode = RC_WRAP_NONE; |
} |
if (t->b.b.target == PIPE_TEXTURE_3D) |
state->unit[i].clamp_and_scale_before_fetch = TRUE; |
} |
} |
} |
static void r300_translate_fragment_shader( |
struct r300_context* r300, |
struct r300_fragment_shader_code* shader, |
const struct tgsi_token *tokens); |
static void r300_dummy_fragment_shader( |
struct r300_context* r300, |
struct r300_fragment_shader_code* shader) |
{ |
struct pipe_shader_state state; |
struct ureg_program *ureg; |
struct ureg_dst out; |
struct ureg_src imm; |
/* Make a simple fragment shader which outputs (0, 0, 0, 1) */ |
ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); |
out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); |
imm = ureg_imm4f(ureg, 0, 0, 0, 1); |
ureg_MOV(ureg, out, imm); |
ureg_END(ureg); |
state.tokens = ureg_finalize(ureg); |
shader->dummy = TRUE; |
r300_translate_fragment_shader(r300, shader, state.tokens); |
ureg_destroy(ureg); |
} |
/* Build the pre-recorded command buffer (shader->cb_code) holding the
 * register writes that upload a compiled fragment program, using the
 * CB_* / OUT_CB* macros (see r300_cb.h, not visible in this chunk).
 *
 * r300:   context; only the screen caps (is_r500 / is_r400) are read.
 * shader: compiled shader; cb_code and cb_code_size are written here.
 *
 * Two layouts exist: the R500 path and the R300/R400 path.  In both,
 * shader->cb_code_size is computed up front and handed to NEW_CB, so each
 * term of the size expression has to match what is emitted afterwards
 * (presumably counted in dwords - TODO confirm against r300_cb.h). */
static void r300_emit_fs_code_to_buffer( |
struct r300_context *r300, |
struct r300_fragment_shader_code *shader) |
{ |
struct rX00_fragment_program_code *generic_code = &shader->code; |
unsigned imm_count = shader->immediates_count; |
/* Immediates are searched for in the constant range that follows the
 * external constants. */
unsigned imm_first = shader->externals_count; |
unsigned imm_end = generic_code->constants.Count; |
struct rc_constant *constants = generic_code->constants.Constants; |
unsigned i; |
CB_LOCALS; |
if (r300->screen->caps.is_r500) { |
struct r500_fragment_program_code *code = &generic_code->code.r500; |
/* 6 values per instruction, 7 per immediate (index register write,
 * data header, 4 values) and 2 per integer constant; the constant 19
 * appears to cover the fixed register writes, including the two that
 * follow the if/else (TODO confirm). */
shader->cb_code_size = 19 + |
((code->inst_end + 1) * 6) + |
imm_count * 7 + |
code->int_constant_count * 2; |
NEW_CB(shader->cb_code, shader->cb_code_size); |
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); |
OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); |
OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); |
for(i = 0; i < code->int_constant_count; i++){ |
OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4), |
code->int_constants[i]); |
} |
OUT_CB_REG(R500_US_CODE_RANGE, |
R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); |
OUT_CB_REG(R500_US_CODE_OFFSET, 0); |
OUT_CB_REG(R500_US_CODE_ADDR, |
R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); |
/* Upload the instructions through the vector index/data register pair;
 * inst_end is the index of the last instruction, hence the "<=". */
OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); |
OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); |
for (i = 0; i <= code->inst_end; i++) { |
OUT_CB(code->inst[i].inst0); |
OUT_CB(code->inst[i].inst1); |
OUT_CB(code->inst[i].inst2); |
OUT_CB(code->inst[i].inst3); |
OUT_CB(code->inst[i].inst4); |
OUT_CB(code->inst[i].inst5); |
} |
/* Emit immediates. */ |
if (imm_count) { |
for(i = imm_first; i < imm_end; ++i) { |
if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { |
const float *data = constants[i].u.Immediate; |
OUT_CB_REG(R500_GA_US_VECTOR_INDEX, |
R500_GA_US_VECTOR_INDEX_TYPE_CONST | |
(i & R500_GA_US_VECTOR_INDEX_MASK)); |
OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4); |
OUT_CB_TABLE(data, 4); |
} |
} |
} |
} else { /* r300 */ |
struct r300_fragment_program_code *code = &generic_code->code.r300; |
/* ALU instructions are uploaded in banks of up to 64 and TEX
 * instructions in banks of up to 32; "iterations" is the number of
 * banks needed to hold both. */
unsigned int alu_length = code->alu.length; |
/* NOTE(review): alu_length is unsigned, so this wraps if it is ever 0
 * - confirm that callers never pass an empty ALU program. */
unsigned int alu_iterations = ((alu_length - 1) / 64) + 1; |
unsigned int tex_length = code->tex.length; |
unsigned int tex_iterations = |
tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0; |
unsigned int iterations = |
alu_iterations > tex_iterations ? alu_iterations : tex_iterations; |
unsigned int bank = 0; |
shader->cb_code_size = 15 + |
/* R400_US_CODE_BANK */ |
(r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) + |
/* R400_US_CODE_EXT */ |
(r300->screen->caps.is_r400 ? 2 : 0) + |
/* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */ |
(code->r390_mode ? (5 * alu_iterations) : 4) + |
/* R400_US_ALU_EXT_ADDR_[0-63] */ |
(code->r390_mode ? (code->alu.length) : 0) + |
/* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */ |
code->alu.length * 4 + |
/* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */ |
(code->tex.length > 0 ? code->tex.length + tex_iterations : 0) + |
imm_count * 5; |
NEW_CB(shader->cb_code, shader->cb_code_size); |
OUT_CB_REG(R300_US_CONFIG, code->config); |
OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); |
OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); |
if (code->r390_mode) { |
OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext); |
} else if (r300->screen->caps.is_r400) { |
/* This register appears to affect shaders even if r390_mode is |
* disabled, so it needs to be set to 0 for shaders that |
* don't use r390_mode. */ |
OUT_CB_REG(R400_US_CODE_EXT, 0); |
} |
OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); |
OUT_CB_TABLE(code->code_addr, 4); |
/* One pass per bank.  Without r390_mode the loop body runs exactly
 * once, because the loop condition below requires r390_mode. */
do { |
unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64); |
unsigned int bank_alu_offset = bank * 64; |
unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32); |
unsigned int bank_tex_offset = bank * 32; |
if (r300->screen->caps.is_r400) { |
OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ? |
(bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2 |
} |
if (bank_alu_length > 0) { |
OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length); |
for (i = 0; i < bank_alu_length; i++) |
OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst); |
OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length); |
for (i = 0; i < bank_alu_length; i++) |
OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr); |
OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length); |
for (i = 0; i < bank_alu_length; i++) |
OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst); |
OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length); |
for (i = 0; i < bank_alu_length; i++) |
OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr); |
if (code->r390_mode) { |
OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length); |
for (i = 0; i < bank_alu_length; i++) |
OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr); |
} |
} |
if (bank_tex_length > 0) { |
OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length); |
OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length); |
} |
/* Whatever did not fit into this bank is left for the next pass. */
alu_length -= bank_alu_length; |
tex_length -= bank_tex_length; |
bank++; |
} while(code->r390_mode && (alu_length > 0 || tex_length > 0)); |
/* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders |
* will be rendered incorrectly. */ |
if (r300->screen->caps.is_r400) { |
OUT_CB_REG(R400_US_CODE_BANK, |
code->r390_mode ? R400_R390_MODE_ENABLE : 0); |
} |
/* Emit immediates. */ |
if (imm_count) { |
for(i = imm_first; i < imm_end; ++i) { |
if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { |
const float *data = constants[i].u.Immediate; |
/* On this path each component goes through pack_float24() before
 * being written. */
OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); |
OUT_CB(pack_float24(data[0])); |
OUT_CB(pack_float24(data[1])); |
OUT_CB(pack_float24(data[2])); |
OUT_CB(pack_float24(data[3])); |
} |
} |
} |
} |
/* Common tail for both paths: the depth-output setup registers. */
OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src); |
OUT_CB_REG(R300_US_W_FMT, shader->us_out_w); |
END_CB; |
} |
static void r300_translate_fragment_shader( |
struct r300_context* r300, |
struct r300_fragment_shader_code* shader, |
const struct tgsi_token *tokens) |
{ |
struct r300_fragment_program_compiler compiler; |
struct tgsi_to_rc ttr; |
int wpos, face; |
unsigned i; |
tgsi_scan_shader(tokens, &shader->info); |
r300_shader_read_fs_inputs(&shader->info, &shader->inputs); |
wpos = shader->inputs.wpos; |
face = shader->inputs.face; |
/* Setup the compiler. */ |
memset(&compiler, 0, sizeof(compiler)); |
rc_init(&compiler.Base, &r300->fs_regalloc_state); |
DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; |
DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; |
compiler.code = &shader->code; |
compiler.state = shader->compare_state; |
compiler.Base.is_r500 = r300->screen->caps.is_r500; |
compiler.Base.is_r400 = r300->screen->caps.is_r400; |
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); |
compiler.Base.has_half_swizzles = TRUE; |
compiler.Base.has_presub = TRUE; |
compiler.Base.has_omod = TRUE; |
compiler.Base.max_temp_regs = |
compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32); |
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; |
compiler.Base.max_alu_insts = |
(compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64; |
compiler.Base.max_tex_insts = |
(compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32; |
compiler.AllocateHwInputs = &allocate_hardware_inputs; |
compiler.UserData = &shader->inputs; |
find_output_registers(&compiler, shader); |
shader->write_all = FALSE; |
for (i = 0; i < shader->info.num_properties; i++) { |
if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { |
shader->write_all = TRUE; |
} |
} |
if (compiler.Base.Debug & RC_DBG_LOG) { |
DBG(r300, DBG_FP, "r300: Initial fragment program\n"); |
tgsi_dump(tokens, 0); |
} |
/* Translate TGSI to our internal representation */ |
ttr.compiler = &compiler.Base; |
ttr.info = &shader->info; |
ttr.use_half_swizzles = TRUE; |
r300_tgsi_to_rc(&ttr, tokens); |
if (ttr.error) { |
fprintf(stderr, "r300 FP: Cannot translate a shader. " |
"Using a dummy shader instead.\n"); |
r300_dummy_fragment_shader(r300, shader); |
return; |
} |
if (!r300->screen->caps.is_r500 || |
compiler.Base.Program.Constants.Count > 200) { |
compiler.Base.remove_unused_constants = TRUE; |
} |
/** |
* Transform the program to support WPOS. |
* |
* Introduce a small fragment at the start of the program that will be |
* the only code that directly reads the WPOS input. |
* All other code pieces that reference that input will be rewritten |
* to read from a newly allocated temporary. */ |
if (wpos != ATTR_UNUSED) { |
/* Moving the input to some other reg is not really necessary. */ |
rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); |
} |
if (face != ATTR_UNUSED) { |
rc_transform_fragment_face(&compiler.Base, face); |
} |
/* Invoke the compiler */ |
r3xx_compile_fragment_program(&compiler); |
if (compiler.Base.Error) { |
fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" |
" instead.\n", compiler.Base.ErrorMsg); |
if (shader->dummy) { |
fprintf(stderr, "r300 FP: Cannot compile the dummy shader! " |
"Giving up...\n"); |
abort(); |
} |
rc_destroy(&compiler.Base); |
r300_dummy_fragment_shader(r300, shader); |
return; |
} |
/* Shaders with zero instructions are invalid, |
* use the dummy shader instead. */ |
if (shader->code.code.r500.inst_end == -1) { |
rc_destroy(&compiler.Base); |
r300_dummy_fragment_shader(r300, shader); |
return; |
} |
/* Initialize numbers of constants for each type. */ |
shader->externals_count = 0; |
for (i = 0; |
i < shader->code.constants.Count && |
shader->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) { |
shader->externals_count = i+1; |
} |
shader->immediates_count = 0; |
shader->rc_state_count = 0; |
for (i = shader->externals_count; i < shader->code.constants.Count; i++) { |
switch (shader->code.constants.Constants[i].Type) { |
case RC_CONSTANT_IMMEDIATE: |
++shader->immediates_count; |
break; |
case RC_CONSTANT_STATE: |
++shader->rc_state_count; |
break; |
default: |
assert(0); |
} |
} |
/* Setup shader depth output. */ |
if (shader->code.writes_depth) { |
shader->fg_depth_src = R300_FG_DEPTH_SRC_SHADER; |
shader->us_out_w = R300_W_FMT_W24 | R300_W_SRC_US; |
} else { |
shader->fg_depth_src = R300_FG_DEPTH_SRC_SCAN; |
shader->us_out_w = R300_W_FMT_W0 | R300_W_SRC_US; |
} |
/* And, finally... */ |
rc_destroy(&compiler.Base); |
/* Build the command buffer. */ |
r300_emit_fs_code_to_buffer(r300, shader); |
} |
boolean r300_pick_fragment_shader(struct r300_context* r300) |
{ |
struct r300_fragment_shader* fs = r300_fs(r300); |
struct r300_fragment_program_external_state state = {{{ 0 }}}; |
struct r300_fragment_shader_code* ptr; |
get_external_state(r300, &state); |
if (!fs->first) { |
/* Build the fragment shader for the first time. */ |
fs->first = fs->shader = CALLOC_STRUCT(r300_fragment_shader_code); |
memcpy(&fs->shader->compare_state, &state, |
sizeof(struct r300_fragment_program_external_state)); |
r300_translate_fragment_shader(r300, fs->shader, fs->state.tokens); |
return TRUE; |
} else { |
/* Check if the currently-bound shader has been compiled |
* with the texture-compare state we need. */ |
if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) != 0) { |
/* Search for the right shader. */ |
ptr = fs->first; |
while (ptr) { |
if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) { |
if (fs->shader != ptr) { |
fs->shader = ptr; |
return TRUE; |
} |
/* The currently-bound one is OK. */ |
return FALSE; |
} |
ptr = ptr->next; |
} |
/* Not found, gotta compile a new one. */ |
ptr = CALLOC_STRUCT(r300_fragment_shader_code); |
ptr->next = fs->first; |
fs->first = fs->shader = ptr; |
ptr->compare_state = state; |
r300_translate_fragment_shader(r300, ptr, fs->state.tokens); |
return TRUE; |
} |
} |
return FALSE; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_fs.h |
---|
0,0 → 1,93 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Joakim Sindholt <opensource@zhasha.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_FS_H |
#define R300_FS_H |
#include "pipe/p_state.h" |
#include "tgsi/tgsi_scan.h" |
#include "compiler/radeon_code.h" |
#include "r300_shader_semantics.h" |
/* One compiled variant of a fragment shader, i.e. the shader compiled for
 * one particular external state (see compare_state). */
struct r300_fragment_shader_code { |
/* Output of the fragment program compiler. */
struct rX00_fragment_program_code code; |
/* Result of scanning the TGSI tokens. */
struct tgsi_shader_info info; |
/* Which TGSI input carries which semantic. */
struct r300_shader_semantics inputs; |
/* Whether the shader was replaced by a dummy one due to a shader |
* compilation failure. */ |
boolean dummy; |
/* Numbers of constants for each type. */ |
unsigned externals_count; |
unsigned immediates_count; |
unsigned rc_state_count; |
/* Registers for fragment depth output setup. */ |
uint32_t fg_depth_src; /* R300_FG_DEPTH_SRC: 0x4bd8 */ |
uint32_t us_out_w; /* R300_US_W_FMT: 0x46b4 */ |
/* External state this variant was compiled for; variants are looked up
 * by comparing this member. */
struct r300_fragment_program_external_state compare_state; |
/* Size and storage of the command buffer that uploads the shader. */
unsigned cb_code_size; |
uint32_t *cb_code; |
/* Next variant of the same shader, or NULL at the end of the list. */
struct r300_fragment_shader_code* next; |
/* TRUE if the shader has the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
 * property. */
boolean write_all; |
}; |
/* A fragment shader as created by the state tracker, together with all
 * the variants that have been compiled from it so far. */
struct r300_fragment_shader { |
/* Parent class */ |
struct pipe_shader_state state; |
/* Currently-bound fragment shader. */ |
struct r300_fragment_shader_code* shader; |
/* List of the same shaders compiled with different texture-compare |
* states. */ |
/* Head of the list; NULL until the first variant has been built. */
struct r300_fragment_shader_code* first; |
}; |
/* Fill fs_inputs with the index of the TGSI input that carries each
 * fragment shader input semantic found in info. */
void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, |
struct r300_shader_semantics* fs_inputs); |
/* Return TRUE if the shader was switched and should be re-emitted. */ |
boolean r300_pick_fragment_shader(struct r300_context* r300); |
/* Tell whether the currently selected variant of fs writes the fragment
 * depth.  A NULL fs counts as "does not write depth". */
static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
{
    boolean writes_depth = FALSE;

    if (fs && fs->shader->code.writes_depth) {
        writes_depth = TRUE;
    }

    return writes_depth;
}
/* Tell whether the currently selected variant of fs has its write_all
 * flag set.  A NULL fs yields FALSE. */
static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
{
    boolean writes_all = FALSE;

    if (fs && fs->shader->write_all) {
        writes_all = TRUE;
    }

    return writes_all;
}
#endif /* R300_FS_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_hyperz.c |
---|
0,0 → 1,313 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_context.h" |
#include "r300_reg.h" |
#include "r300_fs.h" |
#include "util/u_format.h" |
#include "util/u_mm.h" |
/* |
HiZ rules - taken from various docs |
1. HiZ only works on depth values |
2. Cannot HiZ if stencil fail or zfail is !KEEP |
3. on R300/400, HiZ is disabled if depth test is EQUAL |
4. comparison changes without clears usually mean disabling HiZ |
*/ |
/*****************************************************************************/ |
/* The HyperZ setup */ |
/*****************************************************************************/ |
static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) |
{ |
struct r300_dsa_state *dsa = r300->dsa_state.state; |
switch (dsa->dsa.depth.func) { |
case PIPE_FUNC_NEVER: |
case PIPE_FUNC_EQUAL: |
case PIPE_FUNC_NOTEQUAL: |
case PIPE_FUNC_ALWAYS: |
default: |
/* Guess MAX for uncertain cases. */ |
case PIPE_FUNC_LESS: |
case PIPE_FUNC_LEQUAL: |
return HIZ_FUNC_MAX; |
case PIPE_FUNC_GREATER: |
case PIPE_FUNC_GEQUAL: |
return HIZ_FUNC_MIN; |
} |
} |
/* Return what's used for the depth test (either minimum or maximum). */ |
static unsigned r300_get_sc_hz_max(struct r300_context *r300) |
{ |
struct r300_dsa_state *dsa = r300->dsa_state.state; |
unsigned func = dsa->dsa.depth.func; |
return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; |
} |
static boolean r300_is_hiz_func_valid(struct r300_context *r300) |
{ |
struct r300_dsa_state *dsa = r300->dsa_state.state; |
unsigned func = dsa->dsa.depth.func; |
if (r300->hiz_func == HIZ_FUNC_NONE) |
return TRUE; |
/* func1 is less/lessthan */ |
if (r300->hiz_func == HIZ_FUNC_MAX && |
(func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) |
return FALSE; |
/* func1 is greater/greaterthan */ |
if (r300->hiz_func == HIZ_FUNC_MIN && |
(func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) |
return FALSE; |
return TRUE; |
} |
static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) |
{ |
return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || |
s->zfail_op != PIPE_STENCIL_OP_KEEP); |
} |
static boolean r300_hiz_allowed(struct r300_context *r300) |
{ |
struct r300_dsa_state *dsa = r300->dsa_state.state; |
struct r300_screen *r300screen = r300->screen; |
if (r300_fragment_shader_writes_depth(r300_fs(r300))) |
return FALSE; |
if (r300->query_current) |
return FALSE; |
/* If the depth function is inverted, HiZ must be disabled. */ |
if (!r300_is_hiz_func_valid(r300)) |
return FALSE; |
/* if stencil fail/zfail op is not KEEP */ |
if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || |
r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) |
return FALSE; |
if (dsa->dsa.depth.enabled) { |
/* if depth func is EQUAL pre-r500 */ |
if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) |
return FALSE; |
/* if depth func is NOTEQUAL */ |
if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) |
return FALSE; |
} |
return TRUE; |
} |
static void r300_update_hyperz(struct r300_context* r300) |
{ |
struct r300_hyperz_state *z = |
(struct r300_hyperz_state*)r300->hyperz_state.state; |
struct pipe_framebuffer_state *fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
struct r300_dsa_state *dsa = r300->dsa_state.state; |
struct r300_resource *zstex = |
fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; |
z->gb_z_peq_config = 0; |
z->zb_bw_cntl = 0; |
z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; |
z->flush = 0; |
if (r300->cbzb_clear) { |
z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; |
return; |
} |
if (!zstex || !r300->hyperz_enabled) |
return; |
/* Set the size of ZMASK tiles. */ |
if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) { |
z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; |
} |
/* R500-specific features and optimizations. */ |
if (r300->screen->caps.is_r500) { |
z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | |
R500_COVERED_PTR_MASKING_ENABLE; |
} |
/* Setup decompression if needed. No other HyperZ setting is required. */ |
if (r300->zmask_decompress) { |
z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | |
R300_RD_COMP_ENABLE; |
return; |
} |
/* Do not set anything if depth and stencil tests are off. */ |
if (!dsa->dsa.depth.enabled && |
!dsa->dsa.stencil[0].enabled && |
!dsa->dsa.stencil[1].enabled) { |
assert(!dsa->dsa.depth.writemask); |
return; |
} |
/* Zbuffer compression. */ |
if (r300->zmask_in_use && !r300->locked_zbuffer) { |
z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | |
R300_RD_COMP_ENABLE | |
R300_WR_COMP_ENABLE; |
} |
/* HiZ. */ |
if (r300->hiz_in_use && !r300->locked_zbuffer) { |
/* HiZ cannot be used under some circumstances. */ |
if (!r300_hiz_allowed(r300)) { |
/* If writemask is disabled, the HiZ memory will not be changed, |
* so we can keep its content for later. */ |
if (dsa->dsa.depth.writemask) { |
r300->hiz_in_use = FALSE; |
} |
return; |
} |
DBG(r300, DBG_HYPERZ, "r300: Z-func: %i\n", dsa->dsa.depth.func); |
/* Set the HiZ function if needed. */ |
if (r300->hiz_func == HIZ_FUNC_NONE) { |
r300->hiz_func = r300_get_hiz_func(r300); |
} |
/* Setup the HiZ bits. */ |
z->zb_bw_cntl |= R300_HIZ_ENABLE | |
(r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); |
z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | |
r300_get_sc_hz_max(r300); |
if (r300->screen->caps.is_r500) { |
z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE; |
} |
} |
} |
/*****************************************************************************/ |
/* The ZTOP state */ |
/*****************************************************************************/ |
static boolean r300_dsa_writes_stencil( |
struct pipe_stencil_state *s) |
{ |
return s->enabled && s->writemask && |
(s->fail_op != PIPE_STENCIL_OP_KEEP || |
s->zfail_op != PIPE_STENCIL_OP_KEEP || |
s->zpass_op != PIPE_STENCIL_OP_KEEP); |
} |
static boolean r300_dsa_writes_depth_stencil( |
struct pipe_depth_stencil_alpha_state *dsa) |
{ |
/* We are interested only in the cases when a depth or stencil value |
* can be changed. */ |
if (dsa->depth.enabled && dsa->depth.writemask && |
dsa->depth.func != PIPE_FUNC_NEVER) |
return TRUE; |
if (r300_dsa_writes_stencil(&dsa->stencil[0]) || |
r300_dsa_writes_stencil(&dsa->stencil[1])) |
return TRUE; |
return FALSE; |
} |
static boolean r300_dsa_alpha_test_enabled( |
struct pipe_depth_stencil_alpha_state *dsa) |
{ |
/* We are interested only in the cases when alpha testing can kill |
* a fragment. */ |
return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS; |
} |
static void r300_update_ztop(struct r300_context* r300) |
{ |
struct r300_ztop_state* ztop_state = |
(struct r300_ztop_state*)r300->ztop_state.state; |
uint32_t old_ztop = ztop_state->z_buffer_top; |
/* This is important enough that I felt it warranted a comment. |
* |
* According to the docs, these are the conditions where ZTOP must be |
* disabled: |
* 1) Alpha testing enabled |
* 2) Texture kill instructions in fragment shader |
* 3) Chroma key culling enabled |
* 4) W-buffering enabled |
* |
* The docs claim that for the first three cases, if no ZS writes happen, |
* then ZTOP can be used. |
* |
* (3) will never apply since we do not support chroma-keyed operations. |
* (4) will need to be re-examined (and this comment updated) if/when |
* Hyper-Z becomes supported. |
* |
* Additionally, the following conditions require disabled ZTOP: |
* 5) Depth writes in fragment shader |
* 6) Outstanding occlusion queries |
* |
* This register causes stalls all the way from SC to CB when changed, |
* but it is buffered on-chip so it does not hurt to write it if it has |
* not changed. |
* |
* ~C. |
*/ |
/* ZS writes */ |
if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && |
(r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */ |
r300_fs(r300)->shader->info.uses_kill)) { /* (2) */ |
ztop_state->z_buffer_top = R300_ZTOP_DISABLE; |
} else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */ |
ztop_state->z_buffer_top = R300_ZTOP_DISABLE; |
} else if (r300->query_current) { /* (6) */ |
ztop_state->z_buffer_top = R300_ZTOP_DISABLE; |
} else { |
ztop_state->z_buffer_top = R300_ZTOP_ENABLE; |
} |
if (ztop_state->z_buffer_top != old_ztop) |
r300_mark_atom_dirty(r300, &r300->ztop_state); |
} |
void r300_update_hyperz_state(struct r300_context* r300) |
{ |
r300_update_ztop(r300); |
if (r300->hyperz_state.dirty) { |
r300_update_hyperz(r300); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_public.h |
---|
0,0 → 1,9 |
/* Public entry point of the r300 driver. */
#ifndef R300_PUBLIC_H |
#define R300_PUBLIC_H |
/* Only used through a pointer here, so a forward declaration suffices. */
struct radeon_winsys; |
/* Presumably creates the r300 pipe_screen on top of the given winsys -
 * the definition is not part of this header, so verify there. */
struct pipe_screen* r300_screen_create(struct radeon_winsys *rws); |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_query.c |
---|
0,0 → 1,212 |
/* |
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "util/u_memory.h" |
#include "util/u_simple_list.h" |
#include "r300_context.h" |
#include "r300_screen.h" |
#include "r300_emit.h" |
#include <stdio.h> |
static struct pipe_query *r300_create_query(struct pipe_context *pipe, |
unsigned query_type) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct r300_screen *r300screen = r300->screen; |
struct r300_query *q; |
if (query_type != PIPE_QUERY_OCCLUSION_COUNTER && |
query_type != PIPE_QUERY_OCCLUSION_PREDICATE && |
query_type != PIPE_QUERY_GPU_FINISHED) { |
return NULL; |
} |
q = CALLOC_STRUCT(r300_query); |
if (!q) |
return NULL; |
q->type = query_type; |
if (query_type == PIPE_QUERY_GPU_FINISHED) { |
return (struct pipe_query*)q; |
} |
if (r300screen->caps.family == CHIP_RV530) |
q->num_pipes = r300screen->info.r300_num_z_pipes; |
else |
q->num_pipes = r300screen->info.r300_num_gb_pipes; |
q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE, |
RADEON_DOMAIN_GTT); |
if (!q->buf) { |
FREE(q); |
return NULL; |
} |
q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); |
return (struct pipe_query*)q; |
} |
static void r300_destroy_query(struct pipe_context* pipe, |
struct pipe_query* query) |
{ |
struct r300_query* q = r300_query(query); |
pb_reference(&q->buf, NULL); |
FREE(query); |
} |
void r300_resume_query(struct r300_context *r300, |
struct r300_query *query) |
{ |
r300->query_current = query; |
r300_mark_atom_dirty(r300, &r300->query_start); |
} |
static void r300_begin_query(struct pipe_context* pipe, |
struct pipe_query* query) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_query* q = r300_query(query); |
if (q->type == PIPE_QUERY_GPU_FINISHED) |
return; |
if (r300->query_current != NULL) { |
fprintf(stderr, "r300: begin_query: " |
"Some other query has already been started.\n"); |
assert(0); |
return; |
} |
q->num_results = 0; |
r300_resume_query(r300, q); |
} |
void r300_stop_query(struct r300_context *r300) |
{ |
r300_emit_query_end(r300); |
r300->query_current = NULL; |
} |
static void r300_end_query(struct pipe_context* pipe, |
struct pipe_query* query) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_query *q = r300_query(query); |
if (q->type == PIPE_QUERY_GPU_FINISHED) { |
pb_reference(&q->buf, NULL); |
r300_flush(pipe, RADEON_FLUSH_ASYNC, |
(struct pipe_fence_handle**)&q->buf); |
return; |
} |
if (q != r300->query_current) { |
fprintf(stderr, "r300: end_query: Got invalid query.\n"); |
assert(0); |
return; |
} |
r300_stop_query(r300); |
} |
/**
 * Fetch the result of a query.
 *
 * GPU_FINISHED: with \p wait set, waits on q->buf and reports TRUE;
 * otherwise reports whether q->buf is no longer busy. The return value
 * equals vresult->b in this case.
 *
 * Occlusion queries: the result buffer is mapped for reading (without
 * blocking when \p wait is FALSE) and the first num_results 32-bit values
 * are summed. A predicate query stores "sum != 0" in vresult->b, a counter
 * query stores the sum in vresult->u64.
 *
 * \return TRUE if a result was written, FALSE if the buffer could not be
 *         mapped (or, for GPU_FINISHED, if the GPU is not finished yet).
 */
static boolean r300_get_query_result(struct pipe_context* pipe,
                                     struct pipe_query* query,
                                     boolean wait,
                                     union pipe_query_result *vresult)
{
    struct r300_context *ctx = r300_context(pipe);
    struct r300_query *rq = r300_query(query);
    uint32_t total = 0;
    uint32_t *values;
    unsigned idx;

    if (rq->type == PIPE_QUERY_GPU_FINISHED) {
        if (!wait) {
            vresult->b = !ctx->rws->buffer_is_busy(rq->buf,
                                                   RADEON_USAGE_READWRITE);
        } else {
            ctx->rws->buffer_wait(rq->buf, RADEON_USAGE_READWRITE);
            vresult->b = TRUE;
        }
        return vresult->b;
    }

    values = ctx->rws->buffer_map(rq->cs_buf, ctx->cs,
                                  PIPE_TRANSFER_READ |
                                  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
    if (values == NULL)
        return FALSE;

    /* Accumulate the results, converting each little-endian value written
     * by the GPU to CPU byte order. */
    for (idx = 0; idx < rq->num_results; idx++)
        total += util_le32_to_cpu(values[idx]);

    ctx->rws->buffer_unmap(rq->cs_buf);

    if (rq->type != PIPE_QUERY_OCCLUSION_PREDICATE)
        vresult->u64 = total;
    else
        vresult->b = total != 0;

    return TRUE;
}
static void r300_render_condition(struct pipe_context *pipe, |
struct pipe_query *query, |
boolean condition, |
uint mode) |
{ |
struct r300_context *r300 = r300_context(pipe); |
union pipe_query_result result; |
boolean wait; |
r300->skip_rendering = FALSE; |
if (query) { |
wait = mode == PIPE_RENDER_COND_WAIT || |
mode == PIPE_RENDER_COND_BY_REGION_WAIT; |
if (r300_get_query_result(pipe, query, wait, &result)) { |
if (r300_query(query)->type == PIPE_QUERY_OCCLUSION_PREDICATE) { |
r300->skip_rendering = condition == result.b; |
} else { |
r300->skip_rendering = condition == !!result.u64; |
} |
} |
} |
} |
void r300_init_query_functions(struct r300_context* r300) |
{ |
r300->context.create_query = r300_create_query; |
r300->context.destroy_query = r300_destroy_query; |
r300->context.begin_query = r300_begin_query; |
r300->context.end_query = r300_end_query; |
r300->context.get_query_result = r300_get_query_result; |
r300->context.render_condition = r300_render_condition; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_reg.h |
---|
0,0 → 1,3576 |
/************************************************************************** |
Copyright (C) 2004-2005 Nicolai Haehnle et al. |
Permission is hereby granted, free of charge, to any person obtaining a |
copy of this software and associated documentation files (the "Software"), |
to deal in the Software without restriction, including without limitation |
on the rights to use, copy, modify, merge, publish, distribute, sub |
license, and/or sell copies of the Software, and to permit persons to whom |
the Software is furnished to do so, subject to the following conditions: |
The above copyright notice and this permission notice (including the next |
paragraph) shall be included in all copies or substantial portions of the |
Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
USE OR OTHER DEALINGS IN THE SOFTWARE. |
**************************************************************************/ |
/* *INDENT-OFF* */ |
#ifndef _R300_REG_H |
#define _R300_REG_H |
#define R300_MC_INIT_MISC_LAT_TIMER 0x180 |
# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 |
# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 |
# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 |
# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 |
# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 |
# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 |
# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 |
# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 |
#define R300_MC_INIT_GFX_LAT_TIMER 0x154 |
# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 |
# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 |
# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 |
# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 |
# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 |
# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 |
# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 |
# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 |
/* |
* This file contains registers and constants for the R300. They have been |
* found mostly by examining command buffers captured using glxtest, as well |
* as by extrapolating some known registers and constants from the R200. |
* I am fairly certain that they are correct unless stated otherwise |
* in comments. |
*/ |
#define R300_SE_VPORT_XSCALE 0x1D98 |
#define R300_SE_VPORT_XOFFSET 0x1D9C |
#define R300_SE_VPORT_YSCALE 0x1DA0 |
#define R300_SE_VPORT_YOFFSET 0x1DA4 |
#define R300_SE_VPORT_ZSCALE 0x1DA8 |
#define R300_SE_VPORT_ZOFFSET 0x1DAC |
#define R300_VAP_PORT_IDX0 0x2040 |
/* |
* Vertex Array Processing (VAP) Control |
*/ |
#define R300_VAP_CNTL 0x2080 |
# define R300_PVS_NUM_SLOTS_SHIFT 0 |
# define R300_PVS_NUM_CNTLRS_SHIFT 4 |
# define R300_PVS_NUM_FPUS_SHIFT 8 |
# define R300_VF_MAX_VTX_NUM_SHIFT 18 |
# define R300_PVS_NUM_SLOTS(x) ((x) << 0) |
# define R300_PVS_NUM_CNTLRS(x) ((x) << 4) |
# define R300_PVS_NUM_FPUS(x) ((x) << 8) |
# define R300_PVS_VF_MAX_VTX_NUM(x) ((x) << 18) |
# define R300_GL_CLIP_SPACE_DEF (0 << 22) |
# define R300_DX_CLIP_SPACE_DEF (1 << 22) |
# define R500_TCL_STATE_OPTIMIZATION (1 << 23) |
/* This register is written directly and also starts data section |
* in many 3d CP_PACKET3's |
*/ |
#define R300_VAP_VF_CNTL 0x2084 |
# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 |
# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) |
# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) |
# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) |
# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) |
# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) |
# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) |
# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) |
# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) |
# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) |
# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) |
# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) |
# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 |
/* State based - direct writes to registers trigger vertex |
generation */ |
# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) |
# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) |
# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) |
# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) |
/* I don't think I saw these three used.. */ |
# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 |
# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 |
# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 |
/* index size - when not set the indices are assumed to be 16 bit */ |
# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) |
# define R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS (1<<14) |
/* number of vertices */ |
# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 |
#define R500_VAP_INDEX_OFFSET 0x208c |
#define R500_VAP_ALT_NUM_VERTICES 0x2088 |
#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 |
# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) |
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) |
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) |
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) |
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) |
# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) |
#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 |
/* each of the following is 3 bits wide, specifies number |
of components */ |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 |
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 |
# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 |
# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 |
# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 |
# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 |
# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 |
#define R300_VAP_VPORT_XSCALE 0x2098 |
#define R300_VAP_VPORT_XOFFSET 0x209c |
#define R300_VAP_VPORT_YSCALE 0x20a0 |
#define R300_VAP_VPORT_YOFFSET 0x20a4 |
#define R300_VAP_VPORT_ZSCALE 0x20a8 |
#define R300_VAP_VPORT_ZOFFSET 0x20ac |
#define R300_VAP_VTE_CNTL 0x20b0 |
#define R300_SE_VTE_CNTL R300_VAP_VTE_CNTL |
# define R300_VPORT_X_SCALE_ENA (1 << 0) |
# define R300_VPORT_X_OFFSET_ENA (1 << 1) |
# define R300_VPORT_Y_SCALE_ENA (1 << 2) |
# define R300_VPORT_Y_OFFSET_ENA (1 << 3) |
# define R300_VPORT_Z_SCALE_ENA (1 << 4) |
# define R300_VPORT_Z_OFFSET_ENA (1 << 5) |
# define R300_VTX_XY_FMT (1 << 8) |
# define R300_VTX_Z_FMT (1 << 9) |
# define R300_VTX_W0_FMT (1 << 10) |
# define R300_SERIAL_PROC_ENA (1 << 11) |
#define R300_VAP_VTX_SIZE 0x20b4 |
/* BEGIN: Vertex data assembly - lots of uncertainties */ |
/* gap */ |
/* Maximum Vertex Indx Clamp */ |
#define R300_VAP_VF_MAX_VTX_INDX 0x2134 |
/* Minimum Vertex Indx Clamp */ |
#define R300_VAP_VF_MIN_VTX_INDX 0x2138 |
/** Vertex assembler/processor control status */
#define R300_VAP_CNTL_STATUS 0x2140
/* No swap at all (default) */
# define R300_VC_NO_SWAP (0 << 0)
/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
# define R300_VC_16BIT_SWAP (1 << 0)
/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
# define R300_VC_32BIT_SWAP (2 << 0)
/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
# define R300_VC_HALF_DWORD_SWAP (3 << 0)
/* The TCL engine will not be used (as it is logically or even physically removed) */
# define R300_VAP_TCL_BYPASS (1 << 8)
/* Read only flag if TCL engine is busy. */
# define R300_VAP_PVS_BUSY (1 << 11)
/* TODO: gap for MAX_MPS */
/* Read only flag if the vertex store is busy. */
# define R300_VAP_VS_BUSY (1 << 24)
/* Read only flag if the reciprocal engine is busy. */
# define R300_VAP_RCP_BUSY (1 << 25)
/* Read only flag if the viewport transform engine is busy. */
# define R300_VAP_VTE_BUSY (1 << 26)
/* Read only flag if the memory interface unit is busy. */
# define R300_VAP_MUI_BUSY (1 << 27)
/* Read only flag if the vertex cache is busy. */
# define R300_VAP_VC_BUSY (1 << 28)
/* Read only flag if the vertex fetcher is busy. */
# define R300_VAP_VF_BUSY (1 << 29)
/* Read only flag if the register pipeline is busy. */
# define R300_VAP_REGPIPE_BUSY (1 << 30)
/* Read only flag if the VAP engine is busy.
 * Bit 31 is built from an unsigned constant: (1 << 31) shifts into the sign
 * bit of a 32-bit int, which is undefined behavior in C. */
# define R300_VAP_VAP_BUSY (1u << 31)
/* gap */ |
/* Where do we get our vertex data? |
* |
* Vertex data comes either from immediate mode registers or from |
* vertex arrays. |
* There appears to be no mixed mode (though we can force the pitch of |
* vertex arrays to 0, effectively reusing the same element over and over |
* again). |
* |
* Immediate mode is controlled by the INPUT_CNTL registers. I am not sure |
* if these registers influence vertex array processing. |
* |
* Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. |
* |
* In both cases, vertex attributes are then passed through INPUT_ROUTE. |
* |
* Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data |
* into the vertex processor's input registers. |
* The first word routes the first input, the second word the second, etc. |
* The corresponding input is routed into the register with the given index. |
* The list is ended by a word with INPUT_ROUTE_END set. |
* |
* Always set COMPONENTS_4 in immediate mode. |
*/ |
#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 |
# define R300_DATA_TYPE_0_SHIFT 0 |
# define R300_DATA_TYPE_FLOAT_1 0 |
# define R300_DATA_TYPE_FLOAT_2 1 |
# define R300_DATA_TYPE_FLOAT_3 2 |
# define R300_DATA_TYPE_FLOAT_4 3 |
# define R300_DATA_TYPE_BYTE 4 |
# define R300_DATA_TYPE_D3DCOLOR 5 |
# define R300_DATA_TYPE_SHORT_2 6 |
# define R300_DATA_TYPE_SHORT_4 7 |
# define R300_DATA_TYPE_VECTOR_3_TTT 8 |
# define R300_DATA_TYPE_VECTOR_3_EET 9 |
# define R300_DATA_TYPE_FLOAT_8 10 |
# define R300_DATA_TYPE_FLT16_2 11 |
# define R300_DATA_TYPE_FLT16_4 12 |
# define R300_SKIP_DWORDS_SHIFT 4 |
# define R300_DST_VEC_LOC_SHIFT 8 |
# define R300_LAST_VEC (1 << 13) |
# define R300_SIGNED (1 << 14) |
# define R300_NORMALIZE (1 << 15) |
# define R300_DATA_TYPE_1_SHIFT 16 |
#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 |
#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 |
#define R300_VAP_PROG_STREAM_CNTL_3 0x215C |
#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 |
#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 |
#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 |
#define R300_VAP_PROG_STREAM_CNTL_7 0x216C |
/* gap */ |
/* Notes: |
* - always set up to produce at least two attributes: |
* if vertex program uses only position, fglrx will set normal, too |
* - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. |
*/ |
#define R300_VAP_VTX_STATE_CNTL 0x2180 |
# define R300_COLOR_0_ASSEMBLY_SHIFT 0 |
# define R300_SEL_COLOR 0 |
# define R300_SEL_USER_COLOR_0 1 |
# define R300_SEL_USER_COLOR_1 2 |
# define R300_COLOR_1_ASSEMBLY_SHIFT 2 |
# define R300_COLOR_2_ASSEMBLY_SHIFT 4 |
# define R300_COLOR_3_ASSEMBLY_SHIFT 6 |
# define R300_COLOR_4_ASSEMBLY_SHIFT 8 |
# define R300_COLOR_5_ASSEMBLY_SHIFT 10 |
# define R300_COLOR_6_ASSEMBLY_SHIFT 12 |
# define R300_COLOR_7_ASSEMBLY_SHIFT 14 |
# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) |
/* |
* Each bit in this field applies to the corresponding vector in the VSM |
* memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit |
* is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. |
*/ |
#define R300_VAP_VSM_VTX_ASSM 0x2184 |
# define R300_INPUT_CNTL_POS 0x00000001 |
# define R300_INPUT_CNTL_NORMAL 0x00000002 |
# define R300_INPUT_CNTL_COLOR 0x00000004 |
# define R300_INPUT_CNTL_TC0 0x00000400 |
# define R300_INPUT_CNTL_TC1 0x00000800 |
# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ |
# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ |
# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ |
# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ |
# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ |
# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ |
/* Programmable Stream Control Signed Normalize Control */
#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc
# define SGN_NORM_ZERO 0
# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
# define SGN_NORM_NO_ZERO 2
/* SGN_NORM_NO_ZERO replicated into all sixteen 2-bit fields (bits 0..31).
 * The topmost field is shifted as unsigned: 2 << 30 does not fit in a
 * 32-bit signed int, so the plain int shift is undefined behavior in C.
 * The resulting value is 0xAAAAAAAA. */
# define R300_SGN_NORM_NO_ZERO (SGN_NORM_NO_ZERO | \
    (SGN_NORM_NO_ZERO << 2) | (SGN_NORM_NO_ZERO << 4) | \
    (SGN_NORM_NO_ZERO << 6) | (SGN_NORM_NO_ZERO << 8) | \
    (SGN_NORM_NO_ZERO << 10) | (SGN_NORM_NO_ZERO << 12) | \
    (SGN_NORM_NO_ZERO << 14) | (SGN_NORM_NO_ZERO << 16) | \
    (SGN_NORM_NO_ZERO << 18) | (SGN_NORM_NO_ZERO << 20) | \
    (SGN_NORM_NO_ZERO << 22) | (SGN_NORM_NO_ZERO << 24) | \
    (SGN_NORM_NO_ZERO << 26) | (SGN_NORM_NO_ZERO << 28) | \
    ((unsigned)SGN_NORM_NO_ZERO << 30))
/* gap */ |
/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 |
* are set to a swizzling bit pattern, other words are 0. |
* |
* In immediate mode, the pattern is always set to xyzw. In vertex array |
* mode, the swizzling pattern is e.g. used to set zw components in texture |
* coordinates with only two components. |
*/ |
#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 |
# define R300_SWIZZLE0_SHIFT 0 |
# define R300_SWIZZLE_SELECT_X_SHIFT 0 |
# define R300_SWIZZLE_SELECT_Y_SHIFT 3 |
# define R300_SWIZZLE_SELECT_Z_SHIFT 6 |
# define R300_SWIZZLE_SELECT_W_SHIFT 9 |
# define R300_SWIZZLE_SELECT_X 0 |
# define R300_SWIZZLE_SELECT_Y 1 |
# define R300_SWIZZLE_SELECT_Z 2 |
# define R300_SWIZZLE_SELECT_W 3 |
# define R300_SWIZZLE_SELECT_FP_ZERO 4 |
# define R300_SWIZZLE_SELECT_FP_ONE 5 |
/* alternate forms for r300_emit.c */ |
# define R300_INPUT_ROUTE_SELECT_X 0 |
# define R300_INPUT_ROUTE_SELECT_Y 1 |
# define R300_INPUT_ROUTE_SELECT_Z 2 |
# define R300_INPUT_ROUTE_SELECT_W 3 |
# define R300_INPUT_ROUTE_SELECT_ZERO 4 |
# define R300_INPUT_ROUTE_SELECT_ONE 5 |
# define R300_WRITE_ENA_SHIFT 12 |
# define R300_WRITE_ENA_X 1 |
# define R300_WRITE_ENA_Y 2 |
# define R300_WRITE_ENA_Z 4 |
# define R300_WRITE_ENA_W 8 |
# define R300_SWIZZLE1_SHIFT 16 |
# define R300_VAP_SWIZZLE_X001 \ |
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ |
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \ |
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \ |
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ |
(0xf << R300_WRITE_ENA_SHIFT)) |
# define R300_VAP_SWIZZLE_XY01 \ |
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ |
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ |
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \ |
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ |
(0xf << R300_WRITE_ENA_SHIFT)) |
# define R300_VAP_SWIZZLE_XYZ1 \ |
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ |
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ |
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \ |
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ |
(0xf << R300_WRITE_ENA_SHIFT)) |
# define R300_VAP_SWIZZLE_XYZW \ |
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ |
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ |
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \ |
(R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | \ |
(0xf << R300_WRITE_ENA_SHIFT)) |
#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 |
#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 |
#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec |
#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 |
#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 |
#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 |
#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc |
/* END: Vertex data assembly */ |
/* gap */ |
/* BEGIN: Upload vertex program and data */ |
/* |
* The programmable vertex shader unit has a memory bank of unknown size |
* that can be written to in 16 byte units by writing the address into |
* UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). |
* |
* Pointers into the memory bank are always in multiples of 16 bytes. |
* |
* The memory bank is divided into areas with fixed meaning. |
* |
* Starting at address UPLOAD_PROGRAM: Vertex program instructions. |
* Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), |
* whereas the difference between known addresses suggests size 512. |
* |
* Starting at address UPLOAD_PARAMETERS: Vertex program parameters. |
* Native reported limits and the VPI layout suggest size 256, whereas |
* difference between known addresses suggests size 512. |
* |
* At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the |
* floating point pointsize. The exact purpose of this state is uncertain, |
* as there is also the R300_RE_POINTSIZE register. |
* |
* Multiple vertex programs and parameter sets can be loaded at once, |
* which could explain the size discrepancy. |
*/ |
#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 |
# define R300_PVS_CODE_START 0 |
# define R300_MAX_PVS_CODE_LINES 256 |
# define R500_MAX_PVS_CODE_LINES 1024 |
# define R300_PVS_CONST_START 512 |
# define R500_PVS_CONST_START 1024 |
# define R300_MAX_PVS_CONST_VECS 256 |
# define R500_MAX_PVS_CONST_VECS 256 |
# define R300_PVS_UCP_START 1024 |
# define R500_PVS_UCP_START 1536 |
# define R300_POINT_VPORT_SCALE_OFFSET 1030 |
# define R500_POINT_VPORT_SCALE_OFFSET 1542 |
# define R300_POINT_GEN_TEX_OFFSET 1031 |
# define R500_POINT_GEN_TEX_OFFSET 1543 |
/* |
* These are obsolete defines from r300_context.h, but they might give some |
* clues when investigating the addresses further... |
*/ |
#if 0 |
#define VSF_DEST_PROGRAM 0x0 |
#define VSF_DEST_MATRIX0 0x200 |
#define VSF_DEST_MATRIX1 0x204 |
#define VSF_DEST_MATRIX2 0x208 |
#define VSF_DEST_VECTOR0 0x20c |
#define VSF_DEST_VECTOR1 0x20d |
#define VSF_DEST_UNKNOWN1 0x400 |
#define VSF_DEST_UNKNOWN2 0x406 |
#endif |
/* gap */ |
#define R300_VAP_PVS_UPLOAD_DATA 0x2208 |
/* END: Upload vertex program and data */ |
/* gap */ |
/* I do not know the purpose of this register. However, I do know that |
* it is set to 221C_CLEAR for clear operations and to 221C_NORMAL |
* for normal rendering. |
* |
* 2007-11-05: This register is the user clip plane control register, but there |
* also seems to be a rendering mode control; the NORMAL/CLEAR defines. |
* |
* See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view |
*/ |
#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218 |
#define R300_VAP_CLIP_CNTL 0x221C |
# define R300_VAP_UCP_ENABLE_0 (1 << 0) |
# define R300_VAP_UCP_ENABLE_1 (1 << 1) |
# define R300_VAP_UCP_ENABLE_2 (1 << 2) |
# define R300_VAP_UCP_ENABLE_3 (1 << 3) |
# define R300_VAP_UCP_ENABLE_4 (1 << 4) |
# define R300_VAP_UCP_ENABLE_5 (1 << 5) |
# define R300_PS_UCP_MODE_DIST_COP (0 << 14) |
# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) |
# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) |
# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) |
# define R300_CLIP_DISABLE (1 << 16) |
# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) |
# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) |
# define R500_COLOR2_IS_TEXTURE (1 << 20) |
# define R500_COLOR3_IS_TEXTURE (1 << 21) |
/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first |
* plane is per-pixel and the second plane is per-vertex. |
* |
* This was determined by experimentation alone but I believe it is correct. |
* |
* These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. |
*/ |
#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 |
#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 |
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 |
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c |
#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 |
#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) |
#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) |
#define R300_PVS_FC_LAST_INST(x) ((x) << 16) |
#define R300_PVS_FC_RTN_INST(x) ((x) << 24) |
/* gap */ |
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between |
* rendering commands and overwriting vertex program parameters. |
* Therefore, I suspect writing zero to 0x2284 synchronizes the engine and |
* avoids bugs caused by still running shaders reading bad data from memory. |
*/ |
#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 |
/* This register is used to define the number of core clocks to wait for a |
* vertex to be received by the VAP input controller (while the primitive |
* path is backed up) before forcing any accumulated vertices to be submitted |
* to the vertex processing path. |
*/ |
#define VAP_PVS_VTX_TIMEOUT_REG 0x2288 |
# define R300_2288_R300 0x00750000 /* -- nh */ |
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ |
#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 |
#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) |
#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) |
/* gap */ |
/* Addresses are relative to the vertex program instruction area of the |
* memory bank. PROGRAM_END points to the last instruction of the active |
* program |
* |
* The meaning of the two UNKNOWN fields is obviously not known. However, |
* experiments so far have shown that both *must* point to an instruction |
* inside the vertex program, otherwise the GPU locks up. |
* |
* fglrx usually sets CNTL_3_UNKNOWN to the end of the program and |
* R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to |
* position takes place. |
* |
* Most likely this is used to ignore the rest of the program in cases |
* where a group of verts isn't visible. For some reason this "section" |
* sometimes also accepts other instructions that have no relationship with |
* position calculations. |
*/ |
#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 |
# define R300_PVS_FIRST_INST_SHIFT 0 |
# define R300_PVS_XYZW_VALID_INST_SHIFT 10 |
# define R300_PVS_LAST_INST_SHIFT 20 |
# define R300_PVS_FIRST_INST(x) ((x) << 0) |
# define R300_PVS_XYZW_VALID_INST(x) ((x) << 10) |
# define R300_PVS_LAST_INST(x) ((x) << 20) |
/* Addresses are relative to the vertex program parameters area. */ |
#define R300_VAP_PVS_CONST_CNTL 0x22D4 |
# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 |
# define R300_PVS_CONST_BASE_OFFSET(x) (x) |
# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 |
# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16) |
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 |
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 |
/* Flow control opcodes: each macro places a 2-bit opcode (1 = jump,
 * 2 = loop, 3 = jsr) at bit position 2 * x.
 * The opcodes are unsigned constants because for x = 15 the LOOP and JSR
 * values reach bit 31, and shifting a signed int that far is undefined
 * behavior in C. */
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
#define R300_VAP_PVS_FC_OPC_JUMP(x) (1u << (2 * (x)))
#define R300_VAP_PVS_FC_OPC_LOOP(x) (2u << (2 * (x)))
#define R300_VAP_PVS_FC_OPC_JSR(x) (3u << (2 * (x)))
/* The entire range from 0x2300 to 0x24AC inclusive seems to be used for |
* immediate vertices |
*/ |
#define R300_VAP_VTX_COLOR_R 0x2464 |
#define R300_VAP_VTX_COLOR_G 0x2468 |
#define R300_VAP_VTX_COLOR_B 0x246C |
#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ |
#define R300_VAP_VTX_POS_0_Y_1 0x2494 |
#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ |
#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ |
#define R300_VAP_VTX_POS_0_Y_2 0x24A4 |
#define R300_VAP_VTX_POS_0_Z_2 0x24A8 |
/* write 0 to indicate end of packet? */ |
#define R300_VAP_VTX_END_OF_PKT 0x24AC |
#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 |
#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) |
#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) |
#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 |
#define R500_PVS_FC_LAST_INST(x) ((x) << 0) |
#define R500_PVS_FC_RTN_INST(x) ((x) << 16) |
/* gap */ |
/* These are values from r300_reg/r300_reg.h - they are known to be correct |
* and are here so we can use one register file instead of several |
* - Vladimir |
*/ |
#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 |
# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) |
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) |
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) |
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) |
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) |
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) |
# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) |
#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 |
/* each of the following is 3 bits wide, specifies number |
of components */ |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 |
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 |
/* UNK30 seems to enable point to quad transformation on textures |
* (or something closely related to that). |
* This bit is rather fatal at the time being due to shortcomings on the |
* pixel shader side. |
* Specifies top of Raster pipe specific enable controls. |
*/ |
#define R300_GB_ENABLE 0x4008 |
# define R300_GB_POINT_STUFF_DISABLE (0 << 0) |
# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */ |
# define R300_GB_LINE_STUFF_DISABLE (0 << 1) |
# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */ |
# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2) |
# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */ |
# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4) |
# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */ |
# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */ |
/* each of the following is 2 bits wide */ |
#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */ |
#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */ |
#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */ |
# define R300_GB_TEX0_SOURCE_SHIFT 16 |
# define R300_GB_TEX1_SOURCE_SHIFT 18 |
# define R300_GB_TEX2_SOURCE_SHIFT 20 |
# define R300_GB_TEX3_SOURCE_SHIFT 22 |
# define R300_GB_TEX4_SOURCE_SHIFT 24 |
# define R300_GB_TEX5_SOURCE_SHIFT 26 |
# define R300_GB_TEX6_SOURCE_SHIFT 28 |
# define R300_GB_TEX7_SOURCE_SHIFT 30 |
/* MSPOS - positions for multisample antialiasing (?) */ |
#define R300_GB_MSPOS0 0x4010 |
/* shifts - each of the fields is 4 bits */ |
# define R300_GB_MSPOS0__MS_X0_SHIFT 0 |
# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 |
# define R300_GB_MSPOS0__MS_X1_SHIFT 8 |
# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 |
# define R300_GB_MSPOS0__MS_X2_SHIFT 16 |
# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 |
# define R300_GB_MSPOS0__MSBD0_Y 24 |
# define R300_GB_MSPOS0__MSBD0_X 28 |
#define R300_GB_MSPOS1 0x4014 |
# define R300_GB_MSPOS1__MS_X3_SHIFT 0 |
# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 |
# define R300_GB_MSPOS1__MS_X4_SHIFT 8 |
# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 |
# define R300_GB_MSPOS1__MS_X5_SHIFT 16 |
# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 |
# define R300_GB_MSPOS1__MSBD1 24 |
/* Specifies the graphics pipeline configuration for rasterization. */ |
#define R300_GB_TILE_CONFIG 0x4018 |
# define R300_GB_TILE_DISABLE (0 << 0) |
# define R300_GB_TILE_ENABLE (1 << 0) |
# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */ |
# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */ |
# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */ |
# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */ |
# define R300_GB_TILE_SIZE_8 (0 << 4) |
# define R300_GB_TILE_SIZE_16 (1 << 4) |
# define R300_GB_TILE_SIZE_32 (2 << 4) |
# define R300_GB_SUPER_SIZE_1 (0 << 6) |
# define R300_GB_SUPER_SIZE_2 (1 << 6) |
# define R300_GB_SUPER_SIZE_4 (2 << 6) |
# define R300_GB_SUPER_SIZE_8 (3 << 6) |
# define R300_GB_SUPER_SIZE_16 (4 << 6) |
# define R300_GB_SUPER_SIZE_32 (5 << 6) |
# define R300_GB_SUPER_SIZE_64 (6 << 6) |
# define R300_GB_SUPER_SIZE_128 (7 << 6) |
# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ |
# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ |
# define R300_GB_SUPER_TILE_A (0 << 15) |
# define R300_GB_SUPER_TILE_B (1 << 15) |
# define R300_GB_SUBPIXEL_1_12 (0 << 16) |
# define R300_GB_SUBPIXEL_1_16 (1 << 16) |
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) |
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) |
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) |
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) |
# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) |
# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) |
# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) |
# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) |
# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21) |
# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22) |
# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) |
# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) |
# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) |
# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) |
/* Specifies the sizes of the various FIFOs in the sc/rs/us. This register must be the first one written. */ |
#define R300_GB_FIFO_SIZE 0x4024 |
/* each of the following is 2 bits wide */ |
#define R300_GB_FIFO_SIZE_32 0 |
#define R300_GB_FIFO_SIZE_64 1 |
#define R300_GB_FIFO_SIZE_128 2 |
#define R300_GB_FIFO_SIZE_256 3 |
# define R300_SC_IFIFO_SIZE_SHIFT 0 |
# define R300_SC_TZFIFO_SIZE_SHIFT 2 |
# define R300_SC_BFIFO_SIZE_SHIFT 4 |
# define R300_US_OFIFO_SIZE_SHIFT 12 |
# define R300_US_WFIFO_SIZE_SHIFT 14 |
/* the following use the same constants as above, but meaning is |
is times 2 (i.e. instead of 32 words it means 64 */ |
# define R300_RS_TFIFO_SIZE_SHIFT 6 |
# define R300_RS_CFIFO_SIZE_SHIFT 8 |
# define R300_US_RAM_SIZE_SHIFT 10 |
/* watermarks, 3 bits wide */ |
# define R300_RS_HIGHWATER_COL_SHIFT 16 |
# define R300_RS_HIGHWATER_TEX_SHIFT 19 |
# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ |
# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 |
#define R300_GB_Z_PEQ_CONFIG 0x4028 |
# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) |
# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) |
/* Specifies various polygon specific selects (fog, depth, perspective). */ |
#define R300_GB_SELECT 0x401c |
# define R300_GB_FOG_SELECT_C0A (0 << 0) |
# define R300_GB_FOG_SELECT_C1A (1 << 0) |
# define R300_GB_FOG_SELECT_C2A (2 << 0) |
# define R300_GB_FOG_SELECT_C3A (3 << 0) |
# define R300_GB_FOG_SELECT_1_1_W (4 << 0) |
# define R300_GB_FOG_SELECT_Z (5 << 0) |
# define R300_GB_DEPTH_SELECT_Z (0 << 3) |
# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) |
# define R300_GB_W_SELECT_1_W (0 << 4) |
# define R300_GB_W_SELECT_1 (1 << 4) |
# define R300_GB_FOG_STUFF_DISABLE (0 << 5) |
# define R300_GB_FOG_STUFF_ENABLE (1 << 5) |
# define R300_GB_FOG_STUFF_TEX_SHIFT 6 |
# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0 |
# define R300_GB_FOG_STUFF_COMP_SHIFT 10 |
# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00 |
/* Specifies the graphics pipeline configuration for antialiasing. */ |
#define R300_GB_AA_CONFIG 0x4020 |
# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0) |
# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0) |
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) |
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) |
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) |
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) |
/* Selects which of 4 pipes are active. */ |
#define R300_GB_PIPE_SELECT 0x402c |
# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 |
# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 |
# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 |
# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 |
# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 |
# define R300_GB_PIPE_SELECT_MAX_PIPE 12 |
# define R300_GB_PIPE_SELECT_BAD_PIPES 14 |
# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18 |
/* Specifies the sizes of the various FIFOs in the sc/rs. */ |
#define R300_GB_FIFO_SIZE1 0x4070 |
/* High water mark for SC input fifo */ |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f |
/* High water mark for SC input fifo (B) */ |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 |
/* High water mark for RS colors' fifo */ |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 |
/* High water mark for RS textures' fifo */ |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 |
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 |
/* This table specifies the source location and format for up to 16 texture
 * addresses (i[0]:i[15]) and four colors (c[0]:c[3])
 */
#define R500_RS_IP_0 0x4074
#define R500_RS_IP_1 0x4078
#define R500_RS_IP_2 0x407C
#define R500_RS_IP_3 0x4080
#define R500_RS_IP_4 0x4084
#define R500_RS_IP_5 0x4088
#define R500_RS_IP_6 0x408C
#define R500_RS_IP_7 0x4090
#define R500_RS_IP_8 0x4094
#define R500_RS_IP_9 0x4098
#define R500_RS_IP_10 0x409C
#define R500_RS_IP_11 0x40A0
#define R500_RS_IP_12 0x40A4
#define R500_RS_IP_13 0x40A8
#define R500_RS_IP_14 0x40AC
#define R500_RS_IP_15 0x40B0
/* Named pointer values K0 and K1. */
#define R500_RS_IP_PTR_K0 62
#define R500_RS_IP_PTR_K1 63
/* Bit positions of the fields inside each RS_IP register. */
#define R500_RS_IP_TEX_PTR_S_SHIFT 0
#define R500_RS_IP_TEX_PTR_T_SHIFT 6
#define R500_RS_IP_TEX_PTR_R_SHIFT 12
#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
#define R500_RS_IP_COL_PTR_SHIFT 24
#define R500_RS_IP_COL_FMT_SHIFT 27
/* Field builders; each shifts its argument by the matching *_SHIFT above. */
# define R500_RS_SEL_S(x) ((x) << 0)
# define R500_RS_SEL_T(x) ((x) << 6)
# define R500_RS_SEL_R(x) ((x) << 12)
# define R500_RS_SEL_Q(x) ((x) << 18)
# define R500_RS_COL_PTR(x) ((x) << 24)
# define R500_RS_COL_FMT(x) ((x) << 27)
/* gap */
/* Bit 31. The enable value is built from an unsigned constant because
 * shifting a signed 1 into the sign bit is undefined behaviour in C.
 */
#define R500_RS_IP_OFFSET_DIS (0 << 31)
#define R500_RS_IP_OFFSET_EN (1u << 31)
/* gap */
/* Zero to flush caches. */ |
#define R300_TX_INVALTAGS 0x4100 |
#define R300_TX_FLUSH 0x0 |
/* The upper enable bits are guessed, based on fglrx reported limits. */ |
#define R300_TX_ENABLE 0x4104 |
# define R300_TX_ENABLE_0 (1 << 0) |
# define R300_TX_ENABLE_1 (1 << 1) |
# define R300_TX_ENABLE_2 (1 << 2) |
# define R300_TX_ENABLE_3 (1 << 3) |
# define R300_TX_ENABLE_4 (1 << 4) |
# define R300_TX_ENABLE_5 (1 << 5) |
# define R300_TX_ENABLE_6 (1 << 6) |
# define R300_TX_ENABLE_7 (1 << 7) |
# define R300_TX_ENABLE_8 (1 << 8) |
# define R300_TX_ENABLE_9 (1 << 9) |
# define R300_TX_ENABLE_10 (1 << 10) |
# define R300_TX_ENABLE_11 (1 << 11) |
# define R300_TX_ENABLE_12 (1 << 12) |
# define R300_TX_ENABLE_13 (1 << 13) |
# define R300_TX_ENABLE_14 (1 << 14) |
# define R300_TX_ENABLE_15 (1 << 15) |
/* R500 TX_FILTER_4 register and its field values. */
#define R500_TX_FILTER_4 0x4110
# define R500_TX_WEIGHT_1_SHIFT (0)
# define R500_TX_WEIGHT_0_SHIFT (11)
# define R500_TX_WEIGHT_PAIR (1<<22)
# define R500_TX_PHASE_SHIFT (23)
# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
# define R500_TX_DIRECTION_VERTICAL (1<<27)
/* Misspelled legacy name, kept as an alias so existing users still build. */
# define R500_TX_DIRECTION_VERITCAL R500_TX_DIRECTION_VERTICAL
#define R500_SU_TEX_WRAP_PS3 0x4114 |
/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ |
#define R300_GA_POINT_S0 0x4200 |
/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ |
#define R300_GA_POINT_T0 0x4204 |
/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ |
#define R300_GA_POINT_S1 0x4208 |
/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ |
#define R300_GA_POINT_T1 0x420c |
/* Specifies amount to shift integer position of vertex (screen space) before |
* converting to float for triangle stipple. |
*/ |
#define R300_GA_TRIANGLE_STIPPLE 0x4214 |
# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 |
# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f |
# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 |
# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 |
/* The pointsize is given in multiples of 6. The pointsize can be enormous:
 * Clear() renders a single point that fills the entire framebuffer.
 * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
 * 8b precision).
 */
#define R300_GA_POINT_SIZE 0x421C
/* Y occupies the low 16 bits, X the high 16 bits. */
# define R300_POINTSIZE_Y_SHIFT 0
# define R300_POINTSIZE_Y_MASK 0x0000ffff
# define R300_POINTSIZE_X_SHIFT 16
# define R300_POINTSIZE_X_MASK 0xffff0000
/* Largest 16-bit field value divided by the multiple-of-6 scale. */
# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
/* Red fill color */ |
#define R500_GA_FILL_R 0x4220 |
/* Green fill color */ |
#define R500_GA_FILL_G 0x4224 |
/* Blue fill color */ |
#define R500_GA_FILL_B 0x4228 |
/* Alpha fill color */ |
#define R500_GA_FILL_A 0x422c |
/* Specifies maximum and minimum point & sprite sizes for per vertex size
 * specification. The lower part (15:0) is MIN and (31:16) is MAX.
 */
#define R300_GA_POINT_MINMAX 0x4230
# define R300_GA_POINT_MINMAX_MIN_SHIFT 0
# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0)
# define R300_GA_POINT_MINMAX_MAX_SHIFT 16
/* Unsigned constant: 0xFFFF << 16 does not fit in a 32-bit signed int,
 * so the signed form would be undefined behaviour.
 */
# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFFu << 16)
/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
 * subprecision); (16.0) fixed format.
 *
 * The line width is given in multiples of 6.
 * In default mode lines are classified as vertical lines.
 * HO: horizontal
 * VE: vertical or horizontal
 * HO & VE: no classification
 */
#define R300_GA_LINE_CNTL 0x4234
# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0
# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff
# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16)
# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16)
# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */
# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */
# define R500_GA_LINE_CNTL_SORT_NO (0 << 18)
# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18)
/** TODO: looks wrong */
# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
/** TODO: looks wrong
 * Same value as R300_GA_LINE_CNTL_END_TYPE_VER.
 */
# define R300_LINE_CNT_HO (1 << 16)
/** TODO: looks wrong
 * Same value as R300_GA_LINE_CNTL_END_TYPE_SQR.
 */
# define R300_LINE_CNT_VE (1 << 17)
/* Line Stipple configuration information. */ |
#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 |
# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) |
# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) |
# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) |
# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 |
# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc |
/* Used to load US instructions and constants */ |
#define R500_GA_US_VECTOR_INDEX 0x4250 |
# define R500_GA_US_VECTOR_INDEX_SHIFT 0 |
# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff |
# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) |
# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) |
# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) |
# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) |
/* Data register for loading US instructions and constants */ |
#define R500_GA_US_VECTOR_DATA 0x4254 |
/* Specifies color properties and mappings of textures. */ |
#define R500_GA_COLOR_CONTROL_PS3 0x4258 |
# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) |
# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) |
# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) |
# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) |
# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) |
# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) |
# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) |
# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) |
# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) |
# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) |
# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) |
# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) |
# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) |
# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) |
# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) |
# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) |
# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) |
# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) |
# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) |
# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) |
# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) |
# define R500_TEX7_SHADING_PS3_SOLID (0 << 14) |
# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) |
# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) |
# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) |
# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) |
# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) |
# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) |
# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) |
# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) |
# define R500_TEX10_SHADING_PS3_SOLID (0 << 20) |
# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) |
# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) |
# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) |
# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) |
# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) |
# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) |
/* Returns idle status of various G3D block, captured when GA_IDLE written or
 * when hard or soft reset asserted.
 *
 * Each flag below is a single-bit mask at its own bit position (0..26), so it
 * can be tested against the register value. They were previously written as
 * (0 << n), which made every flag evaluate to 0.
 * NOTE(review): that a set bit means "idle" is assumed from the names --
 * confirm against the hardware documentation.
 */
#define R500_GA_IDLE 0x425c
# define R500_GA_IDLE_PIPE3_Z_IDLE (1 << 0)
# define R500_GA_IDLE_PIPE2_Z_IDLE (1 << 1)
# define R500_GA_IDLE_PIPE3_CD_IDLE (1 << 2)
# define R500_GA_IDLE_PIPE2_CD_IDLE (1 << 3)
# define R500_GA_IDLE_PIPE3_FG_IDLE (1 << 4)
# define R500_GA_IDLE_PIPE2_FG_IDLE (1 << 5)
# define R500_GA_IDLE_PIPE3_US_IDLE (1 << 6)
# define R500_GA_IDLE_PIPE2_US_IDLE (1 << 7)
# define R500_GA_IDLE_PIPE3_SC_IDLE (1 << 8)
# define R500_GA_IDLE_PIPE2_SC_IDLE (1 << 9)
# define R500_GA_IDLE_PIPE3_RS_IDLE (1 << 10)
# define R500_GA_IDLE_PIPE2_RS_IDLE (1 << 11)
# define R500_GA_IDLE_PIPE1_Z_IDLE (1 << 12)
# define R500_GA_IDLE_PIPE0_Z_IDLE (1 << 13)
# define R500_GA_IDLE_PIPE1_CD_IDLE (1 << 14)
# define R500_GA_IDLE_PIPE0_CD_IDLE (1 << 15)
# define R500_GA_IDLE_PIPE1_FG_IDLE (1 << 16)
# define R500_GA_IDLE_PIPE0_FG_IDLE (1 << 17)
# define R500_GA_IDLE_PIPE1_US_IDLE (1 << 18)
# define R500_GA_IDLE_PIPE0_US_IDLE (1 << 19)
# define R500_GA_IDLE_PIPE1_SC_IDLE (1 << 20)
# define R500_GA_IDLE_PIPE0_SC_IDLE (1 << 21)
# define R500_GA_IDLE_PIPE1_RS_IDLE (1 << 22)
# define R500_GA_IDLE_PIPE0_RS_IDLE (1 << 23)
# define R500_GA_IDLE_SU_IDLE (1 << 24)
# define R500_GA_IDLE_GA_IDLE (1 << 25)
# define R500_GA_IDLE_GA_UNIT2_IDLE (1 << 26)
/* Current value of stipple accumulator. */ |
#define R300_GA_LINE_STIPPLE_VALUE 0x4260 |
/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ |
#define R300_GA_LINE_S0 0x4264 |
/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ |
#define R300_GA_LINE_S1 0x4268 |
/* GA Input fifo high water marks */ |
#define R500_GA_FIFO_CNTL 0x4270 |
# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 |
# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 |
# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 |
# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 |
# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 |
# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 |
/* GA enhance/tweaks */ |
#define R300_GA_ENHANCE 0x4274 |
# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) |
# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ |
# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) |
# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ |
# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ |
# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ |
# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) |
# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */ |
/* GA_COLOR_CONTROL: a 2-bit shading selector (SOLID/FLAT/GOURAUD) for the RGB
 * and alpha parts of colors 0-3, followed by the provoking-vertex selector at
 * bit 16.
 */
#define R300_GA_COLOR_CONTROL 0x4278
# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0)
# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0)
# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0)
# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2)
# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2)
# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2)
# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4)
# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4)
# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4)
# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6)
# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6)
# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6)
# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8)
# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8)
# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8)
# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10)
# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10)
# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10)
# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12)
# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12)
# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12)
# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14)
# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14)
# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16)
/* FLAT selected for the RGB and alpha parts of all four colors. */
# define R300_SHADE_MODEL_FLAT ( \
	R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | \
	R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT )
/* GOURAUD selected for the RGB and alpha parts of all four colors. */
# define R300_SHADE_MODEL_SMOOTH ( \
	R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | \
	R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD )
/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ |
#define R300_GA_SOLID_RG 0x427c |
# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 |
# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff |
# define GA_SOLID_RG_COLOR_RED_SHIFT 16 |
# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000 |
/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */ |
#define R300_GA_SOLID_BA 0x4280 |
# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0 |
# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff |
# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16 |
# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000 |
/* Polygon Mode |
* Dangerous |
*/ |
#define R300_GA_POLY_MODE 0x4288 |
# define R300_GA_POLY_MODE_DISABLE (0 << 0) |
# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ |
/* reserved */ |
# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) |
# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) |
# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) |
/* reserved */ |
# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) |
# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) |
# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) |
/* reserved */ |
/* Specifies the rounding mode for geometry & color SPFP to FP conversions. */
#define R300_GA_ROUND_MODE 0x428c
# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0)
# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2)
# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2)
# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4)
# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4)
# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5)
# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5)
/* Geometry mask: a 4-bit field at bits 9:6. */
# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6
# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0
/* Specifies x & y offsets for vertex data after conversion to FP. |
* Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b |
* subprecision). |
*/ |
#define R300_GA_OFFSET 0x4290 |
# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 |
# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff |
# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 |
# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 |
/* Specifies the scale to apply to fog. */ |
#define R300_GA_FOG_SCALE 0x4294 |
/* Specifies the offset to apply to fog. */ |
#define R300_GA_FOG_OFFSET 0x4298 |
/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ |
#define R300_GA_SOFT_RESET 0x429c |
/* Not sure why there are duplicates of the factor and constant values.
 * My best guess so far is that there are separate zbiases for test and write.
 * Ordering might be wrong.
 * Some of the tests indicate that fgl has a fallback implementation of zbias
 * via pixel shaders.
 */
#define R300_SU_TEX_WRAP 0x42A0
#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4
#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8
#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC
#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0
/* This register needs to be set to (1<<1) for RV350 to correctly
 * perform depth test (see --vb-triangles in r300_demo)
 * Don't know about other chips. - Vladimir
 * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
 * My guess is that there are two bits for each zbias primitive
 * (FILL, LINE, POINT).
 * One to enable depth test and one for depth write.
 * Yet this doesn't explain why depth writes work ...
 */
#define R300_SU_POLY_OFFSET_ENABLE 0x42B4
# define R300_FRONT_ENABLE (1 << 0)
# define R300_BACK_ENABLE (1 << 1)
# define R300_PARA_ENABLE (1 << 2)
#define R300_SU_CULL_MODE 0x42B8 |
# define R300_CULL_FRONT (1 << 0) |
# define R300_CULL_BACK (1 << 1) |
# define R300_FRONT_FACE_CCW (0 << 2) |
# define R300_FRONT_FACE_CW (1 << 2) |
/* SU Depth Scale value */ |
#define R300_SU_DEPTH_SCALE 0x42c0 |
/* SU Depth Offset value */ |
#define R300_SU_DEPTH_OFFSET 0x42c4 |
#define R300_SU_REG_DEST 0x42c8 |
# define R300_RASTER_PIPE_SELECT_0 (1 << 0) |
# define R300_RASTER_PIPE_SELECT_1 (1 << 1) |
# define R300_RASTER_PIPE_SELECT_2 (1 << 2) |
# define R300_RASTER_PIPE_SELECT_3 (1 << 3) |
# define R300_RASTER_PIPE_SELECT_ALL 0xf |
/* BEGIN: Rasterization / Interpolators - many guesses */ |
/* |
* TC_CNT is the number of incoming texture coordinate sets (i.e. it depends |
* on the vertex program, *not* the fragment program) |
*/ |
#define R300_RS_COUNT 0x4300 |
# define R300_IT_COUNT_SHIFT 0 |
# define R300_IT_COUNT_MASK 0x0000007f |
# define R300_IC_COUNT_SHIFT 7 |
# define R300_IC_COUNT_MASK 0x00000780 |
# define R300_W_ADDR_SHIFT 12 |
# define R300_W_ADDR_MASK 0x0003f000 |
# define R300_HIRES_DIS (0 << 18) |
# define R300_HIRES_EN (1 << 18) |
# define R300_IT_COUNT(x) ((x) << 0) |
# define R300_IC_COUNT(x) ((x) << 7) |
# define R300_W_COUNT(x) ((x) << 12) |
#define R300_RS_INST_COUNT 0x4304 |
# define R300_RS_INST_COUNT_SHIFT 0 |
# define R300_RS_INST_COUNT_MASK 0x0000000f |
# define R300_RS_TX_OFFSET_SHIFT 5 |
# define R300_RS_TX_OFFSET_MASK 0x000000e0 |
# define R300_RS_TX_OFFSET(x) ((x) << 5) |
/* gap */
/* Only used for texture coordinates.
 * Use the source field to route texture coordinate input from the
 * vertex program to the desired interpolator. Note that the source
 * field is relative to the outputs the vertex program *actually*
 * writes. If a vertex program only writes texcoord[1], this will
 * be source index 0.
 * Set INTERP_USED on all interpolators that produce data used by
 * the fragment program. INTERP_USED looks like a swizzling mask,
 * but I haven't seen it used that way.
 *
 * Note: The _UNKNOWN constants are always set in their respective
 * register. I don't know if this is necessary.
 */
#define R300_RS_IP_0 0x4310
#define R300_RS_IP_1 0x4314
#define R300_RS_IP_2 0x4318
#define R300_RS_IP_3 0x431C
# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
/* Field builders. The argument is parenthesized in every macro so that an
 * expression argument cannot regroup around the shift.
 */
# define R300_RS_TEX_PTR(x) ((x) << 0)
# define R300_RS_COL_PTR(x) ((x) << 6)
# define R300_RS_COL_FMT(x) ((x) << 9)
/* Values for R300_RS_COL_FMT(). */
# define R300_RS_COL_FMT_RGBA 0
# define R300_RS_COL_FMT_RGB0 1
# define R300_RS_COL_FMT_RGB1 2
# define R300_RS_COL_FMT_000A 4
# define R300_RS_COL_FMT_0000 5
# define R300_RS_COL_FMT_0001 6
# define R300_RS_COL_FMT_111A 8
# define R300_RS_COL_FMT_1110 9
# define R300_RS_COL_FMT_1111 10
# define R300_RS_SEL_S(x) ((x) << 13)
# define R300_RS_SEL_T(x) ((x) << 16)
# define R300_RS_SEL_R(x) ((x) << 19)
# define R300_RS_SEL_Q(x) ((x) << 22)
/* Values for the R300_RS_SEL_*() builders. */
# define R300_RS_SEL_C0 0
# define R300_RS_SEL_C1 1
# define R300_RS_SEL_C2 2
# define R300_RS_SEL_C3 3
# define R300_RS_SEL_K0 4
# define R300_RS_SEL_K1 5
/* */ |
#define R500_RS_INST_0 0x4320 |
#define R500_RS_INST_1 0x4324 |
#define R500_RS_INST_2 0x4328 |
#define R500_RS_INST_3 0x432c |
#define R500_RS_INST_4 0x4330 |
#define R500_RS_INST_5 0x4334 |
#define R500_RS_INST_6 0x4338 |
#define R500_RS_INST_7 0x433c |
#define R500_RS_INST_8 0x4340 |
#define R500_RS_INST_9 0x4344 |
#define R500_RS_INST_10 0x4348 |
#define R500_RS_INST_11 0x434c |
#define R500_RS_INST_12 0x4350 |
#define R500_RS_INST_13 0x4354 |
#define R500_RS_INST_14 0x4358 |
#define R500_RS_INST_15 0x435c |
#define R500_RS_INST_TEX_ID_SHIFT 0 |
# define R500_RS_INST_TEX_ID(x) ((x) << 0) |
#define R500_RS_INST_TEX_CN_WRITE (1 << 4) |
#define R500_RS_INST_TEX_ADDR_SHIFT 5 |
# define R500_RS_INST_TEX_ADDR(x) ((x) << 5) |
#define R500_RS_INST_COL_ID_SHIFT 12 |
# define R500_RS_INST_COL_ID(x) ((x) << 12) |
#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) |
#define R500_RS_INST_COL_CN_WRITE (1 << 16) |
#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) |
#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) |
#define R500_RS_INST_COL_ADDR_SHIFT 18 |
# define R500_RS_INST_COL_ADDR(x) ((x) << 18) |
#define R500_RS_INST_TEX_ADJ (1 << 25) |
#define R500_RS_INST_W_CN (1 << 26) |
/* These DWORDs control how vertex data is routed into fragment program |
* registers, after interpolators. |
*/ |
#define R300_RS_INST_0 0x4330 |
#define R300_RS_INST_1 0x4334 |
#define R300_RS_INST_2 0x4338 |
#define R300_RS_INST_3 0x433C |
#define R300_RS_INST_4 0x4340 |
#define R300_RS_INST_5 0x4344 |
#define R300_RS_INST_6 0x4348 |
#define R300_RS_INST_7 0x434C |
# define R300_RS_INST_TEX_ID(x) ((x) << 0) |
# define R300_RS_INST_TEX_CN_WRITE (1 << 3) |
# define R300_RS_INST_TEX_ADDR(x) ((x) << 6) |
# define R300_RS_INST_TEX_ADDR_SHIFT 6 |
# define R300_RS_INST_COL_ID(x) ((x) << 11) |
# define R300_RS_INST_COL_CN_WRITE (1 << 14) |
# define R300_RS_INST_COL_ADDR(x) ((x) << 17) |
# define R300_RS_INST_COL_ADDR_SHIFT 17 |
# define R300_RS_INST_TEX_ADJ (1 << 22) |
# define R300_RS_COL_BIAS_UNUSED_SHIFT 23 |
/* END: Rasterization / Interpolators - many guesses */ |
/* Hierarchical Z Enable */ |
#define R300_SC_HYPERZ 0x43a4 |
# define R300_SC_HYPERZ_DISABLE (0 << 0) |
# define R300_SC_HYPERZ_ENABLE (1 << 0) |
# define R300_SC_HYPERZ_MIN (0 << 1) |
# define R300_SC_HYPERZ_MAX (1 << 1) |
# define R300_SC_HYPERZ_ADJ_256 (0 << 2) |
# define R300_SC_HYPERZ_ADJ_128 (1 << 2) |
# define R300_SC_HYPERZ_ADJ_64 (2 << 2) |
# define R300_SC_HYPERZ_ADJ_32 (3 << 2) |
# define R300_SC_HYPERZ_ADJ_16 (4 << 2) |
# define R300_SC_HYPERZ_ADJ_8 (5 << 2) |
# define R300_SC_HYPERZ_ADJ_4 (6 << 2) |
# define R300_SC_HYPERZ_ADJ_2 (7 << 2) |
# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5) |
# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5) |
# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) |
# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) |
#define R300_SC_EDGERULE 0x43a8 |
/* BEGIN: Scissors and cliprects */ |
/* There are four clipping rectangles. Their corner coordinates are inclusive. |
* Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending |
* on whether the pixel is inside cliprects 0-3, respectively. For example, |
* if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned |
* the number 3 (binary 0011). |
* Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, |
* the pixel is rasterized. |
* |
* In addition to this, there is a scissors rectangle. Only pixels inside the |
* scissors rectangle are drawn. (coordinates are inclusive) |
* |
* For some reason, the top-left corner of the framebuffer is at (1440, 1440) |
* for the purpose of clipping and scissors. |
*/ |
#define R300_SC_CLIPRECT_TL_0 0x43B0 |
#define R300_SC_CLIPRECT_BR_0 0x43B4 |
#define R300_SC_CLIPRECT_TL_1 0x43B8 |
#define R300_SC_CLIPRECT_BR_1 0x43BC |
#define R300_SC_CLIPRECT_TL_2 0x43C0 |
#define R300_SC_CLIPRECT_BR_2 0x43C4 |
#define R300_SC_CLIPRECT_TL_3 0x43C8 |
#define R300_SC_CLIPRECT_BR_3 0x43CC |
# define R300_CLIPRECT_OFFSET 1440 |
# define R300_CLIPRECT_MASK 0x1FFF |
# define R300_CLIPRECT_X_SHIFT 0 |
# define R300_CLIPRECT_X_MASK (0x1FFF << 0) |
# define R300_CLIPRECT_Y_SHIFT 13 |
# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) |
#define R300_SC_CLIP_RULE 0x43D0 |
# define R300_CLIP_OUT (1 << 0) |
# define R300_CLIP_0 (1 << 1) |
# define R300_CLIP_1 (1 << 2) |
# define R300_CLIP_10 (1 << 3) |
# define R300_CLIP_2 (1 << 4) |
# define R300_CLIP_20 (1 << 5) |
# define R300_CLIP_21 (1 << 6) |
# define R300_CLIP_210 (1 << 7) |
# define R300_CLIP_3 (1 << 8) |
# define R300_CLIP_30 (1 << 9) |
# define R300_CLIP_31 (1 << 10) |
# define R300_CLIP_310 (1 << 11) |
# define R300_CLIP_32 (1 << 12) |
# define R300_CLIP_320 (1 << 13) |
# define R300_CLIP_321 (1 << 14) |
# define R300_CLIP_3210 (1 << 15) |
/* gap */ |
#define R300_SC_SCISSORS_TL 0x43E0 |
#define R300_SC_SCISSORS_BR 0x43E4 |
# define R300_SCISSORS_OFFSET 1440 |
# define R300_SCISSORS_X_SHIFT 0 |
# define R300_SCISSORS_X_MASK (0x1FFF << 0) |
# define R300_SCISSORS_Y_SHIFT 13 |
# define R300_SCISSORS_Y_MASK (0x1FFF << 13) |
/* Screen door sample mask */ |
#define R300_SC_SCREENDOOR 0x43e8 |
/* END: Scissors and cliprects */ |
/* BEGIN: Texture specification */ |
/* |
* The texture specification dwords are grouped by meaning and not by texture |
* unit. This means that e.g. the offset for texture image unit N is found in |
* register TX_OFFSET_0 + (4*N) |
*/ |
#define R300_TX_FILTER0_0 0x4400 |
#define R300_TX_FILTER0_1 0x4404 |
#define R300_TX_FILTER0_2 0x4408 |
#define R300_TX_FILTER0_3 0x440c |
#define R300_TX_FILTER0_4 0x4410 |
#define R300_TX_FILTER0_5 0x4414 |
#define R300_TX_FILTER0_6 0x4418 |
#define R300_TX_FILTER0_7 0x441c |
#define R300_TX_FILTER0_8 0x4420 |
#define R300_TX_FILTER0_9 0x4424 |
#define R300_TX_FILTER0_10 0x4428 |
#define R300_TX_FILTER0_11 0x442c |
#define R300_TX_FILTER0_12 0x4430 |
#define R300_TX_FILTER0_13 0x4434 |
#define R300_TX_FILTER0_14 0x4438 |
#define R300_TX_FILTER0_15 0x443c |
# define R300_TX_REPEAT 0 |
# define R300_TX_MIRRORED 1 |
# define R300_TX_CLAMP_TO_EDGE 2 |
# define R300_TX_MIRROR_ONCE_TO_EDGE 3 |
# define R300_TX_CLAMP 4 |
# define R300_TX_MIRROR_ONCE 5 |
# define R300_TX_CLAMP_TO_BORDER 6 |
# define R300_TX_MIRROR_ONCE_TO_BORDER 7 |
# define R300_TX_WRAP_S_SHIFT 0 |
# define R300_TX_WRAP_S_MASK (7 << 0) |
# define R300_TX_WRAP_T_SHIFT 3 |
# define R300_TX_WRAP_T_MASK (7 << 3) |
# define R300_TX_WRAP_R_SHIFT 6 |
# define R300_TX_WRAP_R_MASK (7 << 6) |
# define R300_TX_MAG_FILTER_4 (0 << 9) |
# define R300_TX_MAG_FILTER_NEAREST (1 << 9) |
# define R300_TX_MAG_FILTER_LINEAR (2 << 9) |
# define R300_TX_MAG_FILTER_ANISO (3 << 9) |
# define R300_TX_MAG_FILTER_MASK (3 << 9) |
# define R300_TX_MIN_FILTER_NEAREST (1 << 11) |
# define R300_TX_MIN_FILTER_LINEAR (2 << 11) |
# define R300_TX_MIN_FILTER_ANISO (3 << 11) |
# define R300_TX_MIN_FILTER_MASK (3 << 11) |
# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) |
# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) |
# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) |
# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) |
# define R300_TX_MAX_MIP_LEVEL_SHIFT 17 |
# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17) |
# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) |
# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) |
# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) |
# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21) |
# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21) |
# define R300_TX_MAX_ANISO_MASK (7 << 21) |
# define R300_TX_WRAP_S(x) ((x) << 0) |
# define R300_TX_WRAP_T(x) ((x) << 3) |
# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17) |
#define R300_TX_FILTER1_0 0x4440 |
# define R300_CHROMA_KEY_MODE_DISABLE 0 |
# define R300_CHROMA_KEY_FORCE 1 |
# define R300_CHROMA_KEY_BLEND 2 |
# define R300_MC_ROUND_NORMAL (0<<2) |
# define R300_MC_ROUND_MPEG4 (1<<2) |
# define R300_LOD_BIAS_SHIFT 3 |
# define R300_LOD_BIAS_MASK 0x1ff8 |
# define R300_EDGE_ANISO_EDGE_DIAG (0<<13) |
# define R300_EDGE_ANISO_EDGE_ONLY (1<<13) |
# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) |
# define R300_MC_COORD_TRUNCATE_MPEG (1<<14) |
# define R300_TX_TRI_PERF_0_8 (0<<15) |
# define R300_TX_TRI_PERF_1_8 (1<<15) |
# define R300_TX_TRI_PERF_1_4 (2<<15) |
# define R300_TX_TRI_PERF_3_8 (3<<15) |
# define R300_ANISO_THRESHOLD_MASK (7<<17) |
# define R400_DXTC_SWIZZLE_ENABLE (1<<21) |
# define R500_MACRO_SWITCH (1<<22) |
# define R500_TX_MAX_ANISO(x) ((x) << 23) |
# define R500_TX_MAX_ANISO_MASK (63 << 23) |
# define R500_TX_ANISO_HIGH_QUALITY (1 << 30) |
# define R500_BORDER_FIX (1<<31) |
#define R300_TX_FORMAT0_0 0x4480 |
# define R300_TX_WIDTHMASK_SHIFT 0 |
# define R300_TX_WIDTHMASK_MASK (2047 << 0) |
# define R300_TX_HEIGHTMASK_SHIFT 11 |
# define R300_TX_HEIGHTMASK_MASK (2047 << 11) |
# define R300_TX_DEPTHMASK_SHIFT 22 |
# define R300_TX_DEPTHMASK_MASK (0xf << 22) |
# define R300_TX_SIZE_PROJECTED (1 << 30) |
# define R300_TX_PITCH_EN (1 << 31) |
# define R300_TX_WIDTH(x) ((x) << 0) |
# define R300_TX_HEIGHT(x) ((x) << 11) |
# define R300_TX_DEPTH(x) ((x) << 22) |
# define R300_TX_NUM_LEVELS(x) ((x) << 26) |
/* TX_FORMAT1_0 register (0x44C0): texel format, sign bits, coordinate type,
 * component swizzles and colour-conversion flags.
 */
#define R300_TX_FORMAT1_0 0x44C0 |
/* The interpretation of the format word by Wladimir van der Laan */ |
/* The X, Y, Z and W refer to the layout of the components. |
They are given meanings as R, G, B and Alpha by the swizzle |
specification */ |
/* Format codes 0x0..0x1F: unshifted values below the sign bits (bit 5 up). */
# define R300_TX_FORMAT_X8 0x0 |
# define R300_TX_FORMAT_X16 0x1 |
# define R300_TX_FORMAT_Y4X4 0x2 |
# define R300_TX_FORMAT_Y8X8 0x3 |
# define R300_TX_FORMAT_Y16X16 0x4 |
# define R300_TX_FORMAT_Z3Y3X2 0x5 |
# define R300_TX_FORMAT_Z5Y6X5 0x6 |
# define R300_TX_FORMAT_Z6Y5X5 0x7 |
# define R300_TX_FORMAT_Z11Y11X10 0x8 |
# define R300_TX_FORMAT_Z10Y11X11 0x9 |
# define R300_TX_FORMAT_W4Z4Y4X4 0xA |
# define R300_TX_FORMAT_W1Z5Y5X5 0xB |
# define R300_TX_FORMAT_W8Z8Y8X8 0xC |
# define R300_TX_FORMAT_W2Z10Y10X10 0xD |
# define R300_TX_FORMAT_W16Z16Y16X16 0xE |
# define R300_TX_FORMAT_DXT1 0xF |
# define R300_TX_FORMAT_DXT3 0x10 |
# define R300_TX_FORMAT_DXT5 0x11 |
# define R300_TX_FORMAT_CxV8U8 0x12 |
# define R300_TX_FORMAT_AVYU444 0x13 |
# define R300_TX_FORMAT_VYUY422 0x14 |
# define R300_TX_FORMAT_YVYU422 0x15 |
# define R300_TX_FORMAT_16_MPEG 0x16 |
# define R300_TX_FORMAT_16_16_MPEG 0x17 |
# define R300_TX_FORMAT_16F 0x18 |
# define R300_TX_FORMAT_16F_16F 0x19 |
# define R300_TX_FORMAT_16F_16F_16F_16F 0x1A |
# define R300_TX_FORMAT_32F 0x1B |
# define R300_TX_FORMAT_32F_32F 0x1C |
# define R300_TX_FORMAT_32F_32F_32F_32F 0x1D |
# define R300_TX_FORMAT_W24_FP 0x1E |
# define R400_TX_FORMAT_ATI2N 0x1F |
/* These need TX_FORMAT2_[0-15].TXFORMAT_MSB set. |
My guess is the 10-bit formats are the 8-bit ones but with filtering being |
performed with the precision of 10 bits per channel. This makes sense |
with sRGB textures since the conversion to linear space reduces the precision |
significantly so the shader gets approximately the 8-bit precision |
in the end. It might also improve the quality of HDR rendering where |
high-precision filtering is desirable. |
Again, this is guessed; the formats might mean something else entirely. |
The others should be fine. */ |
# define R500_TX_FORMAT_X1 0x0 |
# define R500_TX_FORMAT_X1_REV 0x1 |
# define R500_TX_FORMAT_X10 0x2 |
# define R500_TX_FORMAT_Y10X10 0x3 |
# define R500_TX_FORMAT_W10Z10Y10X10 0x4 |
# define R500_TX_FORMAT_ATI1N 0x5 |
# define R500_TX_FORMAT_Y8X24 0x6 |
/* Per-component sign flags: bits 5 (W) through 8 (X). */
# define R300_TX_FORMAT_SIGNED_W (1 << 5) |
# define R300_TX_FORMAT_SIGNED_Z (1 << 6) |
# define R300_TX_FORMAT_SIGNED_Y (1 << 7) |
# define R300_TX_FORMAT_SIGNED_X (1 << 8) |
# define R300_TX_FORMAT_SIGNED (0xf << 5) |
/* Texture coordinate type: 2-bit field at bits 26:25. */
# define R300_TX_FORMAT_3D (1 << 25) |
# define R300_TX_FORMAT_CUBIC_MAP (2 << 25) |
# define R300_TX_FORMAT_TEX_COORD_TYPE_MASK (0x3 << 25) |
/* alpha modes, convenience mostly */ |
/* if you have alpha, pick constant appropriate to the |
number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc.) */ |
/* Numerically these are the swizzle selectors X (0), Y (1), W (3) and
 * ONE (5) shifted left by R300_TX_FORMAT_A_SHIFT (9).
 */
# define R300_TX_FORMAT_ALPHA_1CH 0x000 |
# define R300_TX_FORMAT_ALPHA_2CH 0x200 |
# define R300_TX_FORMAT_ALPHA_4CH 0x600 |
# define R300_TX_FORMAT_ALPHA_NONE 0xA00 |
/* Swizzling */ |
/* constants */ |
# define R300_TX_FORMAT_X 0 |
# define R300_TX_FORMAT_Y 1 |
# define R300_TX_FORMAT_Z 2 |
# define R300_TX_FORMAT_W 3 |
# define R300_TX_FORMAT_ZERO 4 |
# define R300_TX_FORMAT_ONE 5 |
/* 2.0*Z, everything above 1.0 is set to 0.0 */ |
# define R300_TX_FORMAT_CUT_Z 6 |
/* 2.0*W, everything above 1.0 is set to 0.0 */ |
# define R300_TX_FORMAT_CUT_W 7 |
/* Bit positions of the four 3-bit swizzle selectors (A lowest, B highest). */
# define R300_TX_FORMAT_B_SHIFT 18 |
# define R300_TX_FORMAT_G_SHIFT 15 |
# define R300_TX_FORMAT_R_SHIFT 12 |
# define R300_TX_FORMAT_A_SHIFT 9 |
/* Convenience macro to take care of layout and swizzling */ |
/* Each argument is a name suffix that is token-pasted onto R300_TX_FORMAT_:
 * B, G, R and A take a swizzle constant suffix (X, Y, Z, W, ZERO, ONE,
 * CUT_Z, CUT_W) and FMT takes a format suffix (e.g. W8Z8Y8X8).
 */
# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ |
((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ |
| ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ |
| ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ |
| ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ |
| (R300_TX_FORMAT_##FMT) \ |
) |
/* These can be ORed with result of R300_EASY_TX_FORMAT() |
We don't really know what they do. Take values from a |
constant color ? */ |
/* NOTE(review): these occupy the same bits (8:5) as the
 * R300_TX_FORMAT_SIGNED_* flags above, in reverse component order.
 */
# define R300_TX_FORMAT_CONST_X (1<<5) |
# define R300_TX_FORMAT_CONST_Y (2<<5) |
# define R300_TX_FORMAT_CONST_Z (4<<5) |
# define R300_TX_FORMAT_CONST_W (8<<5) |
# define R300_TX_FORMAT_GAMMA (1 << 21) |
# define R300_TX_FORMAT_YUV_TO_RGB (1 << 22) |
/* Texture cache region select: 5-bit field at bits 31:27 of TX_FORMAT1.
 * The selector is converted to unsigned before shifting, because the
 * R300_TX_CACHE_SIXTEENTH_* values (16..31) would otherwise shift a signed
 * int into or past the sign bit, which is undefined behaviour in C.
 */
# define R300_TX_CACHE(x) ((unsigned)(x) << 27) |
# define R300_TX_CACHE_WHOLE 0 |
/* reserved */ |
# define R300_TX_CACHE_HALF_0 2 |
# define R300_TX_CACHE_HALF_1 3 |
# define R300_TX_CACHE_FOURTH_0 4 |
# define R300_TX_CACHE_FOURTH_1 5 |
# define R300_TX_CACHE_FOURTH_2 6 |
# define R300_TX_CACHE_FOURTH_3 7 |
# define R300_TX_CACHE_EIGHTH_0 8 |
# define R300_TX_CACHE_EIGHTH_1 9 |
# define R300_TX_CACHE_EIGHTH_2 10 |
# define R300_TX_CACHE_EIGHTH_3 11 |
# define R300_TX_CACHE_EIGHTH_4 12 |
# define R300_TX_CACHE_EIGHTH_5 13 |
# define R300_TX_CACHE_EIGHTH_6 14 |
# define R300_TX_CACHE_EIGHTH_7 15 |
# define R300_TX_CACHE_SIXTEENTH_0 16 |
# define R300_TX_CACHE_SIXTEENTH_1 17 |
# define R300_TX_CACHE_SIXTEENTH_2 18 |
# define R300_TX_CACHE_SIXTEENTH_3 19 |
# define R300_TX_CACHE_SIXTEENTH_4 20 |
# define R300_TX_CACHE_SIXTEENTH_5 21 |
# define R300_TX_CACHE_SIXTEENTH_6 22 |
# define R300_TX_CACHE_SIXTEENTH_7 23 |
# define R300_TX_CACHE_SIXTEENTH_8 24 |
# define R300_TX_CACHE_SIXTEENTH_9 25 |
# define R300_TX_CACHE_SIXTEENTH_10 26 |
# define R300_TX_CACHE_SIXTEENTH_11 27 |
# define R300_TX_CACHE_SIXTEENTH_12 28 |
# define R300_TX_CACHE_SIXTEENTH_13 29 |
# define R300_TX_CACHE_SIXTEENTH_14 30 |
# define R300_TX_CACHE_SIXTEENTH_15 31 |
/* TX_FORMAT2_0 register (0x4500): pitch plus R500-only extension bits. */
#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ |
/* Pitch: 11-bit field at bits 10:0. */
# define R300_TX_PITCHMASK_SHIFT 0 |
# define R300_TX_PITCHMASK_MASK (2047 << 0) |
/* R500 single-bit flags at bits 14..17. */
# define R500_TXFORMAT_MSB (1 << 14) |
# define R500_TXWIDTH_BIT11 (1 << 15) |
# define R500_TXHEIGHT_BIT11 (1 << 16) |
# define R500_POW2FIX2FLT (1 << 17) |
/* R500 2-bit selector at bits 19:18. */
# define R500_SEL_FILTER4_TC0 (0 << 18) |
# define R500_SEL_FILTER4_TC1 (1 << 18) |
# define R500_SEL_FILTER4_TC2 (2 << 18) |
# define R500_SEL_FILTER4_TC3 (3 << 18) |
/* TX_OFFSET_n registers: eight are named here, 4 bytes apart from 0x4540. */
#define R300_TX_OFFSET_0 0x4540 |
#define R300_TX_OFFSET_1 0x4544 |
#define R300_TX_OFFSET_2 0x4548 |
#define R300_TX_OFFSET_3 0x454C |
#define R300_TX_OFFSET_4 0x4550 |
#define R300_TX_OFFSET_5 0x4554 |
#define R300_TX_OFFSET_6 0x4558 |
#define R300_TX_OFFSET_7 0x455C |
/* Endian swap mode: 2-bit field at bits 1:0. */
# define R300_TXO_ENDIAN_NO_SWAP (0 << 0) |
# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) |
# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) |
# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) |
/* Macro tiling: bit 2. */
# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) |
# define R300_TXO_MACRO_TILE_TILED (1 << 2) |
# define R300_TXO_MACRO_TILE(x) ((x) << 2) |
/* Micro tiling: 2-bit field at bits 4:3. */
# define R300_TXO_MICRO_TILE_LINEAR (0 << 3) |
# define R300_TXO_MICRO_TILE_TILED (1 << 3) |
# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) |
# define R300_TXO_MICRO_TILE(x) ((x) << 3) |
/* Offset: bits 31:5; the low five bits are the flag fields above. */
# define R300_TXO_OFFSET_MASK 0xffffffe0 |
# define R300_TXO_OFFSET_SHIFT 5 |
/* 32 bit chroma key */ |
/* Sixteen registers, 4 bytes apart, from 0x4580 to 0x45bc. */
#define R300_TX_CHROMA_KEY_0 0x4580 |
#define R300_TX_CHROMA_KEY_1 0x4584 |
#define R300_TX_CHROMA_KEY_2 0x4588 |
#define R300_TX_CHROMA_KEY_3 0x458c |
#define R300_TX_CHROMA_KEY_4 0x4590 |
#define R300_TX_CHROMA_KEY_5 0x4594 |
#define R300_TX_CHROMA_KEY_6 0x4598 |
#define R300_TX_CHROMA_KEY_7 0x459c |
#define R300_TX_CHROMA_KEY_8 0x45a0 |
#define R300_TX_CHROMA_KEY_9 0x45a4 |
#define R300_TX_CHROMA_KEY_10 0x45a8 |
#define R300_TX_CHROMA_KEY_11 0x45ac |
#define R300_TX_CHROMA_KEY_12 0x45b0 |
#define R300_TX_CHROMA_KEY_13 0x45b4 |
#define R300_TX_CHROMA_KEY_14 0x45b8 |
#define R300_TX_CHROMA_KEY_15 0x45bc |
/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ |
/* Border Color */ |
/* Sixteen registers, 4 bytes apart, from 0x45c0 to 0x45fc. */
#define R300_TX_BORDER_COLOR_0 0x45c0 |
#define R300_TX_BORDER_COLOR_1 0x45c4 |
#define R300_TX_BORDER_COLOR_2 0x45c8 |
#define R300_TX_BORDER_COLOR_3 0x45cc |
#define R300_TX_BORDER_COLOR_4 0x45d0 |
#define R300_TX_BORDER_COLOR_5 0x45d4 |
#define R300_TX_BORDER_COLOR_6 0x45d8 |
#define R300_TX_BORDER_COLOR_7 0x45dc |
#define R300_TX_BORDER_COLOR_8 0x45e0 |
#define R300_TX_BORDER_COLOR_9 0x45e4 |
#define R300_TX_BORDER_COLOR_10 0x45e8 |
#define R300_TX_BORDER_COLOR_11 0x45ec |
#define R300_TX_BORDER_COLOR_12 0x45f0 |
#define R300_TX_BORDER_COLOR_13 0x45f4 |
#define R300_TX_BORDER_COLOR_14 0x45f8 |
#define R300_TX_BORDER_COLOR_15 0x45fc |
/* END: Texture specification */ |
/* BEGIN: Fragment program instruction set */ |
/* Fragment programs are written directly into register space. |
* There are separate instruction streams for texture instructions and ALU |
* instructions. |
* In order to synchronize these streams, the program is divided into up |
* to 4 nodes. Each node begins with a number of TEX operations, followed |
* by a number of ALU operations. |
* The first node can have zero TEX ops; all subsequent nodes must have at |
* least one TEX op. |
* All nodes must have at least one ALU op. |
* |
* The index of the last node is stored in PFS_CNTL_0: A value of 0 means |
* 1 node, a value of 3 means 4 nodes. |
* The total amount of instructions is defined in PFS_CNTL_2. The offsets are |
* offsets into the respective instruction streams, while *_END points to the |
* last instruction relative to this offset. |
*/ |
/* US_CONFIG register (0x4600); its fields carry the PFS_CNTL_ prefix. */
#define R300_US_CONFIG 0x4600 |
/* Index of the last node: 2-bit field at bits 1:0. */
# define R300_PFS_CNTL_LAST_NODES_SHIFT 0 |
# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) |
/* Bit 3. */
# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) |
/* US_PIXSIZE register (0x4604); no field macros are defined for it here. */
#define R300_US_PIXSIZE 0x4604 |
/* There is an unshifted value here which has so far always been equal to the |
* index of the highest used temporary register. |
*/ |
#define R300_US_CODE_OFFSET 0x4608 |
# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 |
# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) |
# define R300_PFS_CNTL_ALU_END_SHIFT 6 |
# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) |
# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 |
# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) |
# define R300_PFS_CNTL_TEX_END_SHIFT 18 |
# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) |
# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24 |
# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24) |
# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28 |
# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28) |
/* gap */ |
/* Nodes are stored backwards. The last active node is always stored in |
* PFS_NODE_3. |
* Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The |
* first node is stored in NODE_2, the second node is stored in NODE_3. |
* |
* Offsets are relative to the master offset from PFS_CNTL_2. |
*/ |
#define R300_US_CODE_ADDR_0 0x4610 |
#define R300_US_CODE_ADDR_1 0x4614 |
#define R300_US_CODE_ADDR_2 0x4618 |
#define R300_US_CODE_ADDR_3 0x461C |
# define R300_ALU_START_SHIFT 0 |
# define R300_ALU_START_MASK (63 << 0) |
# define R300_ALU_SIZE_SHIFT 6 |
# define R300_ALU_SIZE_MASK (63 << 6) |
# define R300_TEX_START_SHIFT 12 |
# define R300_TEX_START_MASK (31 << 12) |
# define R300_TEX_SIZE_SHIFT 17 |
# define R300_TEX_SIZE_MASK (31 << 17) |
# define R300_RGBA_OUT (1 << 22) |
# define R300_W_OUT (1 << 23) |
# define R400_TEX_START_MSB_SHIFT 24 |
# define R400_TEX_START_MSG_MASK (0xf << 24) |
# define R400_TEX_SIZE_MSB_SHIFT 28 |
# define R400_TEX_SIZE_MSG_MASK (0xf << 28) |
/* TEX |
* As far as I can tell, texture instructions cannot write into output |
* registers directly. A subsequent ALU instruction is always necessary, |
* even if it's just MAD o0, r0, 1, 0 |
*/ |
/* US_TEX_INST_0 register (0x4620): first texture instruction word. */
#define R300_US_TEX_INST_0 0x4620 |
/* Source address: 5-bit field at bits 4:0. */
# define R300_SRC_ADDR_SHIFT 0 |
# define R300_SRC_ADDR_MASK (31 << 0) |
/* Destination address: 5-bit field at bits 10:6. */
# define R300_DST_ADDR_SHIFT 6 |
# define R300_DST_ADDR_MASK (31 << 6) |
/* Texture unit id: 4-bit field at bits 14:11. */
# define R300_TEX_ID_SHIFT 11 |
# define R300_TEX_ID_MASK (15 << 11) |
/* Opcode: 3-bit field at bits 17:15; R300_TEX_OP_* are unshifted values. */
# define R300_TEX_INST_SHIFT 15 |
# define R300_TEX_OP_NOP 0 |
# define R300_TEX_OP_LD 1 |
# define R300_TEX_OP_KIL 2 |
# define R300_TEX_OP_TXP 3 |
# define R300_TEX_OP_TXB 4 |
# define R300_TEX_INST_MASK (7 << 15) |
/* R400 address extension bits (bits 19 and 20). */
# define R400_SRC_ADDR_EXT_BIT (1 << 19) |
# define R400_DST_ADDR_EXT_BIT (1 << 20) |
/* Output format from the unified shader */ |
/* US_OUT_FMT_0 register (0x46A4). */
#define R300_US_OUT_FMT_0 0x46A4 |
/* Format code in the low bits (values 0..21; 14 has no name here). */
# define R300_US_OUT_FMT_C4_8 (0 << 0) |
# define R300_US_OUT_FMT_C4_10 (1 << 0) |
# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) |
# define R300_US_OUT_FMT_C_16 (3 << 0) |
# define R300_US_OUT_FMT_C2_16 (4 << 0) |
# define R300_US_OUT_FMT_C4_16 (5 << 0) |
# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) |
# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) |
# define R300_US_OUT_FMT_C2_4 (8 << 0) |
# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) |
# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) |
# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) |
# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) |
# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) |
/* reserved */ |
# define R300_US_OUT_FMT_UNUSED (15 << 0) |
# define R300_US_OUT_FMT_C_16_FP (16 << 0) |
# define R300_US_OUT_FMT_C2_16_FP (17 << 0) |
# define R300_US_OUT_FMT_C4_16_FP (18 << 0) |
# define R300_US_OUT_FMT_C_32_FP (19 << 0) |
# define R300_US_OUT_FMT_C2_32_FP (20 << 0) |
# define R300_US_OUT_FMT_C4_32_FP (21 << 0) |
/* Source select for output component C0: 2-bit field at bits 9:8. */
# define R300_C0_SEL_A (0 << 8) |
# define R300_C0_SEL_R (1 << 8) |
# define R300_C0_SEL_G (2 << 8) |
# define R300_C0_SEL_B (3 << 8) |
/* Source select for output component C1: 2-bit field at bits 11:10. */
# define R300_C1_SEL_A (0 << 10) |
# define R300_C1_SEL_R (1 << 10) |
# define R300_C1_SEL_G (2 << 10) |
# define R300_C1_SEL_B (3 << 10) |
/* Source select for output component C2: 2-bit field at bits 13:12. */
# define R300_C2_SEL_A (0 << 12) |
# define R300_C2_SEL_R (1 << 12) |
# define R300_C2_SEL_G (2 << 12) |
# define R300_C2_SEL_B (3 << 12) |
/* Source select for output component C3: 2-bit field at bits 15:14. */
# define R300_C3_SEL_A (0 << 14) |
# define R300_C3_SEL_R (1 << 14) |
# define R300_C3_SEL_G (2 << 14) |
# define R300_C3_SEL_B (3 << 14) |
/* Sign field starting at bit 16; x is not masked by the macro. */
# define R300_OUT_SIGN(x) ((x) << 16) |
# define R500_ROUND_ADJ (1 << 20) |
/* ALU |
* The ALU instruction register blocks are enumerated according to the order |
* in which fglrx writes them (presumably; the original sentence is cut off |
* here). I assume there is space for 64 instructions, since |
* each block has space for a maximum of 64 DWORDs, and this matches reported |
* native limits. |
* |
* The basic functional block seems to be one MAD for each color and alpha, |
* and an adder that adds all components after the MUL. |
* - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands |
* - DP4: Use OUTC_DP4, OUTA_DP4 |
* - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands |
* - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands |
* - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 |
* - CMP: If ARG2 < 0, return ARG1, else return ARG0 |
* - FLR: use FRC+MAD |
* - XPD: use MAD+MAD |
* - SGE, SLT: use MAD+CMP |
* - RSQ: use ABS modifier for argument |
* - Use OUTC_REPL_ALPHA to write results of an alpha-only operation |
* (e.g. RCP) into color register |
* - apparently, there's no quick DST operation |
* - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" |
* - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" |
* - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" |
* |
* Operand selection |
* First stage selects three sources from the available registers and |
* constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). |
* fglrx sorts the three source fields: Registers before constants, |
* lower indices before higher indices; I do not know whether this is |
* necessary. |
* |
* fglrx fills unused sources with "read constant 0" |
* According to specs, you cannot select more than two different constants. |
* |
* Second stage selects the operands from the sources. This is defined in |
* INSTR0 (color) and INSTR2 (alpha). You can also select the special constants |
* zero and one. |
* Swizzling and negation happens in this stage, as well. |
* |
* Important: Color and alpha seem to be mostly separate, i.e. their sources |
* selection appears to be fully independent (the register storage is probably |
* physically split into a color and an alpha section). |
* However (because of the apparent physical split), there is some interaction |
* WRT swizzling. If, for example, you want to load an R component into an |
* Alpha operand, this R component is taken from a *color* source, not from |
* an alpha source. The corresponding register doesn't even have to appear in |
* the alpha sources list. (I hope this all makes sense to you) |
* |
* Destination selection |
* The destination register index is in FPI1 (color) and FPI3 (alpha) |
* together with enable bits. |
* There are separate enable bits for writing into temporary registers |
* (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* |
* /DSTA_OUTPUT). You can write to both at once, or not write at all (the |
* same index must be used for both). |
* |
* Note: There is a special form for LRP |
* - Argument order is the same as in ARB_fragment_program. |
* - Operation is MAD |
* - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP |
* - Set FPI0/FPI2_SPECIAL_LRP |
* Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD |
*/ |
/* US_ALU_RGB_ADDR_0 register (0x46C0): colour source and destination
 * selection for an ALU instruction.
 */
#define R300_US_ALU_RGB_ADDR_0 0x46C0 |
/* Three sources: a 5-bit index plus a CONST flag in the next bit up
 * (bits 4:0 + 5, 10:6 + 11, 16:12 + 17).
 */
# define R300_ALU_SRC0C_SHIFT 0 |
# define R300_ALU_SRC0C_MASK (31 << 0) |
# define R300_ALU_SRC0C_CONST (1 << 5) |
# define R300_ALU_SRC1C_SHIFT 6 |
# define R300_ALU_SRC1C_MASK (31 << 6) |
# define R300_ALU_SRC1C_CONST (1 << 11) |
# define R300_ALU_SRC2C_SHIFT 12 |
# define R300_ALU_SRC2C_MASK (31 << 12) |
# define R300_ALU_SRC2C_CONST (1 << 17) |
/* All three source fields together: bits 17:0. */
# define R300_ALU_SRC_MASK 0x0003ffff |
/* Destination index: 5-bit field at bits 22:18. */
# define R300_ALU_DSTC_SHIFT 18 |
# define R300_ALU_DSTC_MASK (31 << 18) |
/* Per-component write enables for temporary registers: bits 25:23. */
# define R300_ALU_DSTC_REG_MASK_SHIFT 23 |
# define R300_ALU_DSTC_REG_X (1 << 23) |
# define R300_ALU_DSTC_REG_Y (1 << 24) |
# define R300_ALU_DSTC_REG_Z (1 << 25) |
/* Per-component write enables for output registers: bits 28:26. */
# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 |
# define R300_ALU_DSTC_OUTPUT_X (1 << 26) |
# define R300_ALU_DSTC_OUTPUT_Y (1 << 27) |
# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) |
# define R300_ALU_DSTC_OUTPUT_XYZ (7 << 26) |
/* Helpers that shift x into place; x is not masked by the macro. */
# define R300_RGB_ADDR0(x) ((x) << 0) |
# define R300_RGB_ADDR1(x) ((x) << 6) |
# define R300_RGB_ADDR2(x) ((x) << 12) |
/* NOTE(review): with a signed x, values above 3 overflow int at shift 29. */
# define R300_RGB_TARGET(x) ((x) << 29) |
/* US_ALU_ALPHA_ADDR_0 register (0x47C0): alpha source and destination
 * selection for an ALU instruction.
 */
#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 |
/* Three sources: a 5-bit index plus a CONST flag in the next bit up
 * (bits 4:0 + 5, 10:6 + 11, 16:12 + 17).
 */
# define R300_ALU_SRC0A_SHIFT 0 |
# define R300_ALU_SRC0A_MASK (31 << 0) |
# define R300_ALU_SRC0A_CONST (1 << 5) |
# define R300_ALU_SRC1A_SHIFT 6 |
# define R300_ALU_SRC1A_MASK (31 << 6) |
# define R300_ALU_SRC1A_CONST (1 << 11) |
# define R300_ALU_SRC2A_SHIFT 12 |
# define R300_ALU_SRC2A_MASK (31 << 12) |
# define R300_ALU_SRC2A_CONST (1 << 17) |
/* Repeats the definition given with the RGB address register; the
 * replacement text is the same, so the redefinition is benign.
 */
# define R300_ALU_SRC_MASK 0x0003ffff |
/* Destination index: 5-bit field at bits 22:18. */
# define R300_ALU_DSTA_SHIFT 18 |
# define R300_ALU_DSTA_MASK (31 << 18) |
/* Write enables: temporary register (bit 23), output (bit 24), depth
 * (bit 27).
 */
# define R300_ALU_DSTA_REG (1 << 23) |
# define R300_ALU_DSTA_OUTPUT (1 << 24) |
# define R300_ALU_DSTA_DEPTH (1 << 27) |
/* Helpers that shift x into place; x is not masked by the macro. */
# define R300_ALPHA_ADDR0(x) ((x) << 0) |
# define R300_ALPHA_ADDR1(x) ((x) << 6) |
# define R300_ALPHA_ADDR2(x) ((x) << 12) |
# define R300_ALPHA_TARGET(x) ((x) << 25) |
#define R300_US_ALU_RGB_INST_0 0x48C0 |
# define R300_ALU_ARGC_SRC0C_XYZ 0 |
# define R300_ALU_ARGC_SRC0C_XXX 1 |
# define R300_ALU_ARGC_SRC0C_YYY 2 |
# define R300_ALU_ARGC_SRC0C_ZZZ 3 |
# define R300_ALU_ARGC_SRC1C_XYZ 4 |
# define R300_ALU_ARGC_SRC1C_XXX 5 |
# define R300_ALU_ARGC_SRC1C_YYY 6 |
# define R300_ALU_ARGC_SRC1C_ZZZ 7 |
# define R300_ALU_ARGC_SRC2C_XYZ 8 |
# define R300_ALU_ARGC_SRC2C_XXX 9 |
# define R300_ALU_ARGC_SRC2C_YYY 10 |
# define R300_ALU_ARGC_SRC2C_ZZZ 11 |
# define R300_ALU_ARGC_SRC0A 12 |
# define R300_ALU_ARGC_SRC1A 13 |
# define R300_ALU_ARGC_SRC2A 14 |
# define R300_ALU_ARGC_SRCP_XYZ 15 |
# define R300_ALU_ARGC_SRCP_XXX 16 |
# define R300_ALU_ARGC_SRCP_YYY 17 |
# define R300_ALU_ARGC_SRCP_ZZZ 18 |
# define R300_ALU_ARGC_SRCP_WWW 19 |
# define R300_ALU_ARGC_ZERO 20 |
# define R300_ALU_ARGC_ONE 21 |
# define R300_ALU_ARGC_HALF 22 |
# define R300_ALU_ARGC_SRC0C_YZX 23 |
# define R300_ALU_ARGC_SRC1C_YZX 24 |
# define R300_ALU_ARGC_SRC2C_YZX 25 |
# define R300_ALU_ARGC_SRC0C_ZXY 26 |
# define R300_ALU_ARGC_SRC1C_ZXY 27 |
# define R300_ALU_ARGC_SRC2C_ZXY 28 |
# define R300_ALU_ARGC_SRC0CA_WZY 29 |
# define R300_ALU_ARGC_SRC1CA_WZY 30 |
# define R300_ALU_ARGC_SRC2CA_WZY 31 |
# define R300_RGB_SWIZA(x) ((x) << 0) |
# define R300_RGB_SWIZB(x) ((x) << 7) |
# define R300_RGB_SWIZC(x) ((x) << 14) |
# define R300_ALU_ARG0C_SHIFT 0 |
# define R300_ALU_ARG0C_MASK (31 << 0) |
# define R300_ALU_ARG0C_NOP (0 << 5) |
# define R300_ALU_ARG0C_NEG (1 << 5) |
# define R300_ALU_ARG0C_ABS (2 << 5) |
# define R300_ALU_ARG0C_NAB (3 << 5) |
# define R300_ALU_ARG1C_SHIFT 7 |
# define R300_ALU_ARG1C_MASK (31 << 7) |
# define R300_ALU_ARG1C_NOP (0 << 12) |
# define R300_ALU_ARG1C_NEG (1 << 12) |
# define R300_ALU_ARG1C_ABS (2 << 12) |
# define R300_ALU_ARG1C_NAB (3 << 12) |
# define R300_ALU_ARG2C_SHIFT 14 |
# define R300_ALU_ARG2C_MASK (31 << 14) |
# define R300_ALU_ARG2C_NOP (0 << 19) |
# define R300_ALU_ARG2C_NEG (1 << 19) |
# define R300_ALU_ARG2C_ABS (2 << 19) |
# define R300_ALU_ARG2C_NAB (3 << 19) |
# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) |
# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) |
# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) |
# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) |
# define R300_ALU_OUTC_MAD (0 << 23) |
# define R300_ALU_OUTC_DP3 (1 << 23) |
# define R300_ALU_OUTC_DP4 (2 << 23) |
# define R300_ALU_OUTC_D2A (3 << 23) |
# define R300_ALU_OUTC_MIN (4 << 23) |
# define R300_ALU_OUTC_MAX (5 << 23) |
# define R300_ALU_OUTC_CND (7 << 23) |
# define R300_ALU_OUTC_CMP (8 << 23) |
# define R300_ALU_OUTC_FRC (9 << 23) |
# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) |
# define R300_ALU_OUTC_MOD_SHIFT 27 |
# define R300_ALU_OUTC_MOD_NOP (0 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_MOD_MUL2 (1 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_MOD_MUL4 (2 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_MOD_MUL8 (3 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_MOD_DIV2 (4 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_MOD_DIV4 (5 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_MOD_DIV8 (6 << R300_ALU_OUTC_MOD_SHIFT) |
# define R300_ALU_OUTC_CLAMP (1 << 30) |
# define R300_ALU_INSERT_NOP (1 << 31) |
/* US_ALU_ALPHA_INST_0 register (0x49C0): alpha operand selection, operand
 * modifiers, operation and output modifier of an ALU instruction.
 */
#define R300_US_ALU_ALPHA_INST_0 0x49C0 |
/* Operand selectors: unshifted values (0..18) for the ARG0A/ARG1A/ARG2A
 * fields below.
 */
# define R300_ALU_ARGA_SRC0C_X 0 |
# define R300_ALU_ARGA_SRC0C_Y 1 |
# define R300_ALU_ARGA_SRC0C_Z 2 |
# define R300_ALU_ARGA_SRC1C_X 3 |
# define R300_ALU_ARGA_SRC1C_Y 4 |
# define R300_ALU_ARGA_SRC1C_Z 5 |
# define R300_ALU_ARGA_SRC2C_X 6 |
# define R300_ALU_ARGA_SRC2C_Y 7 |
# define R300_ALU_ARGA_SRC2C_Z 8 |
# define R300_ALU_ARGA_SRC0A 9 |
# define R300_ALU_ARGA_SRC1A 10 |
# define R300_ALU_ARGA_SRC2A 11 |
# define R300_ALU_ARGA_SRCP_X 12 |
# define R300_ALU_ARGA_SRCP_Y 13 |
# define R300_ALU_ARGA_SRCP_Z 14 |
# define R300_ALU_ARGA_SRCP_W 15 |
# define R300_ALU_ARGA_ZERO 16 |
# define R300_ALU_ARGA_ONE 17 |
# define R300_ALU_ARGA_HALF 18 |
/* Helpers that shift a selector into argument slot A/B/C (bits 0, 7, 14);
 * x is not masked by the macro.
 */
# define R300_ALPHA_SWIZA(x) ((x) << 0) |
# define R300_ALPHA_SWIZB(x) ((x) << 7) |
# define R300_ALPHA_SWIZC(x) ((x) << 14) |
/* Argument 0: selector at bits 4:0, modifier at bits 6:5.
 * NAB (3) is the NEG (1) and ABS (2) bits combined.
 */
# define R300_ALU_ARG0A_SHIFT 0 |
# define R300_ALU_ARG0A_MASK (31 << 0) |
# define R300_ALU_ARG0A_NOP (0 << 5) |
# define R300_ALU_ARG0A_NEG (1 << 5) |
# define R300_ALU_ARG0A_ABS (2 << 5) |
# define R300_ALU_ARG0A_NAB (3 << 5) |
/* Argument 1: selector at bits 11:7, modifier at bits 13:12. */
# define R300_ALU_ARG1A_SHIFT 7 |
# define R300_ALU_ARG1A_MASK (31 << 7) |
# define R300_ALU_ARG1A_NOP (0 << 12) |
# define R300_ALU_ARG1A_NEG (1 << 12) |
# define R300_ALU_ARG1A_ABS (2 << 12) |
# define R300_ALU_ARG1A_NAB (3 << 12) |
/* Argument 2: selector at bits 18:14, modifier at bits 20:19. */
# define R300_ALU_ARG2A_SHIFT 14 |
# define R300_ALU_ARG2A_MASK (31 << 14) |
# define R300_ALU_ARG2A_NOP (0 << 19) |
# define R300_ALU_ARG2A_NEG (1 << 19) |
# define R300_ALU_ARG2A_ABS (2 << 19) |
# define R300_ALU_ARG2A_NAB (3 << 19) |
/* SRCP source operation: 2-bit field at bits 22:21. These four names are
 * also defined with the RGB instruction register; the replacement text is
 * the same, so the redefinition is benign.
 */
# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) |
# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) |
# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) |
# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) |
/* Operation: field starting at bit 23 (values 0..11; 4 has no name here). */
# define R300_ALU_OUTA_MAD (0 << 23) |
# define R300_ALU_OUTA_DP4 (1 << 23) |
# define R300_ALU_OUTA_MIN (2 << 23) |
# define R300_ALU_OUTA_MAX (3 << 23) |
# define R300_ALU_OUTA_CND (5 << 23) |
# define R300_ALU_OUTA_CMP (6 << 23) |
# define R300_ALU_OUTA_FRC (7 << 23) |
# define R300_ALU_OUTA_EX2 (8 << 23) |
# define R300_ALU_OUTA_LG2 (9 << 23) |
# define R300_ALU_OUTA_RCP (10 << 23) |
# define R300_ALU_OUTA_RSQ (11 << 23) |
/* Output modifier: field starting at bit 27 (values 0..6). */
# define R300_ALU_OUTA_MOD_NOP (0 << 27) |
# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) |
# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) |
# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) |
# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) |
# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) |
# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) |
# define R300_ALU_OUTA_CLAMP (1 << 30) |
/* END: Fragment program instruction set */ |
/* R4xx extended fragment shader registers. */ |
/* US_ALU_EXT_ADDR_0 (0x4ac0): extra address MSBs for ALU instructions.
 * RGB bits sit in the low nibble and alpha bits in the high nibble; the
 * ADDRD constants are bit 3 and bit 7.
 * NOTE(review): x is presumably the source index (0..2) and ADDRD the
 * destination - confirm against the users of these macros.
 */
#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ |
# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) |
# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 |
# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) |
# define R400_ADDRD_EXT_A_MSB_BIT 0x80 |
/* US_CODE_BANK register (0x46b8): bank number in bits 3:0 and a mode
 * enable flag in bit 4.
 */
#define R400_US_CODE_BANK 0x46b8 |
# define R400_BANK_SHIFT 0 |
# define R400_BANK_MASK 0xf |
# define R400_R390_MODE_ENABLE (1 << 4) |
/* US_CODE_EXT register (0x46bc): ten consecutive 3-bit MSB fields, from
 * bit 0 up to bit 29 - the ALU offset and size, then the ALU start/size
 * pair for each of nodes 0..3.
 */
#define R400_US_CODE_EXT 0x46bc |
# define R400_ALU_OFFSET_MSB_SHIFT 0 |
# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0) |
# define R400_ALU_SIZE_MSB_SHIFT 3 |
# define R400_ALU_SIZE_MSB_MASK (0x7 << 3) |
# define R400_ALU_START0_MSB_SHIFT 6 |
# define R400_ALU_START0_MSB_MASK (0x7 << 6) |
# define R400_ALU_SIZE0_MSB_SHIFT 9 |
# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9) |
# define R400_ALU_START1_MSB_SHIFT 12 |
# define R400_ALU_START1_MSB_MASK (0x7 << 12) |
# define R400_ALU_SIZE1_MSB_SHIFT 15 |
# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15) |
# define R400_ALU_START2_MSB_SHIFT 18 |
# define R400_ALU_START2_MSB_MASK (0x7 << 18) |
# define R400_ALU_SIZE2_MSB_SHIFT 21 |
# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21) |
# define R400_ALU_START3_MSB_SHIFT 24 |
# define R400_ALU_START3_MSB_MASK (0x7 << 24) |
# define R400_ALU_SIZE3_MSB_SHIFT 27 |
# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27) |
/* END: R4xx extended fragment shader registers. */ |
/* Fog: Fog Blending Enable */ |
/* FG_FOG_BLEND register (0x4bc0): enable flag in bit 0, blend function in
 * the 2-bit field at bits 2:1.
 */
#define R300_FG_FOG_BLEND 0x4bc0 |
# define R300_FG_FOG_BLEND_DISABLE (0 << 0) |
# define R300_FG_FOG_BLEND_ENABLE (1 << 0) |
# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) |
# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) |
# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) |
# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) |
# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) |
/* Fog: Red Component of Fog Color */ |
#define R300_FG_FOG_COLOR_R 0x4bc8 |
/* Fog: Green Component of Fog Color */ |
#define R300_FG_FOG_COLOR_G 0x4bcc |
/* Fog: Blue Component of Fog Color */ |
#define R300_FG_FOG_COLOR_B 0x4bd0 |
/* Mask for the low 10 bits (bits 9:0) of each colour register above. */
# define R300_FG_FOG_COLOR_MASK 0x000003ff |
/* Fog: Constant Factor for Fog Blending */ |
/* Listed out of address order: 0x4bc4 lies between FG_FOG_BLEND (0x4bc0)
 * and FG_FOG_COLOR_R (0x4bc8). The mask covers the low 10 bits.
 * NOTE(review): the mask name lacks the R300_ prefix used everywhere else.
 */
#define R300_FG_FOG_FACTOR 0x4bc4 |
# define FG_FOG_FACTOR_MASK 0x000003ff |
/* Fog: Alpha function */ |
/* FG_ALPHA_FUNC register (0x4bd4): alpha test reference value, compare
 * function and related enables.
 */
#define R300_FG_ALPHA_FUNC 0x4bd4 |
/* Reference value: bits 7:0. */
# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff |
/* Compare function: 3-bit field at bits 10:8. */
# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) |
# define R300_FG_ALPHA_FUNC_LESS (1 << 8) |
# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) |
# define R300_FG_ALPHA_FUNC_LE (3 << 8) |
# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) |
# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) |
# define R300_FG_ALPHA_FUNC_GE (6 << 8) |
# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) |
# define R300_ALPHA_TEST_OP_MASK (7 << 8) |
/* Alpha test enable: bit 11. */
# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) |
# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) |
/* R500: bit 12 selects between the 10-bit (0) and 8-bit (1) settings. */
# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) |
# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) |
/* Mask enable: bit 16; configuration: bit 17. */
# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) |
# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) |
# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) |
# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) |
/* Dither enable: bit 20. */
# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) |
# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) |
/* R500-only flags: bits 24, 25 and 28. */
# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) |
# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ |
# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) |
# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) |
# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) |
# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) |
/* Fog: Where does the depth come from? */ |
/* FG_DEPTH_SRC register (0x4bd8): bit 0 selects SCAN (0) or SHADER (1). */
#define R300_FG_DEPTH_SRC 0x4bd8 |
# define R300_FG_DEPTH_SRC_SCAN (0 << 0) |
# define R300_FG_DEPTH_SRC_SHADER (1 << 0) |
/* Fog: Alpha Compare Value */ |
/* R500 FG_ALPHA_VALUE register (0x4be0): value in the low 16 bits. */
#define R500_FG_ALPHA_VALUE 0x4be0 |
# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff |
/* RV530 FG_ZBREG_DEST register (0x4be8): one select bit per pipe (bit 0
 * and bit 1); SELECT_ALL sets both.
 */
#define RV530_FG_ZBREG_DEST 0x4be8 |
# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) |
# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) |
# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) |
/* gap */ |
/* Fragment program parameters in 7.16 floating point */ |
/* Only the first (0) and last (31) parameters are named. Each has four
 * component registers (X, Y, Z, W) 4 bytes apart, and both named entries
 * fit the address 0x4C00 + 16 * n + 4 * component - presumably the
 * parameters in between follow the same layout.
 */
#define R300_PFS_PARAM_0_X 0x4C00 |
#define R300_PFS_PARAM_0_Y 0x4C04 |
#define R300_PFS_PARAM_0_Z 0x4C08 |
#define R300_PFS_PARAM_0_W 0x4C0C |
/* last consts */ |
#define R300_PFS_PARAM_31_X 0x4DF0 |
#define R300_PFS_PARAM_31_Y 0x4DF4 |
#define R300_PFS_PARAM_31_Z 0x4DF8 |
#define R300_PFS_PARAM_31_W 0x4DFC |
/* Unpipelined. */ |
/* RB3D_CCTL register (0x4e00). */
#define R300_RB3D_CCTL 0x4e00 |
/* Number of multiwrite buffers, encoded as count - 1 in the 2-bit field at
 * bits 6:5. The helper clamps (x)-1 at 0 using MAX2, which is not defined
 * in this part of the file and must be provided by the includer.
 */
# define R300_RB3D_CCTL_NUM_MULTIWRITES(x) (MAX2(((x)-1), 0) << 5) |
# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) |
# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) |
# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) |
# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) |
/* Single-bit flags at bits 7, 9 and 10. */
# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) |
# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) |
# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) |
# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) |
# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) |
# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10) |
/* reserved */ |
# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12) |
# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12) |
/* Note the inverted sense: bit 13 clear is ENABLE, bit 13 set is DISABLE. */
# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13) |
# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13) |
# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14) |
# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14) |
/* Notes: |
* - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in |
* the application |
* - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND |
* are set to the same |
* function (both registers are always set up completely in any case) |
* - Most blend flags are simply copied from R200 and not tested yet |
*/ |
/* RB3D_CBLEND (0x4E04) and RB3D_ABLEND (0x4E08): colour and alpha blend
 * control.
 * R500_SRC_ALPHA_1_NO_READ is built from an unsigned constant because
 * shifting a signed 1 into bit 31 is undefined behaviour in C.
 */
#define R300_RB3D_CBLEND 0x4E04 |
#define R300_RB3D_ABLEND 0x4E08 |
/* the following only appear in CBLEND */ |
# define R300_ALPHA_BLEND_ENABLE (1 << 0) |
# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) |
# define R300_READ_ENABLE (1 << 2) |
/* Source pixel discard mode: 3-bit field at bits 5:3. */
# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) |
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) |
# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) |
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) |
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) |
# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) |
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) |
# define R500_SRC_ALPHA_0_NO_READ (1 << 30) |
/* Bit 31: must be an unsigned shift (see header comment). */
# define R500_SRC_ALPHA_1_NO_READ (1u << 31) |
/* the following are shared between CBLEND and ABLEND */ |
/* Combine function: values 0..7 starting at bit 12.
 * NOTE(review): R300_FCN_MASK covers only bits 13:12 although the
 * R300_COMB_FCN_* values need three bits (14:12); the value is left
 * unchanged here - confirm against the hardware documentation.
 */
# define R300_FCN_MASK (3 << 12) |
# define R300_COMB_FCN_ADD_CLAMP (0 << 12) |
# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) |
# define R300_COMB_FCN_SUB_CLAMP (2 << 12) |
# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) |
# define R300_COMB_FCN_MIN (4 << 12) |
# define R300_COMB_FCN_MAX (5 << 12) |
# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) |
# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) |
/* Blend factors: unshifted 6-bit values (see R300_BLEND_MASK), placed with
 * R300_SRC_BLEND_SHIFT (16) or R300_DST_BLEND_SHIFT (24).
 */
# define R300_BLEND_GL_ZERO (32) |
# define R300_BLEND_GL_ONE (33) |
# define R300_BLEND_GL_SRC_COLOR (34) |
# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) |
# define R300_BLEND_GL_DST_COLOR (36) |
# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) |
# define R300_BLEND_GL_SRC_ALPHA (38) |
# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) |
# define R300_BLEND_GL_DST_ALPHA (40) |
# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) |
# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) |
# define R300_BLEND_GL_CONST_COLOR (43) |
# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) |
# define R300_BLEND_GL_CONST_ALPHA (45) |
# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) |
# define R300_BLEND_MASK (63) |
# define R300_SRC_BLEND_SHIFT (16) |
# define R300_DST_BLEND_SHIFT (24) |
/* Constant color used by the blender. Pipelined through the blender. |
 * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE, |
 * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead. |
 * (Presumably these are the R500_RB3D_CONSTANT_COLOR_AR / _GB registers |
 * defined elsewhere in this file -- confirm.) |
 */ |
#define R300_RB3D_BLEND_COLOR 0x4E10 |
/* 3D Color Channel Mask. If all the channels used in the current color format |
 * are disabled, then the cb will discard all the incoming quads. Pipelined |
 * through the blender. |
 * |
 * Each of the four mask groups (suffix 0-3) takes four consecutive bits, |
 * ordered blue, green, red, alpha from the low bit up. |
 */ |
#define RB3D_COLOR_CHANNEL_MASK 0x4E0C |
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0) |
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1) |
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2) |
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3) |
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4) |
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5) |
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6) |
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7) |
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8) |
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9) |
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10) |
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11) |
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12) |
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13) |
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14) |
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15) |
/* Clear color that is used when the color mask is set to 00. Unpipelined. |
 * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 |
 * formats, ignoring the fields. |
 */ |
#define R300_RB3D_COLOR_CLEAR_VALUE 0x4E14 |
/* For FP16 AA. Two registers; going by the names, one holds the alpha/red |
 * pair and the other the green/blue pair -- confirm against hardware docs. |
 */ |
#define R500_RB3D_COLOR_CLEAR_VALUE_AR 0x46C0 |
#define R500_RB3D_COLOR_CLEAR_VALUE_GB 0x46C4 |
/* gap */ |
/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */ |
#define RB3D_CLRCMP_CLR 0x4e20 |
/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */ |
#define RB3D_CLRCMP_MSK 0x4e24 |
/* Color Buffer Address Offset of multibuffer 0. Unpipelined. |
 * The four COLOROFFSET registers are consecutive dwords (0x4E28-0x4E34). |
 */ |
#define R300_RB3D_COLOROFFSET0 0x4E28 |
# define R300_COLOROFFSET_MASK 0xFFFFFFE0 /* excludes the low five bits */ |
/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ |
#define R300_RB3D_COLOROFFSET1 0x4E2C |
/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ |
#define R300_RB3D_COLOROFFSET2 0x4E30 |
/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */ |
#define R300_RB3D_COLOROFFSET3 0x4E34 |
/* Color buffer format and tiling control for all the multibuffers and the |
 * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any |
 * of the registers are changed. |
 * |
 * Bit 16: Larger tiles |
 * Bit 17: 4x2 tiles |
 * Bit 18: Extremely weird tile like, but some pixels duplicated? |
 * |
 * Field positions per the defines below: pitch in bits 13:1 |
 * (R300_COLORPITCH_MASK), R300_COLOR_TILE at bit 16, R300_COLOR_MICROTILE |
 * starting at bit 17 (values 0-2, so it also uses bit 18), endian swap |
 * starting at bit 19 and color format starting at bit 21. |
 */ |
#define R300_RB3D_COLORPITCH0 0x4E38 |
# define R300_COLORPITCH_MASK 0x00003FFE |
# define R300_COLOR_TILE_DISABLE (0 << 16) |
# define R300_COLOR_TILE_ENABLE (1 << 16) |
# define R300_COLOR_TILE(x) ((x) << 16) |
# define R300_COLOR_MICROTILE_DISABLE (0 << 17) |
# define R300_COLOR_MICROTILE_ENABLE (1 << 17) |
# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ |
# define R300_COLOR_MICROTILE(x) ((x) << 17) |
# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) |
# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) |
# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) |
# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) |
# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) |
# define R500_COLOR_FORMAT_UV1010 (1 << 21) |
# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ |
# define R300_COLOR_FORMAT_ARGB1555 (3 << 21) |
# define R300_COLOR_FORMAT_RGB565 (4 << 21) |
# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) |
# define R300_COLOR_FORMAT_ARGB8888 (6 << 21) |
# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) |
/* reserved (format code 8 has no define) */ |
# define R300_COLOR_FORMAT_I8 (9 << 21) |
# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21) |
# define R300_COLOR_FORMAT_VYUY (11 << 21) |
# define R300_COLOR_FORMAT_YVYU (12 << 21) |
# define R300_COLOR_FORMAT_UV88 (13 << 21) |
# define R500_COLOR_FORMAT_I10 (14 << 21) |
# define R300_COLOR_FORMAT_ARGB4444 (15 << 21) |
#define R300_RB3D_COLORPITCH1 0x4E3C |
#define R300_RB3D_COLORPITCH2 0x4E40 |
#define R300_RB3D_COLORPITCH3 0x4E44 |
/* gap */ |
/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then |
 * a flush or free will not occur upon a write to this register, but a sync |
 * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE |
 * are zero but DC_FINISH is one, then a sync will be sent immediately -- the |
 * cb will not wait for all the previous operations to complete before sending |
 * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to |
 * zero. |
 * |
 * Set to 0A before 3D operations, set to 02 afterwards. |
 * In terms of the defines below, 0x0A is DC_FLUSH_FLUSH_DIRTY_3D | |
 * DC_FREE_FREE_3D_TAGS and 0x02 is DC_FLUSH_FLUSH_DIRTY_3D alone. |
 */ |
#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) |
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) |
#define R300_RB3D_DITHER_CTL 0x4E50 /* DITHER_MODE starts at bit 0, ALPHA_DITHER_MODE at bit 2 */ |
# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) |
# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0) |
# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0) |
/* reserved (mode value 3) */ |
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2) |
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2) |
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2) |
/* reserved (mode value 3) */ |
#define R300_RB3D_CMASK_OFFSET0 0x4E54 /* CMASK offset registers 0-3, consecutive dwords */ |
#define R300_RB3D_CMASK_OFFSET1 0x4E58 |
#define R300_RB3D_CMASK_OFFSET2 0x4E5C |
#define R300_RB3D_CMASK_OFFSET3 0x4E60 |
#define R300_RB3D_CMASK_PITCH0 0x4E64 /* CMASK pitch registers 0-3, consecutive dwords */ |
#define R300_RB3D_CMASK_PITCH1 0x4E68 |
#define R300_RB3D_CMASK_PITCH2 0x4E6C |
#define R300_RB3D_CMASK_PITCH3 0x4E70 |
#define R300_RB3D_CMASK_WRINDEX 0x4E74 /* write index / data dword / read index; presumably indexed CMASK RAM access -- confirm */ |
#define R300_RB3D_CMASK_DWORD 0x4E78 |
#define R300_RB3D_CMASK_RDINDEX 0x4E7C |
/* Resolve buffer destination address. The cache must be empty before changing |
 * this register if the cb is in resolve mode. Unpipelined |
 * The mask below keeps bits 31:5, matching the shift of 5. |
 */ |
#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 |
# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 |
# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ |
/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before |
 * changing this register if the cb is in resolve mode. Unpipelined |
 * The mask below keeps bits 13:1, matching the shift of 1. |
 */ |
#define R300_RB3D_AARESOLVE_PITCH 0x4e84 |
# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 |
# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ |
/* Resolve Buffer Control. Unpipelined |
 * Bit 0 selects normal/resolve mode, bit 1 the gamma (1.0 or 2.2 going by |
 * the names) and bit 2 whether alpha is taken from sample 0 or averaged. |
 */ |
#define R300_RB3D_AARESOLVE_CTL 0x4e88 |
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) |
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) |
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) |
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) |
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) |
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) |
/* Discard src pixels less than or equal to threshold. */ |
#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 |
/* Discard src pixels greater than or equal to threshold. |
 * The shift/mask pairs below split a dword into four 8-bit channels (blue |
 * in the low byte, alpha in the high byte). Their names carry no LTE/GTE |
 * part, so presumably they apply to both threshold registers -- confirm. |
 */ |
#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 |
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 |
/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. |
 * ROP_ENABLE is bit 2; the ROP code is a 4-bit field at bits 11:8. |
 */ |
#define R300_RB3D_ROPCNTL 0x4e18 |
# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004 |
# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8) |
# define R300_RB3D_ROPCNTL_ROP_SHIFT 8 |
/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */ |
#define R300_RB3D_CLRCMP_FLIPE 0x4e1c |
/* Sets the fifo sizes. |
 * OP_FIFO_SIZE uses the values 0-3 at bit 0. The "QUATER" and "EIGTHS" |
 * spellings are part of the existing identifiers and must stay as they are. |
 */ |
#define R500_RB3D_FIFO_SIZE 0x4ef4 |
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) |
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) |
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) |
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) |
/* Constant color used by the blender. Pipelined through the blender. |
 * AR half: red in bits 15:0, alpha in bits 31:16. |
 */ |
#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 |
# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff |
# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 |
# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 |
# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 |
/* Constant color used by the blender. Pipelined through the blender. |
 * GB half: blue in bits 15:0, green in bits 31:16. |
 * The historical field names carry an "_AR_" infix although they describe |
 * the GB register; they are kept for existing users, and correctly named |
 * "_GB_" aliases are provided for new code. |
 */ |
#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc |
# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff |
# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 |
# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 |
# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 |
# define R500_RB3D_CONSTANT_COLOR_GB_BLUE_MASK R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK |
# define R500_RB3D_CONSTANT_COLOR_GB_BLUE_SHIFT R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT |
# define R500_RB3D_CONSTANT_COLOR_GB_GREEN_MASK R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK |
# define R500_RB3D_CONSTANT_COLOR_GB_GREEN_SHIFT R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT |
/* gap */ |
/* There seems to be no "write only" setting, so use Z-test = ALWAYS |
 * for this. |
 * Bit (1<<8) is the "test" bit. so plain write is 6 - vd |
 * NOTE(review): no bit 8 is defined for this register below, so the |
 * remark above may be stale -- confirm. |
 */ |
#define R300_ZB_CNTL 0x4F00 |
# define R300_STENCIL_ENABLE (1 << 0) |
# define R300_Z_ENABLE (1 << 1) |
# define R300_Z_WRITE_ENABLE (1 << 2) |
# define R300_Z_SIGNED_COMPARE (1 << 3) |
# define R300_STENCIL_FRONT_BACK (1 << 4) |
# define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5) |
# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) |
#define R300_ZB_ZSTENCILCNTL 0x4f04 |
/* functions: unshifted 3-bit compare codes (0-7, covered by R300_ZS_MASK) */ |
# define R300_ZS_NEVER 0 |
# define R300_ZS_LESS 1 |
# define R300_ZS_LEQUAL 2 |
# define R300_ZS_EQUAL 3 |
# define R300_ZS_GEQUAL 4 |
# define R300_ZS_GREATER 5 |
# define R300_ZS_NOTEQUAL 6 |
# define R300_ZS_ALWAYS 7 |
# define R300_ZS_MASK 7 |
/* operations: unshifted codes in the same 0-7 range as the functions */ |
# define R300_ZS_KEEP 0 |
# define R300_ZS_ZERO 1 |
# define R300_ZS_REPLACE 2 |
# define R300_ZS_INCR 3 |
# define R300_ZS_DECR 4 |
# define R300_ZS_INVERT 5 |
# define R300_ZS_INCR_WRAP 6 |
# define R300_ZS_DECR_WRAP 7 |
# define R300_Z_FUNC_SHIFT 0 |
/* front and back refer to operations done for front |
 and back faces, i.e. separate stencil function support. |
 The shifts below step by three bits, one 3-bit code per field. */ |
# define R300_S_FRONT_FUNC_SHIFT 3 |
# define R300_S_FRONT_SFAIL_OP_SHIFT 6 |
# define R300_S_FRONT_ZPASS_OP_SHIFT 9 |
# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 |
# define R300_S_BACK_FUNC_SHIFT 15 |
# define R300_S_BACK_SFAIL_OP_SHIFT 18 |
# define R300_S_BACK_ZPASS_OP_SHIFT 21 |
# define R300_S_BACK_ZFAIL_OP_SHIFT 24 |
#define R300_ZB_STENCILREFMASK 0x4f08 /* three 8-bit fields: ref 7:0, mask 15:8, write mask 23:16 */ |
# define R300_STENCILREF_SHIFT 0 |
# define R300_STENCILREF_MASK 0x000000ff |
# define R300_STENCILMASK_SHIFT 8 |
# define R300_STENCILMASK_MASK 0x0000ff00 |
# define R300_STENCILWRITEMASK_SHIFT 16 |
# define R300_STENCILWRITEMASK_MASK 0x00ff0000 |
/* gap */ |
#define R300_ZB_FORMAT 0x4f10 /* depth format code at bit 0, 13E3 inversion select at bit 4 */ |
# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) |
# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) |
# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) |
/* reserved up to (15 << 0) */ |
# define R300_INVERT_13E3_LEADING_ONES (0 << 4) |
# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) |
#define R300_ZB_ZTOP 0x4F14 /* single enable bit at bit 0 */ |
# define R300_ZTOP_DISABLE (0 << 0) |
# define R300_ZTOP_ENABLE (1 << 0) |
/* gap */ |
#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 |
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) |
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) |
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) |
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) |
# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) |
# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) |
#define R300_ZB_BW_CNTL 0x4f1c |
# define R300_HIZ_DISABLE (0 << 0) |
# define R300_HIZ_ENABLE (1 << 0) |
# define R300_HIZ_MAX (0 << 1) |
# define R300_HIZ_MIN (1 << 1) |
# define R300_FAST_FILL_DISABLE (0 << 2) |
# define R300_FAST_FILL_ENABLE (1 << 2) |
# define R300_RD_COMP_DISABLE (0 << 3) |
# define R300_RD_COMP_ENABLE (1 << 3) |
# define R300_WR_COMP_DISABLE (0 << 4) |
# define R300_WR_COMP_ENABLE (1 << 4) |
# define R300_ZB_CB_CLEAR_RMW (0 << 5) |
# define R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY (1 << 5) |
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) |
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) |
# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) |
# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) |
# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) |
# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) |
# define R500_BMASK_ENABLE (0 << 10) |
# define R500_BMASK_DISABLE (1 << 10) |
# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) |
# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) |
# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) |
# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) |
# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) |
# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) |
# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) |
# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) |
# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) |
# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) |
# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) |
# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) |
# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) |
# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) |
# define R500_PEQ_PACKING_DISABLE (0 << 18) |
# define R500_PEQ_PACKING_ENABLE (1 << 18) |
# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) |
# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) |
/* gap */ |
/* Z Buffer Address Offset. |
 * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. |
 * (The low five bits are therefore not part of the offset field.) |
 */ |
#define R300_ZB_DEPTHOFFSET 0x4f20 |
/* Z Buffer Pitch and Endian Control. |
 * Pitch in bits 13:2 (R300_DEPTHPITCH_MASK), macro tile enable at bit 16, |
 * micro tile mode in the two-bit field at bits 18:17 and endian swap mode |
 * in the two-bit field at bits 20:19. |
 */ |
#define R300_ZB_DEPTHPITCH 0x4f24 |
# define R300_DEPTHPITCH_MASK 0x00003FFC |
# define R300_DEPTHMACROTILE_DISABLE (0 << 16) |
# define R300_DEPTHMACROTILE_ENABLE (1 << 16) |
# define R300_DEPTHMACROTILE(x) ((x) << 16) |
# define R300_DEPTHMICROTILE_LINEAR (0 << 17) |
# define R300_DEPTHMICROTILE_TILED (1 << 17) |
# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) |
# define R300_DEPTHMICROTILE(x) ((x) << 17) |
/* The endian field starts at bit 19, as in R300_RB3D_COLORPITCH0. Placing it |
 * at bit 18 made WORD_SWAP identical to R300_DEPTHMICROTILE_TILED_SQUARE. |
 */ |
# define R300_DEPTHENDIAN_NO_SWAP (0 << 19) |
# define R300_DEPTHENDIAN_WORD_SWAP (1 << 19) |
# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 19) |
# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 19) |
/* Z Buffer Clear Value */ |
#define R300_ZB_DEPTHCLEARVALUE 0x4f28 |
/* Z Mask RAM is a Z compression buffer. |
 * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks, |
 * that is 2 bits for each block. |
 * On chips with 2 Z pipes, every other dword maps to a different pipe. |
 */ |
/* The dword offset into Z mask RAM (bits 18:4) */ |
#define R300_ZB_ZMASK_OFFSET 0x4f30 |
/* Z Mask Pitch. */ |
#define R300_ZB_ZMASK_PITCH 0x4f34 |
/* Access to Z Mask RAM in a manner similar to HiZ RAM. |
 * The indices are autoincrementing. |
 * Three registers: write index, data dword and read index. */ |
#define R300_ZB_ZMASK_WRINDEX 0x4f38 |
#define R300_ZB_ZMASK_DWORD 0x4f3c |
#define R300_ZB_ZMASK_RDINDEX 0x4f40 |
/* Hierarchical Z Memory Offset (first of five consecutive HiZ registers, 0x4f44-0x4f54) */ |
#define R300_ZB_HIZ_OFFSET 0x4f44 |
/* Hierarchical Z Write Index */ |
#define R300_ZB_HIZ_WRINDEX 0x4f48 |
/* Hierarchical Z Data */ |
#define R300_ZB_HIZ_DWORD 0x4f4c |
/* Hierarchical Z Read Index */ |
#define R300_ZB_HIZ_RDINDEX 0x4f50 |
/* Hierarchical Z Pitch */ |
#define R300_ZB_HIZ_PITCH 0x4f54 |
/* Z Buffer Z Pass Counter Data */ |
#define R300_ZB_ZPASS_DATA 0x4f58 |
/* Z Buffer Z Pass Counter Address */ |
#define R300_ZB_ZPASS_ADDR 0x4f5c |
/* Depth buffer X and Y coordinate offset. |
 * X occupies bits 10:1 and Y bits 26:17, per the masks below; bit 0 of each |
 * half is outside the field. |
 */ |
#define R300_ZB_DEPTHXY_OFFSET 0x4f60 |
# define R300_DEPTHX_OFFSET_SHIFT 1 |
# define R300_DEPTHX_OFFSET_MASK 0x000007FE |
# define R300_DEPTHY_OFFSET_SHIFT 17 |
# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 |
/* Sets the fifo sizes. |
 * OP_FIFO_SIZE takes the values 0-3 at bit 0 (full, half, quarter, eighth), |
 * the same encoding as R500_RB3D_FIFO_SIZE. The "QUATER"/"EIGTHS" spellings |
 * are part of the existing identifiers and are kept for compatibility. |
 */ |
#define R500_ZB_FIFO_SIZE 0x4fd0 |
# define R500_OP_FIFO_SIZE_FULL (0 << 0) |
# define R500_OP_FIFO_SIZE_HALF (1 << 0) |
# define R500_OP_FIFO_SIZE_QUATER (2 << 0) |
/* Was (4 << 0), which skips code 3 and does not fit a 0-3 field. */ |
# define R500_OP_FIFO_SIZE_EIGTHS (3 << 0) |
/* Stencil Reference Value and Mask for backfacing quads */ |
/* R300_ZB_STENCILREFMASK handles front face. The shifts and masks below have |
 * the same values as that register's: ref 7:0, mask 15:8, write mask 23:16. |
 */ |
#define R500_ZB_STENCILREFMASK_BF 0x4fd4 |
# define R500_STENCILREF_SHIFT 0 |
# define R500_STENCILREF_MASK 0x000000ff |
# define R500_STENCILMASK_SHIFT 8 |
# define R500_STENCILMASK_MASK 0x0000ff00 |
# define R500_STENCILWRITEMASK_SHIFT 16 |
# define R500_STENCILWRITEMASK_MASK 0x00ff0000 |
/** |
* \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION |
* |
* The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector |
* Engine instruction or a Math Engine instruction. |
*/ |
/*\{*/ |
enum { /* VE_* opcode numbers; presumably the Vector Engine set named in the group description above -- confirm */ |
/* R3XX: opcodes 0-14 */ |
VECTOR_NO_OP = 0, |
VE_DOT_PRODUCT = 1, |
VE_MULTIPLY = 2, |
VE_ADD = 3, |
VE_MULTIPLY_ADD = 4, |
VE_DISTANCE_VECTOR = 5, |
VE_FRACTION = 6, |
VE_MAXIMUM = 7, |
VE_MINIMUM = 8, |
VE_SET_GREATER_THAN_EQUAL = 9, |
VE_SET_LESS_THAN = 10, |
VE_MULTIPLYX2_ADD = 11, |
VE_MULTIPLY_CLAMP = 12, |
VE_FLT2FIX_DX = 13, |
VE_FLT2FIX_DX_RND = 14, |
/* R5XX: opcodes 15-28 */ |
VE_PRED_SET_EQ_PUSH = 15, |
VE_PRED_SET_GT_PUSH = 16, |
VE_PRED_SET_GTE_PUSH = 17, |
VE_PRED_SET_NEQ_PUSH = 18, |
VE_COND_WRITE_EQ = 19, |
VE_COND_WRITE_GT = 20, |
VE_COND_WRITE_GTE = 21, |
VE_COND_WRITE_NEQ = 22, |
VE_COND_MUX_EQ = 23, |
VE_COND_MUX_GT = 24, |
VE_COND_MUX_GTE = 25, |
VE_SET_GREATER_THAN = 26, |
VE_SET_EQUAL = 27, |
VE_SET_NOT_EQUAL = 28 |
}; |
enum { /* ME_* opcode numbers; presumably the Math Engine set named in the group description above -- confirm */ |
/* R3XX: opcodes 0-17 */ |
MATH_NO_OP = 0, |
ME_EXP_BASE2_DX = 1, |
ME_LOG_BASE2_DX = 2, |
ME_EXP_BASEE_FF = 3, |
ME_LIGHT_COEFF_DX = 4, |
ME_POWER_FUNC_FF = 5, |
ME_RECIP_DX = 6, |
ME_RECIP_FF = 7, |
ME_RECIP_SQRT_DX = 8, |
ME_RECIP_SQRT_FF = 9, |
ME_MULTIPLY = 10, |
ME_EXP_BASE2_FULL_DX = 11, |
ME_LOG_BASE2_FULL_DX = 12, |
ME_POWER_FUNC_FF_CLAMP_B = 13, |
ME_POWER_FUNC_FF_CLAMP_B1 = 14, |
ME_POWER_FUNC_FF_CLAMP_01 = 15, |
ME_SIN = 16, |
ME_COS = 17, |
/* R5XX: opcodes 18-28 */ |
ME_LOG_BASE2_IEEE = 18, |
ME_RECIP_IEEE = 19, |
ME_RECIP_SQRT_IEEE = 20, |
ME_PRED_SET_EQ = 21, |
ME_PRED_SET_GT = 22, |
ME_PRED_SET_GTE = 23, |
ME_PRED_SET_NEQ = 24, |
ME_PRED_SET_CLR = 25, |
ME_PRED_SET_INV = 26, |
ME_PRED_SET_POP = 27, |
ME_PRED_SET_RESTORE = 28 |
}; |
enum { /* PVS macro opcode numbers */ |
/* R3XX: the two macro opcodes defined here */ |
PVS_MACRO_OP_2CLK_MADD = 0, |
PVS_MACRO_OP_2CLK_M2X_ADD = 1 |
}; |
enum { /* source register classes; values 0-3 fit the two-bit PVS_SRC_REG_TYPE field */ |
PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ |
PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ |
PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ |
PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */ |
}; |
enum { /* destination register classes for the PVS_DST_REG_TYPE field */ |
PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */ |
PVS_DST_REG_A0 = 1, /* Address Register Storage */ |
PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ |
PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ |
PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ |
PVS_DST_REG_INPUT = 5 /* NOTE(review): this comment used to repeat the OUT_REPL_X text; presumably input vertex storage as a destination -- confirm */ |
}; |
enum { /* per-channel swizzle selectors; values 0-5 fit the three-bit PVS_SRC_SWIZZLE_* fields */ |
PVS_SRC_SELECT_X = 0, /* Select X Component */ |
PVS_SRC_SELECT_Y = 1, /* Select Y Component */ |
PVS_SRC_SELECT_Z = 2, /* Select Z Component */ |
PVS_SRC_SELECT_W = 3, /* Select W Component */ |
PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ |
PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */ |
}; |
/* PVS Opcode & Destination Operand Description. |
 * Each field is a (MASK, SHIFT) pair; the MASK is unshifted, i.e. it is |
 * applied to the value before shifting, as PVS_OP_DST_OPERAND does. |
 * NOTE(review): PVS_DST_DUAL_MATH_OP (two bits from bit 27) overlaps |
 * PVS_DST_PRED_SENSE (bit 27) -- confirm this is intended. |
 */ |
enum { |
PVS_DST_OPCODE_MASK = 0x3f, |
PVS_DST_OPCODE_SHIFT = 0, |
PVS_DST_MATH_INST_MASK = 0x1, |
PVS_DST_MATH_INST_SHIFT = 6, |
PVS_DST_MACRO_INST_MASK = 0x1, |
PVS_DST_MACRO_INST_SHIFT = 7, |
PVS_DST_REG_TYPE_MASK = 0xf, |
PVS_DST_REG_TYPE_SHIFT = 8, |
PVS_DST_ADDR_MODE_1_MASK = 0x1, |
PVS_DST_ADDR_MODE_1_SHIFT = 12, |
PVS_DST_OFFSET_MASK = 0x7f, |
PVS_DST_OFFSET_SHIFT = 13, |
PVS_DST_WE_X_MASK = 0x1, |
PVS_DST_WE_X_SHIFT = 20, |
PVS_DST_WE_Y_MASK = 0x1, |
PVS_DST_WE_Y_SHIFT = 21, |
PVS_DST_WE_Z_MASK = 0x1, |
PVS_DST_WE_Z_SHIFT = 22, |
PVS_DST_WE_W_MASK = 0x1, |
PVS_DST_WE_W_SHIFT = 23, |
PVS_DST_VE_SAT_MASK = 0x1, |
PVS_DST_VE_SAT_SHIFT = 24, |
PVS_DST_ME_SAT_MASK = 0x1, |
PVS_DST_ME_SAT_SHIFT = 25, |
PVS_DST_PRED_ENABLE_MASK = 0x1, |
PVS_DST_PRED_ENABLE_SHIFT = 26, |
PVS_DST_PRED_SENSE_MASK = 0x1, |
PVS_DST_PRED_SENSE_SHIFT = 27, |
PVS_DST_DUAL_MATH_OP_MASK = 0x3, |
PVS_DST_DUAL_MATH_OP_SHIFT = 27, |
PVS_DST_ADDR_SEL_MASK = 0x3, |
PVS_DST_ADDR_SEL_SHIFT = 29, |
PVS_DST_ADDR_MODE_0_MASK = 0x1, |
PVS_DST_ADDR_MODE_0_SHIFT = 31 |
}; |
/* PVS Source Operand Description. |
 * Each field is a (MASK, SHIFT) pair; the MASK is unshifted, i.e. it is |
 * applied to the value before shifting, as PVS_SRC_OPERAND does. |
 * The fields tile the 32-bit operand: register type 1:0, spare 2, abs 3, |
 * address mode 0 at 4, offset 12:5, swizzles 24:13, modifiers 28:25, |
 * address select 30:29 and address mode 1 at bit 31. |
 */ |
enum { |
PVS_SRC_REG_TYPE_MASK = 0x3, |
PVS_SRC_REG_TYPE_SHIFT = 0, |
SPARE_0_MASK = 0x1, |
SPARE_0_SHIFT = 2, |
PVS_SRC_ABS_XYZW_MASK = 0x1, |
PVS_SRC_ABS_XYZW_SHIFT = 3, |
PVS_SRC_ADDR_MODE_0_MASK = 0x1, |
PVS_SRC_ADDR_MODE_0_SHIFT = 4, |
PVS_SRC_OFFSET_MASK = 0xff, |
PVS_SRC_OFFSET_SHIFT = 5, |
PVS_SRC_SWIZZLE_X_MASK = 0x7, |
PVS_SRC_SWIZZLE_X_SHIFT = 13, |
PVS_SRC_SWIZZLE_Y_MASK = 0x7, |
PVS_SRC_SWIZZLE_Y_SHIFT = 16, |
PVS_SRC_SWIZZLE_Z_MASK = 0x7, |
PVS_SRC_SWIZZLE_Z_SHIFT = 19, |
PVS_SRC_SWIZZLE_W_MASK = 0x7, |
PVS_SRC_SWIZZLE_W_SHIFT = 22, |
PVS_SRC_MODIFIER_X_MASK = 0x1, |
PVS_SRC_MODIFIER_X_SHIFT = 25, |
PVS_SRC_MODIFIER_Y_MASK = 0x1, |
PVS_SRC_MODIFIER_Y_SHIFT = 26, |
PVS_SRC_MODIFIER_Z_MASK = 0x1, |
PVS_SRC_MODIFIER_Z_SHIFT = 27, |
PVS_SRC_MODIFIER_W_MASK = 0x1, |
PVS_SRC_MODIFIER_W_SHIFT = 28, |
PVS_SRC_ADDR_SEL_MASK = 0x3, |
PVS_SRC_ADDR_SEL_SHIFT = 29, |
/* Was mask 0x0 / shift 32: an empty field, and shifting a 32-bit value by |
 * 32 is undefined. Bit 31 is the only bit the other fields leave free. */ |
PVS_SRC_ADDR_MODE_1_MASK = 0x1, |
PVS_SRC_ADDR_MODE_1_SHIFT = 31 |
}; |
/*\}*/ |
#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class, saturate) \ |
(((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ |
| ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ |
| ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ |
| ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ |
| ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ |
| ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) \ |
| ((math_inst) ? (((saturate) & PVS_DST_ME_SAT_MASK) << PVS_DST_ME_SAT_SHIFT) : \ |
(((saturate) & PVS_DST_VE_SAT_MASK) << PVS_DST_VE_SAT_SHIFT)) |
#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ |
(((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ |
| ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ |
| ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ |
| ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ |
| ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ |
| ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ |
| ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) |
/* BEGIN: Packet 3 commands */ |
/* A primitive emission dword. |
 * Primitive type in bits 3:0 (R300_PRIM_TYPE_MASK), walk mode in bits 5:4 |
 * (R300_PRIM_WALK_MASK), color order at bit 6 and the vertex count from |
 * bit 16 up. R300_PRIM_NUM_VERTICES_MASK is unshifted (0xffff), so it |
 * presumably applies to the count before it is shifted -- confirm. |
 */ |
#define R300_PRIM_TYPE_NONE (0 << 0) |
#define R300_PRIM_TYPE_POINT (1 << 0) |
#define R300_PRIM_TYPE_LINE (2 << 0) |
#define R300_PRIM_TYPE_LINE_STRIP (3 << 0) |
#define R300_PRIM_TYPE_TRI_LIST (4 << 0) |
#define R300_PRIM_TYPE_TRI_FAN (5 << 0) |
#define R300_PRIM_TYPE_TRI_STRIP (6 << 0) |
#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0) |
#define R300_PRIM_TYPE_RECT_LIST (8 << 0) |
#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) |
#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) |
/* GUESS (based on r200) */ |
#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) |
#define R300_PRIM_TYPE_LINE_LOOP (12 << 0) |
#define R300_PRIM_TYPE_QUADS (13 << 0) |
#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) |
#define R300_PRIM_TYPE_POLYGON (15 << 0) |
#define R300_PRIM_TYPE_MASK 0xF |
#define R300_PRIM_WALK_IND (1 << 4) |
#define R300_PRIM_WALK_LIST (2 << 4) |
#define R300_PRIM_WALK_RING (3 << 4) |
#define R300_PRIM_WALK_MASK (3 << 4) |
/* GUESS (based on r200) */ |
#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) |
#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) |
#define R300_PRIM_NUM_VERTICES_SHIFT 16 |
#define R300_PRIM_NUM_VERTICES_MASK 0xffff |
/* |
* The R500 unified shader (US) registers come in banks of 512 each, one |
* for each instruction slot in the shader. You can't touch them directly. |
* R500_US_VECTOR_INDEX() sets the base instruction to modify; successive |
* writes to R500_GA_US_VECTOR_DATA autoincrement the index after the |
* instruction is fully specified. |
*/ |
/* Alpha ALU instruction word. |
 * Opcode in bits 3:0, destination address from bit 4 (relative flag at |
 * bit 11), operand A select/swizzle/modifier at bits 12/14/17, operand B |
 * select/swizzle/modifier at bits 19/21/24, output modifier at |
 * R500_ALPHA_OMOD_SHIFT, target from bit 29 and W_OMASK at bit 31. |
 */ |
#define R500_US_ALU_ALPHA_INST_0 0xa800 |
# define R500_ALPHA_OP_MAD 0 |
# define R500_ALPHA_OP_DP 1 |
# define R500_ALPHA_OP_MIN 2 |
# define R500_ALPHA_OP_MAX 3 |
/* #define R500_ALPHA_OP_RESERVED 4 */ |
# define R500_ALPHA_OP_CND 5 |
# define R500_ALPHA_OP_CMP 6 |
# define R500_ALPHA_OP_FRC 7 |
# define R500_ALPHA_OP_EX2 8 |
# define R500_ALPHA_OP_LN2 9 |
# define R500_ALPHA_OP_RCP 10 |
# define R500_ALPHA_OP_RSQ 11 |
# define R500_ALPHA_OP_SIN 12 |
# define R500_ALPHA_OP_COS 13 |
# define R500_ALPHA_OP_MDH 14 |
# define R500_ALPHA_OP_MDV 15 |
# define R500_ALPHA_ADDRD(x) ((x) << 4) |
# define R500_ALPHA_ADDRD_REL (1 << 11) |
# define R500_ALPHA_SEL_A_SHIFT 12 |
# define R500_ALPHA_SEL_A_SRC0 (0 << 12) |
# define R500_ALPHA_SEL_A_SRC1 (1 << 12) |
# define R500_ALPHA_SEL_A_SRC2 (2 << 12) |
# define R500_ALPHA_SEL_A_SRCP (3 << 12) |
# define R500_ALPHA_SWIZ_A_R (0 << 14) |
# define R500_ALPHA_SWIZ_A_G (1 << 14) |
# define R500_ALPHA_SWIZ_A_B (2 << 14) |
# define R500_ALPHA_SWIZ_A_A (3 << 14) |
# define R500_ALPHA_SWIZ_A_0 (4 << 14) |
# define R500_ALPHA_SWIZ_A_HALF (5 << 14) |
# define R500_ALPHA_SWIZ_A_1 (6 << 14) |
/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ |
# define R500_ALPHA_MOD_A_NOP (0 << 17) |
# define R500_ALPHA_MOD_A_NEG (1 << 17) |
# define R500_ALPHA_MOD_A_ABS (2 << 17) |
# define R500_ALPHA_MOD_A_NAB (3 << 17) |
# define R500_ALPHA_SEL_B_SHIFT 19 |
# define R500_ALPHA_SEL_B_SRC0 (0 << 19) |
# define R500_ALPHA_SEL_B_SRC1 (1 << 19) |
# define R500_ALPHA_SEL_B_SRC2 (2 << 19) |
# define R500_ALPHA_SEL_B_SRCP (3 << 19) |
# define R500_ALPHA_SWIZ_B_R (0 << 21) |
# define R500_ALPHA_SWIZ_B_G (1 << 21) |
# define R500_ALPHA_SWIZ_B_B (2 << 21) |
# define R500_ALPHA_SWIZ_B_A (3 << 21) |
# define R500_ALPHA_SWIZ_B_0 (4 << 21) |
# define R500_ALPHA_SWIZ_B_HALF (5 << 21) |
# define R500_ALPHA_SWIZ_B_1 (6 << 21) |
/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ |
# define R500_ALPHA_MOD_B_NOP (0 << 24) |
# define R500_ALPHA_MOD_B_NEG (1 << 24) |
# define R500_ALPHA_MOD_B_ABS (2 << 24) |
# define R500_ALPHA_MOD_B_NAB (3 << 24) |
# define R500_ALPHA_OMOD_SHIFT 26 |
# define R500_ALPHA_OMOD_IDENTITY (0 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_MUL_2 (1 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_MUL_4 (2 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_MUL_8 (3 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_DIV_2 (4 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_DIV_4 (5 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_DIV_8 (6 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_OMOD_DISABLE (7 << R500_ALPHA_OMOD_SHIFT) |
# define R500_ALPHA_TARGET(x) ((x) << 29) |
/* Unsigned literal: shifting a signed 1 into bit 31 overflows int. */ |
# define R500_ALPHA_W_OMASK (1u << 31) |
#define R500_US_ALU_ALPHA_ADDR_0 0x9800 |
# define R500_ALPHA_ADDR0(x) ((x) << 0) |
# define R500_ALPHA_ADDR0_CONST (1 << 8) |
# define R500_ALPHA_ADDR0_REL (1 << 9) |
# define R500_ALPHA_ADDR1(x) ((x) << 10) |
# define R500_ALPHA_ADDR1_CONST (1 << 18) |
# define R500_ALPHA_ADDR1_REL (1 << 19) |
# define R500_ALPHA_ADDR2(x) ((x) << 20) |
# define R500_ALPHA_ADDR2_CONST (1 << 28) |
# define R500_ALPHA_ADDR2_REL (1 << 29) |
# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) |
# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) |
# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) |
# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) |
/* RGBA ALU instruction word. |
 * RGB opcode in bits 3:0, destination address from bit 4 (relative flag at |
 * bit 11), RGB operand C select at bit 12 with its R/G/B swizzles at bits |
 * 14/17/20 and modifier at bit 23, then alpha operand C select at bit 25, |
 * its swizzle at bit 27 and its modifier in bits 31:30. |
 */ |
#define R500_US_ALU_RGBA_INST_0 0xb000 |
# define R500_ALU_RGBA_OP_MAD (0 << 0) |
# define R500_ALU_RGBA_OP_DP3 (1 << 0) |
# define R500_ALU_RGBA_OP_DP4 (2 << 0) |
# define R500_ALU_RGBA_OP_D2A (3 << 0) |
# define R500_ALU_RGBA_OP_MIN (4 << 0) |
# define R500_ALU_RGBA_OP_MAX (5 << 0) |
/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ |
# define R500_ALU_RGBA_OP_CND (7 << 0) |
# define R500_ALU_RGBA_OP_CMP (8 << 0) |
# define R500_ALU_RGBA_OP_FRC (9 << 0) |
# define R500_ALU_RGBA_OP_SOP (10 << 0) |
# define R500_ALU_RGBA_OP_MDH (11 << 0) |
# define R500_ALU_RGBA_OP_MDV (12 << 0) |
# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) |
# define R500_ALU_RGBA_ADDRD_REL (1 << 11) |
# define R500_ALU_RGBA_SEL_C_SHIFT 12 |
# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) |
# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) |
# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) |
# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) |
# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) |
# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) |
# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) |
# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) |
# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) |
# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) |
# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) |
/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ |
# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) |
# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) |
# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) |
# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) |
# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) |
# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) |
# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) |
/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ |
# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) |
# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) |
# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) |
# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) |
# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) |
# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) |
# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) |
/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ |
# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) |
# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) |
# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) |
# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) |
# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25 |
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) |
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) |
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) |
# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) |
# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) |
# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) |
# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) |
# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) |
# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) |
# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) |
# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) |
/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ |
/* Unsigned literals: codes 2 and 3 reach bit 31, which overflows a signed |
 * int shift. |
 */ |
# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0u << 30) |
# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1u << 30) |
# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2u << 30) |
# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3u << 30) |
#define R500_US_ALU_RGB_INST_0 0xa000 |
# define R500_ALU_RGB_SEL_A_SHIFT 0 |
# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) |
# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) |
# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0) |
# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) |
# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) |
# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) |
# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) |
# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) |
# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) |
# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) |
# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) |
/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ |
# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) |
# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) |
# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) |
# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) |
# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) |
# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) |
# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) |
/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ |
# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) |
# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) |
# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) |
# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) |
# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) |
# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) |
# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) |
/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ |
# define R500_ALU_RGB_MOD_A_NOP (0 << 11) |
# define R500_ALU_RGB_MOD_A_NEG (1 << 11) |
# define R500_ALU_RGB_MOD_A_ABS (2 << 11) |
# define R500_ALU_RGB_MOD_A_NAB (3 << 11) |
# define R500_ALU_RGB_SEL_B_SHIFT 13 |
# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) |
# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) |
# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) |
# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) |
# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) |
# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) |
# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) |
# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) |
# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) |
# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) |
# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) |
/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ |
# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) |
# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) |
# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) |
# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18) |
# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) |
# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) |
# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) |
/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ |
# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) |
# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) |
# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) |
# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) |
# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) |
# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) |
# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) |
/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ |
# define R500_ALU_RGB_MOD_B_NOP (0 << 24) |
# define R500_ALU_RGB_MOD_B_NEG (1 << 24) |
# define R500_ALU_RGB_MOD_B_ABS (2 << 24) |
# define R500_ALU_RGB_MOD_B_NAB (3 << 24) |
# define R500_ALU_RGB_OMOD_SHIFT 26 |
# define R500_ALU_RGB_OMOD_IDENTITY (0 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_MUL_2 (1 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_MUL_4 (2 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_MUL_8 (3 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_DIV_2 (4 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_DIV_4 (5 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_DIV_8 (6 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_OMOD_DISABLE (7 << R500_ALU_RGB_OMOD_SHIFT) |
# define R500_ALU_RGB_TARGET(x) ((x) << 29) |
/* Bit 31: shift an unsigned 1 so the value does not overflow a signed int. */
#   define R500_ALU_RGB_WMASK (1u << 31)
#define R500_US_ALU_RGB_ADDR_0 0x9000 |
# define R500_RGB_ADDR0(x) ((x) << 0) |
# define R500_RGB_ADDR0_CONST (1 << 8) |
# define R500_RGB_ADDR0_REL (1 << 9) |
# define R500_RGB_ADDR1(x) ((x) << 10) |
# define R500_RGB_ADDR1_CONST (1 << 18) |
# define R500_RGB_ADDR1_REL (1 << 19) |
# define R500_RGB_ADDR2(x) ((x) << 20) |
# define R500_RGB_ADDR2_CONST (1 << 28) |
# define R500_RGB_ADDR2_REL (1 << 29) |
# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) |
# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) |
# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) |
# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) |
#define R500_US_CMN_INST_0 0xb800 |
# define R500_INST_TYPE_MASK (3 << 0) |
# define R500_INST_TYPE_ALU (0 << 0) |
# define R500_INST_TYPE_OUT (1 << 0) |
# define R500_INST_TYPE_FC (2 << 0) |
# define R500_INST_TYPE_TEX (3 << 0) |
# define R500_INST_TEX_SEM_WAIT_SHIFT 2 |
# define R500_INST_TEX_SEM_WAIT (1 << R500_INST_TEX_SEM_WAIT_SHIFT) |
# define R500_INST_RGB_PRED_SEL_NONE (0 << 3) |
# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) |
# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) |
# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) |
# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) |
# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) |
# define R500_INST_RGB_PRED_INV (1 << 6) |
# define R500_INST_WRITE_INACTIVE (1 << 7) |
# define R500_INST_LAST (1 << 8) |
# define R500_INST_NOP (1 << 9) |
# define R500_INST_ALU_WAIT (1 << 10) |
# define R500_INST_RGB_WMASK_R (1 << 11) |
# define R500_INST_RGB_WMASK_G (1 << 12) |
# define R500_INST_RGB_WMASK_B (1 << 13) |
# define R500_INST_RGB_WMASK_RGB (7 << 11) |
# define R500_INST_ALPHA_WMASK (1 << 14) |
# define R500_INST_RGB_OMASK_R (1 << 15) |
# define R500_INST_RGB_OMASK_G (1 << 16) |
# define R500_INST_RGB_OMASK_B (1 << 17) |
# define R500_INST_RGB_OMASK_RGB (7 << 15) |
# define R500_INST_ALPHA_OMASK (1 << 18) |
# define R500_INST_RGB_CLAMP (1 << 19) |
# define R500_INST_ALPHA_CLAMP (1 << 20) |
# define R500_INST_ALU_RESULT_SEL (1 << 21) |
# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) |
# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) |
# define R500_INST_ALPHA_PRED_INV (1 << 22) |
# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) |
# define R500_INST_ALU_RESULT_OP_LT (1 << 23) |
# define R500_INST_ALU_RESULT_OP_GE (2 << 23) |
# define R500_INST_ALU_RESULT_OP_NE (3 << 23) |
# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) |
# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) |
# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) |
# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) |
# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) |
# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) |
/* XXX next four are kind of guessed */ |
# define R500_INST_STAT_WE_R (1 << 28) |
# define R500_INST_STAT_WE_G (1 << 29) |
# define R500_INST_STAT_WE_B (1 << 30) |
/* Bit 31: shift an unsigned 1 so the value does not overflow a signed int. */
#   define R500_INST_STAT_WE_A (1u << 31)
/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ |
#define R500_US_CODE_ADDR 0x4630 |
# define R500_US_CODE_START_ADDR(x) ((x) << 0) |
# define R500_US_CODE_END_ADDR(x) ((x) << 16) |
#define R500_US_CODE_OFFSET 0x4638 |
# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) |
#define R500_US_CODE_RANGE 0x4634 |
# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) |
# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) |
#define R500_US_CONFIG 0x4600 |
# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) |
#define R500_US_FC_ADDR_0 0xa000 |
# define R500_FC_BOOL_ADDR(x) ((x) << 0) |
# define R500_FC_INT_ADDR(x) ((x) << 8) |
# define R500_FC_JUMP_ADDR(x) ((x) << 16) |
/* Bit 31: shift an unsigned 1 so the value does not overflow a signed int. */
#   define R500_FC_JUMP_GLOBAL (1u << 31)
#define R500_US_FC_BOOL_CONST 0x4620 |
# define R500_FC_KBOOL(x) (x) |
#define R500_US_FC_CTRL 0x4624 |
# define R500_FC_TEST_EN (1 << 30) |
/* Bit 31: shift an unsigned 1 so the value does not overflow a signed int. */
#   define R500_FC_FULL_FC_EN (1u << 31)
#define R500_US_FC_INST_0 0x9800 |
# define R500_FC_OP_JUMP (0 << 0) |
# define R500_FC_OP_LOOP (1 << 0) |
# define R500_FC_OP_ENDLOOP (2 << 0) |
# define R500_FC_OP_REP (3 << 0) |
# define R500_FC_OP_ENDREP (4 << 0) |
# define R500_FC_OP_BREAKLOOP (5 << 0) |
# define R500_FC_OP_BREAKREP (6 << 0) |
# define R500_FC_OP_CONTINUE (7 << 0) |
# define R500_FC_B_ELSE (1 << 4) |
# define R500_FC_JUMP_ANY (1 << 5) |
# define R500_FC_A_OP_NONE (0 << 6) |
# define R500_FC_A_OP_POP (1 << 6) |
# define R500_FC_A_OP_PUSH (2 << 6) |
# define R500_FC_JUMP_FUNC(x) ((x) << 8) |
# define R500_FC_B_POP_CNT(x) ((x) << 16) |
# define R500_FC_B_OP0_NONE (0 << 24) |
# define R500_FC_B_OP0_DECR (1 << 24) |
# define R500_FC_B_OP0_INCR (2 << 24) |
# define R500_FC_B_OP1_NONE (0 << 26) |
# define R500_FC_B_OP1_DECR (1 << 26) |
# define R500_FC_B_OP1_INCR (2 << 26) |
# define R500_FC_IGNORE_UNCOVERED (1 << 28) |
#define R500_US_FC_INT_CONST_0 0x4c00 |
# define R500_FC_INT_CONST_KR(x) ((x) << 0) |
# define R500_FC_INT_CONST_KG(x) ((x) << 8) |
# define R500_FC_INT_CONST_KB(x) ((x) << 16) |
/* _0 through _15 */ |
#define R500_US_FORMAT0_0 0x4640 |
# define R500_FORMAT_TXWIDTH(x) ((x) << 0) |
# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) |
# define R500_FORMAT_TXDEPTH(x) ((x) << 22) |
#define R500_US_PIXSIZE 0x4604 |
# define R500_PIX_SIZE(x) (x) |
#define R500_US_TEX_ADDR_0 0x9800 |
# define R500_TEX_SRC_ADDR(x) ((x) << 0) |
# define R500_TEX_SRC_ADDR_REL (1 << 7) |
# define R500_TEX_SRC_S_SWIZ_R (0 << 8) |
# define R500_TEX_SRC_S_SWIZ_G (1 << 8) |
# define R500_TEX_SRC_S_SWIZ_B (2 << 8) |
# define R500_TEX_SRC_S_SWIZ_A (3 << 8) |
# define R500_TEX_SRC_T_SWIZ_R (0 << 10) |
# define R500_TEX_SRC_T_SWIZ_G (1 << 10) |
# define R500_TEX_SRC_T_SWIZ_B (2 << 10) |
# define R500_TEX_SRC_T_SWIZ_A (3 << 10) |
# define R500_TEX_SRC_R_SWIZ_R (0 << 12) |
# define R500_TEX_SRC_R_SWIZ_G (1 << 12) |
# define R500_TEX_SRC_R_SWIZ_B (2 << 12) |
# define R500_TEX_SRC_R_SWIZ_A (3 << 12) |
# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) |
# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) |
# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) |
# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) |
# define R500_TEX_DST_ADDR(x) ((x) << 16) |
# define R500_TEX_DST_ADDR_REL (1 << 23) |
# define R500_TEX_DST_R_SWIZ_R (0 << 24) |
# define R500_TEX_DST_R_SWIZ_G (1 << 24) |
# define R500_TEX_DST_R_SWIZ_B (2 << 24) |
# define R500_TEX_DST_R_SWIZ_A (3 << 24) |
# define R500_TEX_DST_G_SWIZ_R (0 << 26) |
# define R500_TEX_DST_G_SWIZ_G (1 << 26) |
# define R500_TEX_DST_G_SWIZ_B (2 << 26) |
# define R500_TEX_DST_G_SWIZ_A (3 << 26) |
# define R500_TEX_DST_B_SWIZ_R (0 << 28) |
# define R500_TEX_DST_B_SWIZ_G (1 << 28) |
# define R500_TEX_DST_B_SWIZ_B (2 << 28) |
# define R500_TEX_DST_B_SWIZ_A (3 << 28) |
# define R500_TEX_DST_A_SWIZ_R (0 << 30) |
# define R500_TEX_DST_A_SWIZ_G (1 << 30) |
# define R500_TEX_DST_A_SWIZ_B (2 << 30) |
# define R500_TEX_DST_A_SWIZ_A (3 << 30) |
#define R500_US_TEX_ADDR_DXDY_0 0xa000 |
# define R500_DX_ADDR(x) ((x) << 0) |
# define R500_DX_ADDR_REL (1 << 7) |
# define R500_DX_S_SWIZ_R (0 << 8) |
# define R500_DX_S_SWIZ_G (1 << 8) |
# define R500_DX_S_SWIZ_B (2 << 8) |
# define R500_DX_S_SWIZ_A (3 << 8) |
# define R500_DX_T_SWIZ_R (0 << 10) |
# define R500_DX_T_SWIZ_G (1 << 10) |
# define R500_DX_T_SWIZ_B (2 << 10) |
# define R500_DX_T_SWIZ_A (3 << 10) |
# define R500_DX_R_SWIZ_R (0 << 12) |
# define R500_DX_R_SWIZ_G (1 << 12) |
# define R500_DX_R_SWIZ_B (2 << 12) |
# define R500_DX_R_SWIZ_A (3 << 12) |
# define R500_DX_Q_SWIZ_R (0 << 14) |
# define R500_DX_Q_SWIZ_G (1 << 14) |
# define R500_DX_Q_SWIZ_B (2 << 14) |
# define R500_DX_Q_SWIZ_A (3 << 14) |
# define R500_DY_ADDR(x) ((x) << 16) |
/* Relative-addressing flag for DY_ADDR.  The DY address field starts at
 * bit 16, so its REL flag sits at bit 23, mirroring R500_DX_ADDR_REL
 * (bit 7 above a field at bit 0) and R500_TEX_DST_ADDR_REL (bit 23 above a
 * field at bit 16).  The previous value (1 << 17) fell inside the address
 * field itself.
 * NOTE(review): confirm against the R5xx register reference. */
#   define R500_DY_ADDR_REL (1 << 23)
# define R500_DY_S_SWIZ_R (0 << 24) |
# define R500_DY_S_SWIZ_G (1 << 24) |
# define R500_DY_S_SWIZ_B (2 << 24) |
# define R500_DY_S_SWIZ_A (3 << 24) |
# define R500_DY_T_SWIZ_R (0 << 26) |
# define R500_DY_T_SWIZ_G (1 << 26) |
# define R500_DY_T_SWIZ_B (2 << 26) |
# define R500_DY_T_SWIZ_A (3 << 26) |
# define R500_DY_R_SWIZ_R (0 << 28) |
# define R500_DY_R_SWIZ_G (1 << 28) |
# define R500_DY_R_SWIZ_B (2 << 28) |
# define R500_DY_R_SWIZ_A (3 << 28) |
# define R500_DY_Q_SWIZ_R (0 << 30) |
# define R500_DY_Q_SWIZ_G (1 << 30) |
# define R500_DY_Q_SWIZ_B (2 << 30) |
# define R500_DY_Q_SWIZ_A (3 << 30) |
#define R500_US_TEX_INST_0 0x9000 |
# define R500_TEX_ID(x) ((x) << 16) |
# define R500_TEX_INST_NOP (0 << 22) |
# define R500_TEX_INST_LD (1 << 22) |
# define R500_TEX_INST_TEXKILL (2 << 22) |
# define R500_TEX_INST_PROJ (3 << 22) |
# define R500_TEX_INST_LODBIAS (4 << 22) |
# define R500_TEX_INST_LOD (5 << 22) |
# define R500_TEX_INST_DXDY (6 << 22) |
# define R500_TEX_SEM_ACQUIRE_SHIFT 25 |
# define R500_TEX_SEM_ACQUIRE (1 << R500_TEX_SEM_ACQUIRE_SHIFT) |
# define R500_TEX_IGNORE_UNCOVERED (1 << 26) |
# define R500_TEX_UNSCALED (1 << 27) |
#define R300_US_W_FMT 0x46b4 |
# define R300_W_FMT_W0 (0 << 0) |
# define R300_W_FMT_W24 (1 << 0) |
# define R300_W_FMT_W24FP (2 << 0) |
# define R300_W_SRC_US (0 << 2) |
# define R300_W_SRC_RAS (1 << 2) |
/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. |
* Two parameter dwords: |
* 0. VAP_VTX_FMT: The first parameter is not written to hardware |
* 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. |
*/ |
#define R300_PACKET3_3D_DRAW_VBUF 0x00002800 |
/* Draw a primitive from immediate vertices in this packet |
* Up to 16382 dwords: |
* 0. VAP_VTX_FMT: The first parameter is not written to hardware |
* 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. |
* 2 to end: Up to 16380 dwords of vertex data. |
*/ |
#define R300_PACKET3_3D_DRAW_IMMD 0x00002900 |
/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and |
* immediate vertices in this packet |
* Up to 16382 dwords: |
* 0. VAP_VTX_FMT: The first parameter is not written to hardware |
* 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. |
* 2 to end: Up to 16380 dwords of vertex data. |
*/ |
#define R300_PACKET3_3D_DRAW_INDX 0x00002A00 |
/* Specify the full set of vertex arrays as (address, stride). |
* The first parameter is the number of vertex arrays specified. |
* The rest of the command is a variable length list of blocks, where |
* each block is three dwords long and specifies two arrays. |
* The first dword of a block is split into two words, the lower significant |
* word refers to the first array, the more significant word to the second |
* array in the block. |
* The low byte of each word contains the size of an array entry in dwords, |
* the high byte contains the stride of the array. |
* The second dword of a block contains the pointer to the first array, |
* the third dword of a block contains the pointer to the second array. |
* Note that if the total number of arrays is odd, the third dword of |
* the last block is omitted. |
*/ |
#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 |
# define R300_VC_FORCE_PREFETCH (1 << 5) |
# define R300_VBPNTR_SIZE0(x) ((x) >> 2) |
# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8) |
# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) |
# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) |
#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200 |
/* INDX_BUFFER packet opcode and the fields of its first parameter dword. */
#define R300_PACKET3_INDX_BUFFER 0x00003300
#   define R300_INDX_BUFFER_DST_SHIFT 0
#   define R300_INDX_BUFFER_SKIP_SHIFT 16
/* Bit 31: shift an unsigned 1 so the value does not overflow a signed int. */
#   define R300_INDX_BUFFER_ONE_REG_WR (1u << 31)
/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ |
#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 |
/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */ |
#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500 |
/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ |
#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 |
/* Clears a portion of hierarchical Z RAM |
* 3 dword parameters |
* 0. START |
* 1. COUNT: 13:0 (max is 0x3FFF) |
* 2. CLEAR_VALUE: Value to write into HIZ RAM. |
*/ |
#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700 |
#define R300_PACKET3_3D_CLEAR_CMASK 0x00003800 |
/* Draws a set of primitives using vertex buffers pointed by the state data. |
* At least 2 Parameters: |
* 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword. |
* 2 to end: Data or indices (see other 3D_DRAW_* packets for details) |
*/ |
#define R300_PACKET3_3D_DRAW_128 0x00003900 |
/* END: Packet 3 commands */ |
/* Color formats for 2d packets |
*/ |
#define R300_CP_COLOR_FORMAT_CI8 2 |
#define R300_CP_COLOR_FORMAT_ARGB1555 3 |
#define R300_CP_COLOR_FORMAT_RGB565 4 |
#define R300_CP_COLOR_FORMAT_ARGB8888 6 |
#define R300_CP_COLOR_FORMAT_RGB332 7 |
#define R300_CP_COLOR_FORMAT_RGB8 9 |
#define R300_CP_COLOR_FORMAT_ARGB4444 15 |
/* |
* CP type-3 packets |
*/ |
#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 |
/* XXX Corbin's stuff from radeon and r200 */ |
#define RADEON_WAIT_UNTIL 0x1720 |
# define RADEON_WAIT_CRTC_PFLIP (1 << 0) |
# define RADEON_WAIT_2D_IDLECLEAN (1 << 16) |
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17) |
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) |
#define R200_3D_DRAW_IMMD_2 0xC0003500 |
/* Command-processor packet header helpers. */
#define RADEON_CP_PACKET0 0x0 /* XXX stolen from radeon_reg.h */
#define RADEON_CP_PACKET3 0xC0000000
#define RADEON_ONE_REG_WR (1 << 15)

/* Type-0 header: the count goes in the bits from 16 up, and the register
 * byte offset is stored divided by four in the low bits. */
#define CP_PACKET0(reg, count) \
    (RADEON_CP_PACKET0 | ((count) << 16) | ((reg) >> 2))

/* Type-3 header: the opcode is OR'ed in as given, and the count goes in
 * the bits from 16 up. */
#define CP_PACKET3(op, count) \
    (RADEON_CP_PACKET3 | (op) | ((count) << 16))
#endif /* _R300_REG_H */ |
/* *INDENT-ON* */ |
/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_render.c |
---|
0,0 → 1,1208 |
/* |
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
/* r300_render: Vertex and index buffer primitive emission. Contains both |
* HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */ |
#include "draw/draw_context.h" |
#include "draw/draw_vbuf.h" |
#include "util/u_inlines.h" |
#include "util/u_format.h" |
#include "util/u_memory.h" |
#include "util/u_upload_mgr.h" |
#include "util/u_prim.h" |
#include "r300_cs.h" |
#include "r300_context.h" |
#include "r300_screen_buffer.h" |
#include "r300_emit.h" |
#include "r300_reg.h" |
#include <limits.h> |
#define IMMD_DWORDS 32 |
static uint32_t r300_translate_primitive(unsigned prim) |
{ |
static const int prim_conv[] = { |
R300_VAP_VF_CNTL__PRIM_POINTS, |
R300_VAP_VF_CNTL__PRIM_LINES, |
R300_VAP_VF_CNTL__PRIM_LINE_LOOP, |
R300_VAP_VF_CNTL__PRIM_LINE_STRIP, |
R300_VAP_VF_CNTL__PRIM_TRIANGLES, |
R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP, |
R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN, |
R300_VAP_VF_CNTL__PRIM_QUADS, |
R300_VAP_VF_CNTL__PRIM_QUAD_STRIP, |
R300_VAP_VF_CNTL__PRIM_POLYGON, |
-1, |
-1, |
-1, |
-1 |
}; |
unsigned hwprim = prim_conv[prim]; |
assert(hwprim != -1); |
return hwprim; |
} |
static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, |
unsigned mode) |
{ |
struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; |
uint32_t color_control = rs->color_control; |
/* By default (see r300_state.c:r300_create_rs_state) color_control is |
* initialized to provoking the first vertex. |
* |
* Triangle fans must be reduced to the second vertex, not the first, in |
* Gallium flatshade-first mode, as per the GL spec. |
* (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt) |
* |
* Quads never provoke correctly in flatshade-first mode. The first |
* vertex is never considered as provoking, so only the second, third, |
* and fourth vertices can be selected, and both "third" and "last" modes |
* select the fourth vertex. This is probably due to D3D lacking quads. |
* |
* Similarly, polygons reduce to the first, not the last, vertex, when in |
* "last" mode, and all other modes start from the second vertex. |
* |
* ~ C. |
*/ |
if (rs->rs.flatshade_first) { |
switch (mode) { |
case PIPE_PRIM_TRIANGLE_FAN: |
color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; |
break; |
case PIPE_PRIM_QUADS: |
case PIPE_PRIM_QUAD_STRIP: |
case PIPE_PRIM_POLYGON: |
color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; |
break; |
default: |
color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST; |
break; |
} |
} else { |
color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; |
} |
return color_control; |
} |
void r500_emit_index_bias(struct r300_context *r300, int index_bias) |
{ |
CS_LOCALS(r300); |
BEGIN_CS(2); |
OUT_CS_REG(R500_VAP_INDEX_OFFSET, |
(index_bias & 0xFFFFFF) | (index_bias < 0 ? 1<<24 : 0)); |
END_CS; |
} |
static void r300_emit_draw_init(struct r300_context *r300, unsigned mode, |
unsigned max_index) |
{ |
CS_LOCALS(r300); |
assert(max_index < (1 << 24)); |
BEGIN_CS(5); |
OUT_CS_REG(R300_GA_COLOR_CONTROL, |
r300_provoking_vertex_fixes(r300, mode)); |
OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); |
OUT_CS(max_index); |
OUT_CS(0); |
END_CS; |
} |
/* This function splits the index bias value into two parts: |
* - buffer_offset: the value that can be safely added to buffer offsets |
* in r300_emit_vertex_arrays (it must yield a positive offset when added to |
* a vertex buffer offset) |
* - index_offset: the value that must be manually subtracted from indices |
* in an index buffer to achieve negative offsets. */ |
static void r300_split_index_bias(struct r300_context *r300, int index_bias, |
int *buffer_offset, int *index_offset) |
{ |
struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer; |
struct pipe_vertex_element *velem = r300->velems->velem; |
unsigned i, size; |
int max_neg_bias; |
if (index_bias < 0) { |
/* See how large index bias we may subtract. We must be careful |
* here because negative buffer offsets are not allowed |
* by the DRM API. */ |
max_neg_bias = INT_MAX; |
for (i = 0; i < r300->velems->count; i++) { |
vb = &vbufs[velem[i].vertex_buffer_index]; |
size = (vb->buffer_offset + velem[i].src_offset) / vb->stride; |
max_neg_bias = MIN2(max_neg_bias, size); |
} |
/* Now set the minimum allowed value. */ |
*buffer_offset = MAX2(-max_neg_bias, index_bias); |
} else { |
/* A positive index bias is OK. */ |
*buffer_offset = index_bias; |
} |
*index_offset = index_bias - *buffer_offset; |
} |
/* Bit flags telling the prepare/emit helpers which steps a draw needs. */
enum r300_prepare_flags {
    /* call emit_dirty_state and friends? */
    PREP_EMIT_STATES        = 0x01,
    /* validate VBOs? */
    PREP_VALIDATE_VBOS      = 0x02,
    /* call emit_vertex_arrays? */
    PREP_EMIT_VARRAYS       = 0x04,
    /* call emit_vertex_arrays_swtcl? */
    PREP_EMIT_VARRAYS_SWTCL = 0x08,
    /* is this draw_elements? */
    PREP_INDEXED            = 0x10
};
/** |
* Check if the requested number of dwords is available in the CS and |
* if not, flush. |
* \param r300 The context. |
* \param flags See r300_prepare_flags. |
* \param cs_dwords The number of dwords to reserve in CS. |
* \return TRUE if the CS was flushed |
*/ |
static boolean r300_reserve_cs_dwords(struct r300_context *r300, |
enum r300_prepare_flags flags, |
unsigned cs_dwords) |
{ |
boolean flushed = FALSE; |
boolean emit_states = flags & PREP_EMIT_STATES; |
boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS; |
boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL; |
/* Add dirty state, index offset, and AOS. */ |
if (emit_states) |
cs_dwords += r300_get_num_dirty_dwords(r300); |
if (r300->screen->caps.is_r500) |
cs_dwords += 2; /* emit_index_offset */ |
if (emit_vertex_arrays) |
cs_dwords += 55; /* emit_vertex_arrays */ |
if (emit_vertex_arrays_swtcl) |
cs_dwords += 7; /* emit_vertex_arrays_swtcl */ |
cs_dwords += r300_get_num_cs_end_dwords(r300); |
/* Reserve requested CS space. */ |
if (cs_dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { |
r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL); |
flushed = TRUE; |
} |
return flushed; |
} |
/** |
* Validate buffers and emit dirty state. |
* \param r300 The context. |
* \param flags See r300_prepare_flags. |
* \param index_buffer The index buffer to validate. The parameter may be NULL. |
* \param buffer_offset The offset passed to emit_vertex_arrays. |
* \param index_bias The index bias to emit. |
* \param instance_id Index of instance to render |
* \return TRUE if rendering should be skipped |
*/ |
static boolean r300_emit_states(struct r300_context *r300, |
enum r300_prepare_flags flags, |
struct pipe_resource *index_buffer, |
int buffer_offset, |
int index_bias, int instance_id) |
{ |
boolean emit_states = flags & PREP_EMIT_STATES; |
boolean emit_vertex_arrays = flags & PREP_EMIT_VARRAYS; |
boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_VARRAYS_SWTCL; |
boolean indexed = flags & PREP_INDEXED; |
boolean validate_vbos = flags & PREP_VALIDATE_VBOS; |
/* Validate buffers and emit dirty state if needed. */ |
if (emit_states || (emit_vertex_arrays && validate_vbos)) { |
if (!r300_emit_buffer_validate(r300, validate_vbos, |
index_buffer)) { |
fprintf(stderr, "r300: CS space validation failed. " |
"(not enough memory?) Skipping rendering.\n"); |
return FALSE; |
} |
} |
if (emit_states) |
r300_emit_dirty_state(r300); |
if (r300->screen->caps.is_r500) { |
if (r300->screen->caps.has_tcl) |
r500_emit_index_bias(r300, index_bias); |
else |
r500_emit_index_bias(r300, 0); |
} |
if (emit_vertex_arrays && |
(r300->vertex_arrays_dirty || |
r300->vertex_arrays_indexed != indexed || |
r300->vertex_arrays_offset != buffer_offset || |
r300->vertex_arrays_instance_id != instance_id)) { |
r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); |
r300->vertex_arrays_dirty = FALSE; |
r300->vertex_arrays_indexed = indexed; |
r300->vertex_arrays_offset = buffer_offset; |
r300->vertex_arrays_instance_id = instance_id; |
} |
if (emit_vertex_arrays_swtcl) |
r300_emit_vertex_arrays_swtcl(r300, indexed); |
return TRUE; |
} |
/** |
* Check if the requested number of dwords is available in the CS and |
* if not, flush. Then validate buffers and emit dirty state. |
* \param r300 The context. |
* \param flags See r300_prepare_flags. |
* \param index_buffer The index buffer to validate. The parameter may be NULL. |
* \param cs_dwords The number of dwords to reserve in CS. |
* \param buffer_offset The offset passed to emit_vertex_arrays. |
* \param index_bias The index bias to emit. |
* \param instance_id The instance to render. |
* \return TRUE if rendering should be skipped |
*/ |
static boolean r300_prepare_for_rendering(struct r300_context *r300, |
enum r300_prepare_flags flags, |
struct pipe_resource *index_buffer, |
unsigned cs_dwords, |
int buffer_offset, |
int index_bias, |
int instance_id) |
{ |
/* Make sure there is enough space in the command stream and emit states. */ |
if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) |
flags |= PREP_EMIT_STATES; |
return r300_emit_states(r300, flags, index_buffer, buffer_offset, |
index_bias, instance_id); |
} |
static boolean immd_is_good_idea(struct r300_context *r300, |
unsigned count) |
{ |
if (DBG_ON(r300, DBG_NO_IMMD)) { |
return FALSE; |
} |
if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) { |
return FALSE; |
} |
/* Buffers can only be used for read by r300 (except query buffers, but |
* those can't be bound by a state tracker as vertex buffers). */ |
return TRUE; |
} |
/***************************************************************************** |
* The HWTCL draw functions. * |
****************************************************************************/ |
static void r300_draw_arrays_immediate(struct r300_context *r300, |
const struct pipe_draw_info *info) |
{ |
struct pipe_vertex_element* velem; |
struct pipe_vertex_buffer* vbuf; |
unsigned vertex_element_count = r300->velems->count; |
unsigned i, v, vbi; |
/* Size of the vertex, in dwords. */ |
unsigned vertex_size = r300->velems->vertex_size_dwords; |
/* The number of dwords for this draw operation. */ |
unsigned dwords = 4 + info->count * vertex_size; |
/* Size of the vertex element, in dwords. */ |
unsigned size[PIPE_MAX_ATTRIBS]; |
/* Stride to the same attrib in the next vertex in the vertex buffer, |
* in dwords. */ |
unsigned stride[PIPE_MAX_ATTRIBS]; |
/* Mapped vertex buffers. */ |
uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; |
uint32_t* mapelem[PIPE_MAX_ATTRIBS]; |
CS_LOCALS(r300); |
if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) |
return; |
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */ |
for (i = 0; i < vertex_element_count; i++) { |
velem = &r300->velems->velem[i]; |
size[i] = r300->velems->format_size[i] / 4; |
vbi = velem->vertex_buffer_index; |
vbuf = &r300->vertex_buffer[vbi]; |
stride[i] = vbuf->stride / 4; |
/* Map the buffer. */ |
if (!map[vbi]) { |
map[vbi] = (uint32_t*)r300->rws->buffer_map( |
r300_resource(vbuf->buffer)->cs_buf, |
r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); |
map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start; |
} |
mapelem[i] = map[vbi] + (velem->src_offset / 4); |
} |
r300_emit_draw_init(r300, info->mode, info->count-1); |
BEGIN_CS(dwords); |
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size); |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) | |
r300_translate_primitive(info->mode)); |
/* Emit vertices. */ |
for (v = 0; v < info->count; v++) { |
for (i = 0; i < vertex_element_count; i++) { |
OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); |
} |
} |
END_CS; |
} |
static void r300_emit_draw_arrays(struct r300_context *r300, |
unsigned mode, |
unsigned count) |
{ |
boolean alt_num_verts = count > 65535; |
CS_LOCALS(r300); |
if (count >= (1 << 24)) { |
fprintf(stderr, "r300: Got a huge number of vertices: %i, " |
"refusing to render.\n", count); |
return; |
} |
r300_emit_draw_init(r300, mode, count-1); |
BEGIN_CS(2 + (alt_num_verts ? 2 : 0)); |
if (alt_num_verts) { |
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); |
} |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | |
r300_translate_primitive(mode) | |
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); |
END_CS; |
} |
static void r300_emit_draw_elements(struct r300_context *r300, |
struct pipe_resource* indexBuffer, |
unsigned indexSize, |
unsigned max_index, |
unsigned mode, |
unsigned start, |
unsigned count, |
uint16_t *imm_indices3) |
{ |
uint32_t count_dwords, offset_dwords; |
boolean alt_num_verts = count > 65535; |
CS_LOCALS(r300); |
if (count >= (1 << 24)) { |
fprintf(stderr, "r300: Got a huge number of vertices: %i, " |
"refusing to render (max_index: %i).\n", count, max_index); |
return; |
} |
DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, max %u\n", |
count, max_index); |
r300_emit_draw_init(r300, mode, max_index); |
/* If start is odd, render the first triangle with indices embedded |
* in the command stream. This will increase start by 3 and make it |
* even. We can then proceed without a fallback. */ |
if (indexSize == 2 && (start & 1) && |
mode == PIPE_PRIM_TRIANGLES) { |
BEGIN_CS(4); |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2); |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) | |
R300_VAP_VF_CNTL__PRIM_TRIANGLES); |
OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]); |
OUT_CS(imm_indices3[2]); |
END_CS; |
start += 3; |
count -= 3; |
if (!count) |
return; |
} |
offset_dwords = indexSize * start / sizeof(uint32_t); |
BEGIN_CS(8 + (alt_num_verts ? 2 : 0)); |
if (alt_num_verts) { |
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); |
} |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); |
if (indexSize == 4) { |
count_dwords = count; |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | |
R300_VAP_VF_CNTL__INDEX_SIZE_32bit | |
r300_translate_primitive(mode) | |
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); |
} else { |
count_dwords = (count + 1) / 2; |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | |
r300_translate_primitive(mode) | |
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); |
} |
OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); |
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | |
(0 << R300_INDX_BUFFER_SKIP_SHIFT)); |
OUT_CS(offset_dwords << 2); |
OUT_CS(count_dwords); |
OUT_CS_RELOC(r300_resource(indexBuffer)); |
END_CS; |
} |
static void r300_draw_elements_immediate(struct r300_context *r300, |
const struct pipe_draw_info *info) |
{ |
const uint8_t *ptr1; |
const uint16_t *ptr2; |
const uint32_t *ptr4; |
unsigned index_size = r300->index_buffer.index_size; |
unsigned i, count_dwords = index_size == 4 ? info->count : |
(info->count + 1) / 2; |
CS_LOCALS(r300); |
/* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ |
if (!r300_prepare_for_rendering(r300, |
PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | |
PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) |
return; |
r300_emit_draw_init(r300, info->mode, info->max_index); |
BEGIN_CS(2 + count_dwords); |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); |
switch (index_size) { |
case 1: |
ptr1 = (uint8_t*)r300->index_buffer.user_buffer; |
ptr1 += info->start; |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | |
r300_translate_primitive(info->mode)); |
if (info->index_bias && !r300->screen->caps.is_r500) { |
for (i = 0; i < info->count-1; i += 2) |
OUT_CS(((ptr1[i+1] + info->index_bias) << 16) | |
(ptr1[i] + info->index_bias)); |
if (info->count & 1) |
OUT_CS(ptr1[i] + info->index_bias); |
} else { |
for (i = 0; i < info->count-1; i += 2) |
OUT_CS(((ptr1[i+1]) << 16) | |
(ptr1[i] )); |
if (info->count & 1) |
OUT_CS(ptr1[i]); |
} |
break; |
case 2: |
ptr2 = (uint16_t*)r300->index_buffer.user_buffer; |
ptr2 += info->start; |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | |
r300_translate_primitive(info->mode)); |
if (info->index_bias && !r300->screen->caps.is_r500) { |
for (i = 0; i < info->count-1; i += 2) |
OUT_CS(((ptr2[i+1] + info->index_bias) << 16) | |
(ptr2[i] + info->index_bias)); |
if (info->count & 1) |
OUT_CS(ptr2[i] + info->index_bias); |
} else { |
OUT_CS_TABLE(ptr2, count_dwords); |
} |
break; |
case 4: |
ptr4 = (uint32_t*)r300->index_buffer.user_buffer; |
ptr4 += info->start; |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | |
R300_VAP_VF_CNTL__INDEX_SIZE_32bit | |
r300_translate_primitive(info->mode)); |
if (info->index_bias && !r300->screen->caps.is_r500) { |
for (i = 0; i < info->count; i++) |
OUT_CS(ptr4[i] + info->index_bias); |
} else { |
OUT_CS_TABLE(ptr4, count_dwords); |
} |
break; |
} |
END_CS; |
} |
static void r300_draw_elements(struct r300_context *r300, |
const struct pipe_draw_info *info, |
int instance_id) |
{ |
struct pipe_resource *indexBuffer = r300->index_buffer.buffer; |
unsigned indexSize = r300->index_buffer.index_size; |
struct pipe_resource* orgIndexBuffer = indexBuffer; |
unsigned start = info->start; |
unsigned count = info->count; |
boolean alt_num_verts = r300->screen->caps.is_r500 && |
count > 65536; |
unsigned short_count; |
int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ |
uint16_t indices3[3]; |
if (info->index_bias && !r300->screen->caps.is_r500) { |
r300_split_index_bias(r300, info->index_bias, &buffer_offset, |
&index_offset); |
} |
r300_translate_index_buffer(r300, &r300->index_buffer, &indexBuffer, |
&indexSize, index_offset, &start, count); |
/* Fallback for misaligned ushort indices. */ |
if (indexSize == 2 && (start & 1) && indexBuffer) { |
/* If we got here, then orgIndexBuffer == indexBuffer. */ |
uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->cs_buf, |
r300->cs, |
PIPE_TRANSFER_READ | |
PIPE_TRANSFER_UNSYNCHRONIZED); |
if (info->mode == PIPE_PRIM_TRIANGLES) { |
memcpy(indices3, ptr + start, 6); |
} else { |
/* Copy the mapped index buffer directly to the upload buffer. |
* The start index will be aligned simply from the fact that |
* every sub-buffer in the upload buffer is aligned. */ |
r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, |
count, (uint8_t*)ptr); |
} |
} else { |
if (r300->index_buffer.user_buffer) |
r300_upload_index_buffer(r300, &indexBuffer, indexSize, |
&start, count, |
r300->index_buffer.user_buffer); |
} |
/* 19 dwords for emit_draw_elements. Give up if the function fails. */ |
if (!r300_prepare_for_rendering(r300, |
PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | |
PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, |
instance_id)) |
goto done; |
if (alt_num_verts || count <= 65535) { |
r300_emit_draw_elements(r300, indexBuffer, indexSize, |
info->max_index, info->mode, start, count, |
indices3); |
} else { |
do { |
/* The maximum must be divisible by 4 and 3, |
* so that quad and triangle lists are split correctly. |
* |
* Strips, loops, and fans won't work. */ |
short_count = MIN2(count, 65532); |
r300_emit_draw_elements(r300, indexBuffer, indexSize, |
info->max_index, |
info->mode, start, short_count, indices3); |
start += short_count; |
count -= short_count; |
/* 15 dwords for emit_draw_elements */ |
if (count) { |
if (!r300_prepare_for_rendering(r300, |
PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, |
indexBuffer, 19, buffer_offset, info->index_bias, |
instance_id)) |
goto done; |
} |
} while (count); |
} |
done: |
if (indexBuffer != orgIndexBuffer) { |
pipe_resource_reference( &indexBuffer, NULL ); |
} |
} |
static void r300_draw_arrays(struct r300_context *r300, |
const struct pipe_draw_info *info, |
int instance_id) |
{ |
boolean alt_num_verts = r300->screen->caps.is_r500 && |
info->count > 65536; |
unsigned start = info->start; |
unsigned count = info->count; |
unsigned short_count; |
/* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ |
if (!r300_prepare_for_rendering(r300, |
PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, |
NULL, 9, start, 0, instance_id)) |
return; |
if (alt_num_verts || count <= 65535) { |
r300_emit_draw_arrays(r300, info->mode, count); |
} else { |
do { |
/* The maximum must be divisible by 4 and 3, |
* so that quad and triangle lists are split correctly. |
* |
* Strips, loops, and fans won't work. */ |
short_count = MIN2(count, 65532); |
r300_emit_draw_arrays(r300, info->mode, short_count); |
start += short_count; |
count -= short_count; |
/* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ |
if (count) { |
if (!r300_prepare_for_rendering(r300, |
PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9, |
start, 0, instance_id)) |
return; |
} |
} while (count); |
} |
} |
static void r300_draw_arrays_instanced(struct r300_context *r300, |
const struct pipe_draw_info *info) |
{ |
int i; |
for (i = 0; i < info->instance_count; i++) |
r300_draw_arrays(r300, info, i); |
} |
static void r300_draw_elements_instanced(struct r300_context *r300, |
const struct pipe_draw_info *info) |
{ |
int i; |
for (i = 0; i < info->instance_count; i++) |
r300_draw_elements(r300, info, i); |
} |
static unsigned r300_max_vertex_count(struct r300_context *r300) |
{ |
unsigned i, nr = r300->velems->count; |
struct pipe_vertex_element *velems = r300->velems->velem; |
unsigned result = ~0; |
for (i = 0; i < nr; i++) { |
struct pipe_vertex_buffer *vb = |
&r300->vertex_buffer[velems[i].vertex_buffer_index]; |
unsigned size, max_count, value; |
/* We're not interested in constant and per-instance attribs. */ |
if (!vb->buffer || |
!vb->stride || |
velems[i].instance_divisor) { |
continue; |
} |
size = vb->buffer->width0; |
/* Subtract buffer_offset. */ |
value = vb->buffer_offset; |
if (value >= size) { |
return 0; |
} |
size -= value; |
/* Subtract src_offset. */ |
value = velems[i].src_offset; |
if (value >= size) { |
return 0; |
} |
size -= value; |
/* Subtract format_size. */ |
value = r300->velems->format_size[i]; |
if (value >= size) { |
return 0; |
} |
size -= value; |
/* Compute the max count. */ |
max_count = 1 + size / vb->stride; |
result = MIN2(result, max_count); |
} |
return result; |
} |
static void r300_draw_vbo(struct pipe_context* pipe, |
const struct pipe_draw_info *dinfo) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct pipe_draw_info info = *dinfo; |
info.indexed = info.indexed; |
if (r300->skip_rendering || |
!u_trim_pipe_prim(info.mode, &info.count)) { |
return; |
} |
r300_update_derived_state(r300); |
/* Draw. */ |
if (info.indexed) { |
unsigned max_count = r300_max_vertex_count(r300); |
if (!max_count) { |
fprintf(stderr, "r300: Skipping a draw command. There is a buffer " |
" which is too small to be used for rendering.\n"); |
return; |
} |
if (max_count == ~0) { |
/* There are no per-vertex vertex elements. Use the hardware maximum. */ |
max_count = 0xffffff; |
} |
info.max_index = max_count - 1; |
info.start += r300->index_buffer.offset / r300->index_buffer.index_size; |
if (info.instance_count <= 1) { |
if (info.count <= 8 && |
r300->index_buffer.user_buffer) { |
r300_draw_elements_immediate(r300, &info); |
} else { |
r300_draw_elements(r300, &info, -1); |
} |
} else { |
r300_draw_elements_instanced(r300, &info); |
} |
} else { |
if (info.instance_count <= 1) { |
if (immd_is_good_idea(r300, info.count)) { |
r300_draw_arrays_immediate(r300, &info); |
} else { |
r300_draw_arrays(r300, &info, -1); |
} |
} else { |
r300_draw_arrays_instanced(r300, &info); |
} |
} |
} |
/**************************************************************************** |
* The rest of this file is for SW TCL rendering only. Please be polite and * |
* keep these functions separated so that they are easier to locate. ~C. * |
***************************************************************************/ |
/* SW TCL elements, using Draw. */ |
static void r300_swtcl_draw_vbo(struct pipe_context* pipe, |
const struct pipe_draw_info *info) |
{ |
struct r300_context* r300 = r300_context(pipe); |
if (r300->skip_rendering) { |
return; |
} |
r300_update_derived_state(r300); |
draw_vbo(r300->draw, info); |
draw_flush(r300->draw); |
} |
/* vbuf_render implementation used to render the Draw module's output. */ |
struct r300_render { |
/* Parent class; r300_render() casts a vbuf_render* straight to this struct. */ |
struct vbuf_render base; |
/* Pipe context that owns the command stream and the vertex buffer. */ |
struct r300_context* r300; |
/* Vertex information: vertex_size is stored by allocate_vertices, prim and hwprim by set_primitive. */ |
size_t vertex_size; |
unsigned prim; |
unsigned hwprim; |
/* VBO: vbo_max_used is raised by unmap_vertices and reset by release_vertices; vbo_ptr is the CPU mapping. */ |
size_t vbo_max_used; |
uint8_t *vbo_ptr; |
}; |
/* Downcast a vbuf_render pointer to the r300_render that embeds it. */
static INLINE struct r300_render*
r300_render(struct vbuf_render* render)
{
    struct r300_render* self = (struct r300_render*)render;

    return self;
}
static const struct vertex_info* |
r300_render_get_vertex_info(struct vbuf_render* render) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
return &r300->vertex_info; |
} |
static boolean r300_render_allocate_vertices(struct vbuf_render* render, |
ushort vertex_size, |
ushort count) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
struct radeon_winsys *rws = r300->rws; |
size_t size = (size_t)vertex_size * (size_t)count; |
DBG(r300, DBG_DRAW, "r300: render_allocate_vertices (size: %d)\n", size); |
if (!r300->vbo || size + r300->draw_vbo_offset > r300->vbo->size) { |
pb_reference(&r300->vbo, NULL); |
r300->vbo_cs = NULL; |
r300render->vbo_ptr = NULL; |
r300->vbo = rws->buffer_create(rws, |
MAX2(R300_MAX_DRAW_VBO_SIZE, size), |
R300_BUFFER_ALIGNMENT, TRUE, |
RADEON_DOMAIN_GTT); |
if (!r300->vbo) { |
return FALSE; |
} |
r300->vbo_cs = rws->buffer_get_cs_handle(r300->vbo); |
r300->draw_vbo_offset = 0; |
r300render->vbo_ptr = rws->buffer_map(r300->vbo_cs, r300->cs, |
PIPE_TRANSFER_WRITE); |
} |
r300render->vertex_size = vertex_size; |
return TRUE; |
} |
static void* r300_render_map_vertices(struct vbuf_render* render) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
DBG(r300, DBG_DRAW, "r300: render_map_vertices\n"); |
assert(r300render->vbo_ptr); |
return r300render->vbo_ptr + r300->draw_vbo_offset; |
} |
static void r300_render_unmap_vertices(struct vbuf_render* render, |
ushort min, |
ushort max) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
DBG(r300, DBG_DRAW, "r300: render_unmap_vertices\n"); |
r300render->vbo_max_used = MAX2(r300render->vbo_max_used, |
r300render->vertex_size * (max + 1)); |
} |
static void r300_render_release_vertices(struct vbuf_render* render) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
DBG(r300, DBG_DRAW, "r300: render_release_vertices\n"); |
r300->draw_vbo_offset += r300render->vbo_max_used; |
r300render->vbo_max_used = 0; |
} |
static void r300_render_set_primitive(struct vbuf_render* render, |
unsigned prim) |
{ |
struct r300_render* r300render = r300_render(render); |
r300render->prim = prim; |
r300render->hwprim = r300_translate_primitive(prim); |
} |
static void r300_render_draw_arrays(struct vbuf_render* render, |
unsigned start, |
unsigned count) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
uint8_t* ptr; |
unsigned i; |
unsigned dwords = 6; |
CS_LOCALS(r300); |
(void) i; (void) ptr; |
assert(start == 0); |
assert(count < (1 << 16)); |
DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); |
if (!r300_prepare_for_rendering(r300, |
PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, |
NULL, dwords, 0, 0, -1)) { |
return; |
} |
BEGIN_CS(dwords); |
OUT_CS_REG(R300_GA_COLOR_CONTROL, |
r300_provoking_vertex_fixes(r300, r300render->prim)); |
OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | |
r300render->hwprim); |
END_CS; |
} |
static void r300_render_draw_elements(struct vbuf_render* render, |
const ushort* indices, |
uint count) |
{ |
struct r300_render* r300render = r300_render(render); |
struct r300_context* r300 = r300render->r300; |
unsigned max_index = (r300->vbo->size - r300->draw_vbo_offset) / |
(r300render->r300->vertex_info.size * 4) - 1; |
struct pipe_resource *index_buffer = NULL; |
unsigned index_buffer_offset; |
CS_LOCALS(r300); |
DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count); |
u_upload_data(r300->uploader, 0, count * 2, indices, |
&index_buffer_offset, &index_buffer); |
if (!index_buffer) { |
return; |
} |
if (!r300_prepare_for_rendering(r300, |
PREP_EMIT_STATES | |
PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, |
index_buffer, 12, 0, 0, -1)) { |
pipe_resource_reference(&index_buffer, NULL); |
return; |
} |
BEGIN_CS(12); |
OUT_CS_REG(R300_GA_COLOR_CONTROL, |
r300_provoking_vertex_fixes(r300, r300render->prim)); |
OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index); |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | |
r300render->hwprim); |
OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); |
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); |
OUT_CS(index_buffer_offset); |
OUT_CS((count + 1) / 2); |
OUT_CS_RELOC(r300_resource(index_buffer)); |
END_CS; |
pipe_resource_reference(&index_buffer, NULL); |
} |
/* vbuf_render::destroy callback: free the r300_render object. */
static void r300_render_destroy(struct vbuf_render* render)
{
    struct r300_render* rr = r300_render(render);

    FREE(rr);
}
static struct vbuf_render* r300_render_create(struct r300_context* r300) |
{ |
struct r300_render* r300render = CALLOC_STRUCT(r300_render); |
r300render->r300 = r300; |
r300render->base.max_vertex_buffer_bytes = R300_MAX_DRAW_VBO_SIZE; |
r300render->base.max_indices = 16 * 1024; |
r300render->base.get_vertex_info = r300_render_get_vertex_info; |
r300render->base.allocate_vertices = r300_render_allocate_vertices; |
r300render->base.map_vertices = r300_render_map_vertices; |
r300render->base.unmap_vertices = r300_render_unmap_vertices; |
r300render->base.set_primitive = r300_render_set_primitive; |
r300render->base.draw_elements = r300_render_draw_elements; |
r300render->base.draw_arrays = r300_render_draw_arrays; |
r300render->base.release_vertices = r300_render_release_vertices; |
r300render->base.destroy = r300_render_destroy; |
return &r300render->base; |
} |
struct draw_stage* r300_draw_stage(struct r300_context* r300) |
{ |
struct vbuf_render* render; |
struct draw_stage* stage; |
render = r300_render_create(r300); |
if (!render) { |
return NULL; |
} |
stage = draw_vbuf_stage(r300->draw, render); |
if (!stage) { |
render->destroy(render); |
return NULL; |
} |
draw_set_render(r300->draw, render); |
return stage; |
} |
/**************************************************************************** |
* End of SW TCL functions * |
***************************************************************************/ |
/* This functions is used to draw a rectangle for the blitter module. |
* |
* If we rendered a quad, the pixels on the main diagonal |
* would be computed and stored twice, which makes the clear/copy codepaths |
* somewhat inefficient. Instead we use a rectangular point sprite. */ |
void r300_blitter_draw_rectangle(struct blitter_context *blitter, |
int x1, int y1, int x2, int y2, |
float depth, |
enum blitter_attrib_type type, |
const union pipe_color_union *attrib) |
{ |
struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter)); |
unsigned last_sprite_coord_enable = r300->sprite_coord_enable; |
unsigned width = x2 - x1; |
unsigned height = y2 - y1; |
unsigned vertex_size = |
type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4; |
unsigned dwords = 13 + vertex_size + |
(type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0); |
static const union pipe_color_union zeros; |
CS_LOCALS(r300); |
/* XXX workaround for a lockup in MSAA resolve on SWTCL chipsets, this |
* function most probably doesn't handle type=NONE correctly */ |
if (!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) { |
util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib); |
return; |
} |
if (r300->skip_rendering) |
return; |
if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) |
r300->sprite_coord_enable = 1; |
r300_update_derived_state(r300); |
/* Mark some states we don't care about as non-dirty. */ |
r300->viewport_state.dirty = FALSE; |
if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) |
goto done; |
DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); |
BEGIN_CS(dwords); |
/* Set up GA. */ |
OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16)); |
if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { |
/* Set up the GA to generate texcoords. */ |
OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | |
(R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT)); |
OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); |
OUT_CS_32F(attrib->f[0]); |
OUT_CS_32F(attrib->f[3]); |
OUT_CS_32F(attrib->f[2]); |
OUT_CS_32F(attrib->f[1]); |
} |
/* Set up VAP controls. */ |
OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); |
OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); |
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); |
OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); |
OUT_CS(1); |
OUT_CS(0); |
/* Draw. */ |
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size); |
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) | |
R300_VAP_VF_CNTL__PRIM_POINTS); |
OUT_CS_32F(x1 + width * 0.5f); |
OUT_CS_32F(y1 + height * 0.5f); |
OUT_CS_32F(depth); |
OUT_CS_32F(1); |
if (vertex_size == 8) { |
if (!attrib) |
attrib = &zeros; |
OUT_CS_TABLE(attrib->f, 4); |
} |
END_CS; |
done: |
/* Restore the state. */ |
r300_mark_atom_dirty(r300, &r300->rs_state); |
r300_mark_atom_dirty(r300, &r300->viewport_state); |
r300->sprite_coord_enable = last_sprite_coord_enable; |
} |
void r300_init_render_functions(struct r300_context *r300) |
{ |
/* Set draw functions based on presence of HW TCL. */ |
if (r300->screen->caps.has_tcl) { |
r300->context.draw_vbo = r300_draw_vbo; |
} else { |
r300->context.draw_vbo = r300_swtcl_draw_vbo; |
} |
/* Plug in the two-sided stencil reference value fallback if needed. */ |
if (!r300->screen->caps.is_r500) |
r300_plug_in_stencil_ref_fallback(r300); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_render_stencilref.c |
---|
0,0 → 1,129 |
/* |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
/** |
* The two-sided stencil reference value fallback for r3xx-r4xx chips. |
* These chips support two-sided stencil functions but they do not support |
* a two-sided reference value. |
* |
* The functions below split every draw call which uses the two-sided |
* reference value into two draw calls -- the first one renders front faces |
* and the second renders back faces with the other reference value. |
*/ |
#include "r300_context.h" |
#include "r300_reg.h" |
/* State of the two-sided stencil reference fallback: the wrapped draw
 * function plus the values saved by r300_stencilref_begin and put back by
 * r300_stencilref_end. */
struct r300_stencilref_context { |
/* Original draw_vbo, saved by r300_plug_in_stencil_ref_fallback. */
void (*draw_vbo)(struct pipe_context *pipe, |
const struct pipe_draw_info *info); |
/* Saved rs->cb_main[rs->cull_mode_index]. */
uint32_t rs_cull_mode; |
/* Saved dsa->stencil_ref_mask. */
uint32_t zb_stencilrefmask; |
/* Saved r300->stencil_ref.ref_value[0]. */
ubyte ref_value_front; |
}; |
static boolean r300_stencilref_needed(struct r300_context *r300) |
{ |
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; |
return dsa->two_sided_stencil_ref || |
(dsa->two_sided && |
r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); |
} |
/* Set drawing for front faces. */ |
static void r300_stencilref_begin(struct r300_context *r300) |
{ |
struct r300_stencilref_context *sr = r300->stencilref_fallback; |
struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; |
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; |
/* Save state. */ |
sr->rs_cull_mode = rs->cb_main[rs->cull_mode_index]; |
sr->zb_stencilrefmask = dsa->stencil_ref_mask; |
sr->ref_value_front = r300->stencil_ref.ref_value[0]; |
/* We *cull* pixels, therefore no need to mask out the bits. */ |
rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK; |
r300_mark_atom_dirty(r300, &r300->rs_state); |
} |
/* Set drawing for back faces. */ |
static void r300_stencilref_switch_side(struct r300_context *r300) |
{ |
struct r300_stencilref_context *sr = r300->stencilref_fallback; |
struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; |
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; |
rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode | R300_CULL_FRONT; |
dsa->stencil_ref_mask = dsa->stencil_ref_bf; |
r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; |
r300_mark_atom_dirty(r300, &r300->rs_state); |
r300_mark_atom_dirty(r300, &r300->dsa_state); |
} |
/* Restore the original state. */ |
static void r300_stencilref_end(struct r300_context *r300) |
{ |
struct r300_stencilref_context *sr = r300->stencilref_fallback; |
struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; |
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; |
/* Restore state. */ |
rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode; |
dsa->stencil_ref_mask = sr->zb_stencilrefmask; |
r300->stencil_ref.ref_value[0] = sr->ref_value_front; |
r300_mark_atom_dirty(r300, &r300->rs_state); |
r300_mark_atom_dirty(r300, &r300->dsa_state); |
} |
static void r300_stencilref_draw_vbo(struct pipe_context *pipe, |
const struct pipe_draw_info *info) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct r300_stencilref_context *sr = r300->stencilref_fallback; |
if (!r300_stencilref_needed(r300)) { |
sr->draw_vbo(pipe, info); |
} else { |
r300_stencilref_begin(r300); |
sr->draw_vbo(pipe, info); |
r300_stencilref_switch_side(r300); |
sr->draw_vbo(pipe, info); |
r300_stencilref_end(r300); |
} |
} |
void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) |
{ |
r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); |
/* Save original draw function. */ |
r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; |
/* Override the draw function. */ |
r300->context.draw_vbo = r300_stencilref_draw_vbo; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_render_translate.c |
---|
0,0 → 1,79 |
/* |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_context.h" |
#include "util/u_index_modify.h" |
#include "util/u_upload_mgr.h" |
void r300_translate_index_buffer(struct r300_context *r300, |
struct pipe_index_buffer *ib, |
struct pipe_resource **out_buffer, |
unsigned *index_size, unsigned index_offset, |
unsigned *start, unsigned count) |
{ |
unsigned out_offset; |
void *ptr; |
switch (*index_size) { |
case 1: |
*out_buffer = NULL; |
u_upload_alloc(r300->uploader, 0, count * 2, |
&out_offset, out_buffer, &ptr); |
util_shorten_ubyte_elts_to_userptr( |
&r300->context, ib, index_offset, |
*start, count, ptr); |
*index_size = 2; |
*start = out_offset / 2; |
break; |
case 2: |
if (index_offset) { |
*out_buffer = NULL; |
u_upload_alloc(r300->uploader, 0, count * 2, |
&out_offset, out_buffer, &ptr); |
util_rebuild_ushort_elts_to_userptr(&r300->context, ib, |
index_offset, *start, |
count, ptr); |
*start = out_offset / 2; |
} |
break; |
case 4: |
if (index_offset) { |
*out_buffer = NULL; |
u_upload_alloc(r300->uploader, 0, count * 4, |
&out_offset, out_buffer, &ptr); |
util_rebuild_uint_elts_to_userptr(&r300->context, ib, |
index_offset, *start, |
count, ptr); |
*start = out_offset / 4; |
} |
break; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_resource.c |
---|
0,0 → 1,57 |
/* |
* Copyright 2010 Red Hat Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: Dave Airlie |
*/ |
#include "r300_context.h" |
#include "r300_texture.h" |
#include "r300_screen_buffer.h" |
static struct pipe_resource * |
r300_resource_create(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
if (templ->target == PIPE_BUFFER) |
return r300_buffer_create(screen, templ); |
else |
return r300_texture_create(screen, templ); |
} |
void r300_init_resource_functions(struct r300_context *r300) |
{ |
r300->context.transfer_map = u_transfer_map_vtbl; |
r300->context.transfer_flush_region = u_default_transfer_flush_region; |
r300->context.transfer_unmap = u_transfer_unmap_vtbl; |
r300->context.transfer_inline_write = u_default_transfer_inline_write; |
r300->context.create_surface = r300_create_surface; |
r300->context.surface_destroy = r300_surface_destroy; |
} |
void r300_init_screen_resource_functions(struct r300_screen *r300screen) |
{ |
r300screen->screen.resource_create = r300_resource_create; |
r300screen->screen.resource_from_handle = r300_texture_from_handle; |
r300screen->screen.resource_get_handle = r300_resource_get_handle; |
r300screen->screen.resource_destroy = u_resource_destroy_vtbl; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen.c |
---|
0,0 → 1,640 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "util/u_format.h" |
#include "util/u_format_s3tc.h" |
#include "util/u_memory.h" |
#include "os/os_time.h" |
#include "vl/vl_decoder.h" |
#include "vl/vl_video_buffer.h" |
#include "r300_context.h" |
#include "r300_texture.h" |
#include "r300_screen_buffer.h" |
#include "r300_state_inlines.h" |
#include "r300_public.h" |
#include "draw/draw_context.h" |
/* Return the identifier behind whom the brave coders responsible for this |
* amalgamation of code, sweat, and duct tape, routinely obscure their names. |
* |
* ...I should have just put "Corbin Simpson", but I'm not that cool. |
* |
* (Or egotistical. Yet.) */ |
/* pipe_screen::get_vendor hook: returns a constant string; pscreen is not
 * consulted. */
static const char* r300_get_vendor(struct pipe_screen* pscreen)
{
    static const char vendor[] = "X.Org R300 Project";

    return vendor;
}
/* Human-readable chip names, indexed by r300screen->caps.family in
 * r300_get_name. Presumably the order mirrors the chip family enum and has
 * to be kept in sync with it -- TODO confirm against its declaration. */
static const char* chip_families[] = { |
"unknown", |
"ATI R300", |
"ATI R350", |
"ATI RV350", |
"ATI RV370", |
"ATI RV380", |
"ATI RS400", |
"ATI RC410", |
"ATI RS480", |
"ATI R420", |
"ATI R423", |
"ATI R430", |
"ATI R480", |
"ATI R481", |
"ATI RV410", |
"ATI RS600", |
"ATI RS690", |
"ATI RS740", |
"ATI RV515", |
"ATI R520", |
"ATI RV530", |
"ATI R580", |
"ATI RV560", |
"ATI RV570" |
}; |
static const char* r300_get_name(struct pipe_screen* pscreen) |
{ |
struct r300_screen* r300screen = r300_screen(pscreen); |
return chip_families[r300screen->caps.family]; |
} |
static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) |
{ |
struct r300_screen* r300screen = r300_screen(pscreen); |
boolean is_r500 = r300screen->caps.is_r500; |
switch (param) { |
/* Supported features (boolean caps). */ |
case PIPE_CAP_NPOT_TEXTURES: |
case PIPE_CAP_TWO_SIDED_STENCIL: |
case PIPE_CAP_ANISOTROPIC_FILTER: |
case PIPE_CAP_POINT_SPRITE: |
case PIPE_CAP_OCCLUSION_QUERY: |
case PIPE_CAP_TEXTURE_SHADOW_MAP: |
case PIPE_CAP_TEXTURE_MIRROR_CLAMP: |
case PIPE_CAP_BLEND_EQUATION_SEPARATE: |
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: |
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: |
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: |
case PIPE_CAP_CONDITIONAL_RENDER: |
case PIPE_CAP_TEXTURE_BARRIER: |
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: |
case PIPE_CAP_USER_INDEX_BUFFERS: |
case PIPE_CAP_USER_CONSTANT_BUFFERS: |
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: |
return 1; |
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: |
return R300_BUFFER_ALIGNMENT; |
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: |
return 16; |
case PIPE_CAP_GLSL_FEATURE_LEVEL: |
return 120; |
/* r300 cannot do swizzling of compressed textures. Supported otherwise. */ |
case PIPE_CAP_TEXTURE_SWIZZLE: |
return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; |
/* We don't support color clamping on r500, so that we can use color |
* intepolators for generic varyings. */ |
case PIPE_CAP_VERTEX_COLOR_CLAMPED: |
return !is_r500; |
/* Supported on r500 only. */ |
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: |
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: |
case PIPE_CAP_SM3: |
return is_r500 ? 1 : 0; |
/* Unsupported features. */ |
case PIPE_CAP_QUERY_TIME_ELAPSED: |
case PIPE_CAP_QUERY_PIPELINE_STATISTICS: |
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: |
case PIPE_CAP_INDEP_BLEND_ENABLE: |
case PIPE_CAP_INDEP_BLEND_FUNC: |
case PIPE_CAP_DEPTH_CLIP_DISABLE: |
case PIPE_CAP_SHADER_STENCIL_EXPORT: |
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: |
case PIPE_CAP_TGSI_INSTANCEID: |
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: |
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: |
case PIPE_CAP_SEAMLESS_CUBE_MAP: |
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: |
case PIPE_CAP_SCALED_RESOLVE: |
case PIPE_CAP_MIN_TEXEL_OFFSET: |
case PIPE_CAP_MAX_TEXEL_OFFSET: |
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: |
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: |
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: |
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: |
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: |
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: |
case PIPE_CAP_COMPUTE: |
case PIPE_CAP_START_INSTANCE: |
case PIPE_CAP_QUERY_TIMESTAMP: |
case PIPE_CAP_TEXTURE_MULTISAMPLE: |
case PIPE_CAP_CUBE_MAP_ARRAY: |
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: |
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: |
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: |
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: |
return 0; |
/* SWTCL-only features. */ |
case PIPE_CAP_PRIMITIVE_RESTART: |
case PIPE_CAP_USER_VERTEX_BUFFERS: |
return !r300screen->caps.has_tcl; |
/* HWTCL-only features / limitations. */ |
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: |
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: |
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: |
return r300screen->caps.has_tcl; |
case PIPE_CAP_TGSI_TEXCOORD: |
return 0; |
/* Texturing. */ |
case PIPE_CAP_MAX_COMBINED_SAMPLERS: |
return r300screen->caps.num_tex_units; |
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: |
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: |
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: |
/* 13 == 4096, 12 == 2048 */ |
return is_r500 ? 13 : 12; |
/* Render targets. */ |
case PIPE_CAP_MAX_RENDER_TARGETS: |
return 4; |
case PIPE_CAP_ENDIANNESS: |
return PIPE_ENDIAN_LITTLE; |
} |
return 0; |
} |
static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param) |
{ |
struct r300_screen* r300screen = r300_screen(pscreen); |
boolean is_r400 = r300screen->caps.is_r400; |
boolean is_r500 = r300screen->caps.is_r500; |
switch (shader) { |
case PIPE_SHADER_FRAGMENT: |
switch (param) |
{ |
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: |
return is_r500 || is_r400 ? 512 : 96; |
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: |
return is_r500 || is_r400 ? 512 : 64; |
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: |
return is_r500 || is_r400 ? 512 : 32; |
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: |
return is_r500 ? 511 : 4; |
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: |
return is_r500 ? 64 : 0; /* Actually unlimited on r500. */ |
/* Fragment shader limits. */ |
case PIPE_SHADER_CAP_MAX_INPUTS: |
/* 2 colors + 8 texcoords are always supported |
* (minus fog and wpos). |
* |
* R500 has the ability to turn 3rd and 4th color into |
* additional texcoords but there is no two-sided color |
* selection then. However the facing bit can be used instead. */ |
return 10; |
case PIPE_SHADER_CAP_MAX_CONSTS: |
return is_r500 ? 256 : 32; |
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: |
return 1; |
case PIPE_SHADER_CAP_MAX_TEMPS: |
return is_r500 ? 128 : is_r400 ? 64 : 32; |
case PIPE_SHADER_CAP_MAX_PREDS: |
return is_r500 ? 1 : 0; |
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: |
return r300screen->caps.num_tex_units; |
case PIPE_SHADER_CAP_MAX_ADDRS: |
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: |
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: |
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: |
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: |
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: |
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: |
case PIPE_SHADER_CAP_SUBROUTINES: |
case PIPE_SHADER_CAP_INTEGERS: |
return 0; |
case PIPE_SHADER_CAP_PREFERRED_IR: |
return PIPE_SHADER_IR_TGSI; |
} |
break; |
case PIPE_SHADER_VERTEX: |
switch (param) |
{ |
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: |
case PIPE_SHADER_CAP_SUBROUTINES: |
return 0; |
default:; |
} |
if (!r300screen->caps.has_tcl) { |
return draw_get_shader_param(shader, param); |
} |
switch (param) |
{ |
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: |
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: |
return is_r500 ? 1024 : 256; |
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: |
return is_r500 ? 4 : 0; /* For loops; not sure about conditionals. */ |
case PIPE_SHADER_CAP_MAX_INPUTS: |
return 16; |
case PIPE_SHADER_CAP_MAX_CONSTS: |
return 256; |
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: |
return 1; |
case PIPE_SHADER_CAP_MAX_TEMPS: |
return 32; |
case PIPE_SHADER_CAP_MAX_ADDRS: |
return 1; /* XXX guessed */ |
case PIPE_SHADER_CAP_MAX_PREDS: |
return is_r500 ? 4 : 0; /* XXX guessed. */ |
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: |
return 1; |
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: |
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: |
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: |
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: |
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: |
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: |
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: |
case PIPE_SHADER_CAP_SUBROUTINES: |
case PIPE_SHADER_CAP_INTEGERS: |
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: |
return 0; |
case PIPE_SHADER_CAP_PREFERRED_IR: |
return PIPE_SHADER_IR_TGSI; |
} |
break; |
} |
return 0; |
} |
static float r300_get_paramf(struct pipe_screen* pscreen, |
enum pipe_capf param) |
{ |
struct r300_screen* r300screen = r300_screen(pscreen); |
switch (param) { |
case PIPE_CAPF_MAX_LINE_WIDTH: |
case PIPE_CAPF_MAX_LINE_WIDTH_AA: |
case PIPE_CAPF_MAX_POINT_WIDTH: |
case PIPE_CAPF_MAX_POINT_WIDTH_AA: |
/* The maximum dimensions of the colorbuffer are our practical |
* rendering limits. 2048 pixels should be enough for anybody. */ |
if (r300screen->caps.is_r500) { |
return 4096.0f; |
} else if (r300screen->caps.is_r400) { |
return 4021.0f; |
} else { |
return 2560.0f; |
} |
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: |
return 16.0f; |
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: |
return 16.0f; |
case PIPE_CAPF_GUARD_BAND_LEFT: |
case PIPE_CAPF_GUARD_BAND_TOP: |
case PIPE_CAPF_GUARD_BAND_RIGHT: |
case PIPE_CAPF_GUARD_BAND_BOTTOM: |
/* XXX I don't know what these should be but the least we can do is |
* silence the potential error message */ |
return 0.0f; |
default: |
debug_printf("r300: Warning: Unknown CAP %d in get_paramf.\n", |
param); |
return 0.0f; |
} |
} |
static int r300_get_video_param(struct pipe_screen *screen, |
enum pipe_video_profile profile, |
enum pipe_video_cap param) |
{ |
switch (param) { |
case PIPE_VIDEO_CAP_SUPPORTED: |
return vl_profile_supported(screen, profile); |
case PIPE_VIDEO_CAP_NPOT_TEXTURES: |
return 0; |
case PIPE_VIDEO_CAP_MAX_WIDTH: |
case PIPE_VIDEO_CAP_MAX_HEIGHT: |
return vl_video_buffer_max_size(screen); |
case PIPE_VIDEO_CAP_PREFERED_FORMAT: |
return PIPE_FORMAT_NV12; |
case PIPE_VIDEO_CAP_PREFERS_INTERLACED: |
return false; |
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: |
return false; |
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: |
return true; |
default: |
return 0; |
} |
} |
/** |
* Whether the format matches: |
* PIPE_FORMAT_?10?10?10?2_UNORM |
*/ |
static INLINE boolean |
util_format_is_rgba1010102_variant(const struct util_format_description *desc) |
{ |
static const unsigned size[4] = {10, 10, 10, 2}; |
unsigned chan; |
if (desc->block.width != 1 || |
desc->block.height != 1 || |
desc->block.bits != 32) |
return FALSE; |
for (chan = 0; chan < 4; ++chan) { |
if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && |
desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) |
return FALSE; |
if (desc->channel[chan].size != size[chan]) |
return FALSE; |
} |
return TRUE; |
} |
/* Report whether `format` can be used with ALL of the bind flags in `usage`
 * at the given sample count.
 *
 * The function first rejects formats util_format_is_supported() refuses and
 * sample counts it cannot handle, then builds `retval` by OR-ing in every
 * requested bind flag that passes its own check (sampler view, color buffer,
 * depth/stencil, vertex buffer, transfers). The answer is TRUE only when the
 * accumulated mask equals `usage`, so a single unsupported or unhandled bind
 * flag makes the whole query fail.
 *
 * `target` is not consulted by this function. */
static boolean r300_is_format_supported(struct pipe_screen* screen, |
enum pipe_format format, |
enum pipe_texture_target target, |
unsigned sample_count, |
unsigned usage) |
{ |
uint32_t retval = 0; |
boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8; |
boolean is_r500 = r300_screen(screen)->caps.is_r500; |
boolean is_r400 = r300_screen(screen)->caps.is_r400; |
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || |
format == PIPE_FORMAT_R10G10B10X2_SNORM || |
format == PIPE_FORMAT_B10G10R10A2_UNORM || |
format == PIPE_FORMAT_R10SG10SB10SA2U_NORM; |
boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM || |
format == PIPE_FORMAT_RGTC1_SNORM || |
format == PIPE_FORMAT_LATC1_UNORM || |
format == PIPE_FORMAT_LATC1_SNORM; |
boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM || |
format == PIPE_FORMAT_RGTC2_SNORM || |
format == PIPE_FORMAT_LATC2_UNORM || |
format == PIPE_FORMAT_LATC2_SNORM; |
boolean is_x16f_xy16f = format == PIPE_FORMAT_R16_FLOAT || |
format == PIPE_FORMAT_R16G16_FLOAT || |
format == PIPE_FORMAT_A16_FLOAT || |
format == PIPE_FORMAT_L16_FLOAT || |
format == PIPE_FORMAT_L16A16_FLOAT || |
format == PIPE_FORMAT_R16A16_FLOAT || |
format == PIPE_FORMAT_I16_FLOAT; |
boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || |
format == PIPE_FORMAT_R16G16_FLOAT || |
format == PIPE_FORMAT_R16G16B16_FLOAT || |
format == PIPE_FORMAT_R16G16B16A16_FLOAT || |
format == PIPE_FORMAT_R16G16B16X16_FLOAT; |
const struct util_format_description *desc; |
if (!util_format_is_supported(format, usage)) |
return FALSE; |
/* Check multisampling support. */ |
/* Sample counts 0 and 1 mean "no MSAA" and need no further checks;
 * 2, 4 and 6 go through the MSAA restrictions below; any other count is
 * rejected. */
switch (sample_count) { |
case 0: |
case 1: |
break; |
case 2: |
case 4: |
case 6: |
/* We need DRM 2.8.0. */ |
if (!drm_2_8_0) { |
return FALSE; |
} |
/* Only support R500, because I didn't test older chipsets, |
* but MSAA should work there too. */ |
/* The RADEON_MSAA debug option lifts the R500-only restriction. */
if (!is_r500 && !debug_get_bool_option("RADEON_MSAA", FALSE)) { |
return FALSE; |
} |
/* No texturing and scanout. */ |
if (usage & (PIPE_BIND_SAMPLER_VIEW | |
PIPE_BIND_DISPLAY_TARGET | |
PIPE_BIND_SCANOUT)) { |
return FALSE; |
} |
desc = util_format_description(format); |
if (is_r500) { |
/* Only allow depth/stencil, RGBA8, RGBA1010102, RGBA16F. */ |
if (!util_format_is_depth_or_stencil(format) && |
!util_format_is_rgba8_variant(desc) && |
!util_format_is_rgba1010102_variant(desc) && |
format != PIPE_FORMAT_R16G16B16A16_FLOAT && |
format != PIPE_FORMAT_R16G16B16X16_FLOAT) { |
return FALSE; |
} |
} else { |
/* Only allow depth/stencil, RGBA8. */ |
if (!util_format_is_depth_or_stencil(format) && |
!util_format_is_rgba8_variant(desc)) { |
return FALSE; |
} |
} |
break; |
default: |
return FALSE; |
} |
/* Check sampler format support. */ |
if ((usage & PIPE_BIND_SAMPLER_VIEW) && |
/* these two are broken for an unknown reason */ |
format != PIPE_FORMAT_R8G8B8X8_SNORM && |
format != PIPE_FORMAT_R16G16B16X16_SNORM && |
/* ATI1N is r5xx-only. */ |
(is_r500 || !is_ati1n) && |
/* ATI2N is supported on r4xx-r5xx. */ |
(is_r400 || is_r500 || !is_ati2n) && |
/* R16F and RG16F texture support was added in as late as DRM 2.8.0 */ |
(drm_2_8_0 || !is_x16f_xy16f) && |
r300_is_sampler_format_supported(format)) { |
retval |= PIPE_BIND_SAMPLER_VIEW; |
} |
/* Check colorbuffer format support. */ |
/* All four color-type bind flags share one check; on success every one of
 * them that was requested is copied into retval. */
if ((usage & (PIPE_BIND_RENDER_TARGET | |
PIPE_BIND_DISPLAY_TARGET | |
PIPE_BIND_SCANOUT | |
PIPE_BIND_SHARED)) && |
/* 2101010 cannot be rendered to on non-r5xx. */ |
(!is_color2101010 || (is_r500 && drm_2_8_0)) && |
r300_is_colorbuffer_format_supported(format)) { |
retval |= usage & |
(PIPE_BIND_RENDER_TARGET | |
PIPE_BIND_DISPLAY_TARGET | |
PIPE_BIND_SCANOUT | |
PIPE_BIND_SHARED); |
} |
/* Check depth-stencil format support. */ |
if (usage & PIPE_BIND_DEPTH_STENCIL && |
r300_is_zs_format_supported(format)) { |
retval |= PIPE_BIND_DEPTH_STENCIL; |
} |
/* Check vertex buffer format support. */ |
if (usage & PIPE_BIND_VERTEX_BUFFER) { |
if (r300_screen(screen)->caps.has_tcl) { |
/* Half float is supported on >= R400. */ |
if ((is_r400 || is_r500 || !is_half_float) && |
r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) { |
retval |= PIPE_BIND_VERTEX_BUFFER; |
} |
} else { |
/* SW TCL */ |
if (!util_format_is_pure_integer(format)) { |
retval |= PIPE_BIND_VERTEX_BUFFER; |
} |
} |
} |
/* Transfers are always supported. */ |
if (usage & PIPE_BIND_TRANSFER_READ) |
retval |= PIPE_BIND_TRANSFER_READ; |
if (usage & PIPE_BIND_TRANSFER_WRITE) |
retval |= PIPE_BIND_TRANSFER_WRITE; |
/* Supported only if every requested bind flag was accepted above. */
return retval == usage; |
} |
static void r300_destroy_screen(struct pipe_screen* pscreen) |
{ |
struct r300_screen* r300screen = r300_screen(pscreen); |
struct radeon_winsys *rws = radeon_winsys(pscreen); |
pipe_mutex_destroy(r300screen->cmask_mutex); |
if (rws) |
rws->destroy(rws); |
FREE(r300screen); |
} |
/* Make *ptr reference `fence`. Fence handles are pb_buffers in this driver,
 * so the reference update is delegated to pb_reference(). */
static void r300_fence_reference(struct pipe_screen *screen,
                                 struct pipe_fence_handle **ptr,
                                 struct pipe_fence_handle *fence)
{
    struct pb_buffer **dst = (struct pb_buffer**)ptr;
    struct pb_buffer *src = (struct pb_buffer*)fence;

    pb_reference(dst, src);
}
static boolean r300_fence_signalled(struct pipe_screen *screen, |
struct pipe_fence_handle *fence) |
{ |
struct radeon_winsys *rws = r300_screen(screen)->rws; |
struct pb_buffer *rfence = (struct pb_buffer*)fence; |
return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE); |
} |
static boolean r300_fence_finish(struct pipe_screen *screen, |
struct pipe_fence_handle *fence, |
uint64_t timeout) |
{ |
struct radeon_winsys *rws = r300_screen(screen)->rws; |
struct pb_buffer *rfence = (struct pb_buffer*)fence; |
if (timeout != PIPE_TIMEOUT_INFINITE) { |
int64_t start_time = os_time_get(); |
/* Convert to microseconds. */ |
timeout /= 1000; |
/* Wait in a loop. */ |
while (rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) { |
if (os_time_get() - start_time >= timeout) { |
return FALSE; |
} |
os_time_sleep(10); |
} |
return TRUE; |
} |
rws->buffer_wait(rfence, RADEON_USAGE_READWRITE); |
return TRUE; |
} |
struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) |
{ |
struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); |
if (!r300screen) { |
FREE(r300screen); |
return NULL; |
} |
rws->query_info(rws, &r300screen->info); |
r300_init_debug(r300screen); |
r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps); |
if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) |
r300screen->caps.zmask_ram = 0; |
if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) |
r300screen->caps.hiz_ram = 0; |
if (r300screen->info.drm_minor < 8) |
r300screen->caps.has_us_format = FALSE; |
r300screen->rws = rws; |
r300screen->screen.destroy = r300_destroy_screen; |
r300screen->screen.get_name = r300_get_name; |
r300screen->screen.get_vendor = r300_get_vendor; |
r300screen->screen.get_param = r300_get_param; |
r300screen->screen.get_shader_param = r300_get_shader_param; |
r300screen->screen.get_paramf = r300_get_paramf; |
r300screen->screen.get_video_param = r300_get_video_param; |
r300screen->screen.is_format_supported = r300_is_format_supported; |
r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported; |
r300screen->screen.context_create = r300_create_context; |
r300screen->screen.fence_reference = r300_fence_reference; |
r300screen->screen.fence_signalled = r300_fence_signalled; |
r300screen->screen.fence_finish = r300_fence_finish; |
r300_init_screen_resource_functions(r300screen); |
util_format_s3tc_init(); |
pipe_mutex_init(r300screen->cmask_mutex); |
return &r300screen->screen; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen.h |
---|
0,0 → 1,125 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_SCREEN_H |
#define R300_SCREEN_H |
#include "r300_chipset.h" |
#include "../../winsys/radeon/drm/radeon_winsys.h" |
#include "pipe/p_screen.h" |
#include "util/u_slab.h" |
#include "os/os_thread.h" |
#include <stdio.h> |
/* Driver-private screen object. The generic pipe_screen is the first member,
 * which is what lets the r300_screen() cast wrapper convert a pipe_screen
 * pointer back into this structure. */
struct r300_screen { |
/* Parent class; must stay the first member (see the cast in r300_screen()). */ |
struct pipe_screen screen; |
/* Winsys the screen talks to; released through rws->destroy() when the
 * screen is destroyed. */
struct radeon_winsys *rws; |
/* Chipset info and capabilities. */ |
struct radeon_info info; |
struct r300_capabilities caps; |
/** Combination of DBG_xxx flags */ |
unsigned debug; |
/* The MSAA texture with CMASK access. */ |
struct pipe_resource *cmask_resource; |
/* Created with pipe_mutex_init() and torn down with pipe_mutex_destroy();
 * presumably guards cmask_resource -- TODO confirm against its users. */
pipe_mutex cmask_mutex; |
}; |
/* Convenience cast wrappers. */ |
static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
    /* Plain pointer cast from the generic screen to the driver screen. */
    struct r300_screen* rscreen = (struct r300_screen*)screen;

    return rscreen;
}
static INLINE struct radeon_winsys * |
radeon_winsys(struct pipe_screen *screen) { |
return r300_screen(screen)->rws; |
} |
/* Debug functionality. */ |
/** |
* Debug flags to disable/enable certain groups of debugging outputs. |
* |
* \note These may be rather coarse, and the grouping may be impractical. |
* If you find, while debugging the driver, that a different grouping |
* of these flags would be beneficial, just feel free to change them |
* but make sure to update the documentation in r300_debug.c to reflect |
* those changes. |
*/ |
/*@{*/ |
/* Each DBG_* value is a single bit; a screen's `debug` field holds an OR of
 * them and SCREEN_DBG_ON() tests it with a bitwise AND. */
/* Logging (bits 0-14; bit 15 is currently unassigned). */ |
#define DBG_PSC (1 << 0) |
#define DBG_FP (1 << 1) |
#define DBG_VP (1 << 2) |
#define DBG_SWTCL (1 << 3) |
#define DBG_DRAW (1 << 4) |
#define DBG_TEX (1 << 5) |
#define DBG_TEXALLOC (1 << 6) |
#define DBG_RS (1 << 7) |
#define DBG_FB (1 << 8) |
#define DBG_RS_BLOCK (1 << 9) |
#define DBG_CBZB (1 << 10) |
#define DBG_HYPERZ (1 << 11) |
#define DBG_SCISSOR (1 << 12) |
#define DBG_INFO (1 << 13) |
#define DBG_MSAA (1 << 14) |
/* Features (bits 16-23; bit 24 is currently unassigned). */ |
#define DBG_ANISOHQ (1 << 16) |
#define DBG_NO_TILING (1 << 17) |
#define DBG_NO_IMMD (1 << 18) |
#define DBG_NO_OPT (1 << 19) |
#define DBG_NO_CBZB (1 << 20) |
#define DBG_NO_ZMASK (1 << 21) |
#define DBG_NO_HIZ (1 << 22) |
#define DBG_NO_CMASK (1 << 23) |
/* Statistics (bit 25). */ |
#define DBG_P_STAT (1 << 25) |
/*@}*/ |
/* TRUE when at least one of the given DBG_* bits is set on the screen. */
static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
{
    if ((screen->debug & flags) != 0) {
        return TRUE;
    }

    return FALSE;
}
/* printf-style debug output to stderr, emitted only when at least one of the
 * given DBG_* bits is enabled on the screen. */
static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
                              const char * fmt, ...)
{
    va_list args;

    if (!SCREEN_DBG_ON(screen, flags)) {
        return;
    }

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
void r300_init_debug(struct r300_screen* ctx); |
void r300_init_screen_resource_functions(struct r300_screen *r300screen); |
#endif /* R300_SCREEN_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen_buffer.c |
---|
0,0 → 1,198 |
/* |
* Copyright 2010 Red Hat Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: Dave Airlie |
*/ |
#include <stdio.h> |
#include "util/u_inlines.h" |
#include "util/u_memory.h" |
#include "util/u_upload_mgr.h" |
#include "util/u_math.h" |
#include "r300_screen_buffer.h" |
void r300_upload_index_buffer(struct r300_context *r300, |
struct pipe_resource **index_buffer, |
unsigned index_size, unsigned *start, |
unsigned count, const uint8_t *ptr) |
{ |
unsigned index_offset; |
*index_buffer = NULL; |
u_upload_data(r300->uploader, |
0, count * index_size, |
ptr + (*start * index_size), |
&index_offset, |
index_buffer); |
*start = index_offset / index_size; |
} |
static void r300_buffer_destroy(struct pipe_screen *screen, |
struct pipe_resource *buf) |
{ |
struct r300_resource *rbuf = r300_resource(buf); |
align_free(rbuf->malloced_buffer); |
if (rbuf->buf) |
pb_reference(&rbuf->buf, NULL); |
FREE(rbuf); |
} |
static void * |
r300_buffer_transfer_map( struct pipe_context *context, |
struct pipe_resource *resource, |
unsigned level, |
unsigned usage, |
const struct pipe_box *box, |
struct pipe_transfer **ptransfer ) |
{ |
struct r300_context *r300 = r300_context(context); |
struct radeon_winsys *rws = r300->screen->rws; |
struct r300_resource *rbuf = r300_resource(resource); |
struct pipe_transfer *transfer; |
uint8_t *map; |
transfer = util_slab_alloc(&r300->pool_transfers); |
transfer->resource = resource; |
transfer->level = level; |
transfer->usage = usage; |
transfer->box = *box; |
transfer->stride = 0; |
transfer->layer_stride = 0; |
if (rbuf->malloced_buffer) { |
*ptransfer = transfer; |
return rbuf->malloced_buffer + box->x; |
} |
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && |
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { |
assert(usage & PIPE_TRANSFER_WRITE); |
/* Check if mapping this buffer would cause waiting for the GPU. */ |
if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, RADEON_USAGE_READWRITE) || |
r300->rws->buffer_is_busy(rbuf->buf, RADEON_USAGE_READWRITE)) { |
unsigned i; |
struct pb_buffer *new_buf; |
/* Create a new one in the same pipe_resource. */ |
new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0, |
R300_BUFFER_ALIGNMENT, TRUE, |
rbuf->domain); |
if (new_buf) { |
/* Discard the old buffer. */ |
pb_reference(&rbuf->buf, NULL); |
rbuf->buf = new_buf; |
rbuf->cs_buf = r300->rws->buffer_get_cs_handle(rbuf->buf); |
/* We changed the buffer, now we need to bind it where the old one was bound. */ |
for (i = 0; i < r300->nr_vertex_buffers; i++) { |
if (r300->vertex_buffer[i].buffer == &rbuf->b.b) { |
r300->vertex_arrays_dirty = TRUE; |
break; |
} |
} |
} |
} |
} |
/* Buffers are never used for write, therefore mapping for read can be |
* unsynchronized. */ |
if (!(usage & PIPE_TRANSFER_WRITE)) { |
usage |= PIPE_TRANSFER_UNSYNCHRONIZED; |
} |
map = rws->buffer_map(rbuf->cs_buf, r300->cs, usage); |
if (map == NULL) { |
util_slab_free(&r300->pool_transfers, transfer); |
return NULL; |
} |
*ptransfer = transfer; |
return map + box->x; |
} |
static void r300_buffer_transfer_unmap( struct pipe_context *pipe, |
struct pipe_transfer *transfer ) |
{ |
struct r300_context *r300 = r300_context(pipe); |
util_slab_free(&r300->pool_transfers, transfer); |
} |
/* Resource vtable installed on every buffer made by r300_buffer_create().
 * The initializer is positional: entry order must match the member order of
 * struct u_resource_vtbl, and the trailing comments name the slot each entry
 * fills. Slots this driver does not implement for buffers are NULL. */
static const struct u_resource_vtbl r300_buffer_vtbl = |
{ |
NULL, /* get_handle */ |
r300_buffer_destroy, /* resource_destroy */ |
r300_buffer_transfer_map, /* transfer_map */ |
NULL, /* transfer_flush_region */ |
r300_buffer_transfer_unmap, /* transfer_unmap */ |
NULL /* transfer_inline_write */ |
}; |
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
struct r300_screen *r300screen = r300_screen(screen); |
struct r300_resource *rbuf; |
rbuf = MALLOC_STRUCT(r300_resource); |
rbuf->b.b = *templ; |
rbuf->b.vtbl = &r300_buffer_vtbl; |
pipe_reference_init(&rbuf->b.b.reference, 1); |
rbuf->b.b.screen = screen; |
rbuf->domain = RADEON_DOMAIN_GTT; |
rbuf->buf = NULL; |
rbuf->malloced_buffer = NULL; |
/* Allocate constant buffers and SWTCL vertex and index buffers in RAM. |
* Note that uploaded index buffers use the flag PIPE_BIND_CUSTOM, so that |
* we can distinguish them from user-created buffers. |
*/ |
if (templ->bind & PIPE_BIND_CONSTANT_BUFFER || |
(!r300screen->caps.has_tcl && !(templ->bind & PIPE_BIND_CUSTOM))) { |
rbuf->malloced_buffer = align_malloc(templ->width0, 64); |
return &rbuf->b.b; |
} |
rbuf->buf = |
r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0, |
R300_BUFFER_ALIGNMENT, TRUE, |
rbuf->domain); |
if (!rbuf->buf) { |
FREE(rbuf); |
return NULL; |
} |
rbuf->cs_buf = |
r300screen->rws->buffer_get_cs_handle(rbuf->buf); |
return &rbuf->b.b; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen_buffer.h |
---|
0,0 → 1,54 |
/* |
* Copyright 2010 Red Hat Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: Dave Airlie |
*/ |
#ifndef R300_SCREEN_BUFFER_H |
#define R300_SCREEN_BUFFER_H |
#include <stdio.h> |
#include "pipe/p_compiler.h" |
#include "pipe/p_state.h" |
#include "util/u_transfer.h" |
#include "r300_screen.h" |
#include "r300_context.h" |
/* Functions. */ |
void r300_upload_index_buffer(struct r300_context *r300, |
struct pipe_resource **index_buffer, |
unsigned index_size, unsigned *start, |
unsigned count, const uint8_t *ptr); |
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, |
const struct pipe_resource *templ); |
/* Inline functions. */ |
static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
{
    /* Plain pointer cast; no checking of the resource is performed. */
    struct r300_buffer *rbuffer = (struct r300_buffer *)buffer;

    return rbuffer;
}
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_shader_semantics.h |
---|
0,0 → 1,72 |
/* |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_SHADER_SEMANTICS_H |
#define R300_SHADER_SEMANTICS_H |
/* Marker stored in a semantic slot that no register has been assigned to. */
#define ATTR_UNUSED (-1) |
/* Array sizes of the color/bcolor and generic slots below. */
#define ATTR_COLOR_COUNT 2 |
#define ATTR_GENERIC_COUNT 32 |
/* This structure contains information about what attributes are written by VS |
* or read by FS. (but not both) It's much easier to work with than |
* tgsi_shader_info. |
* |
* The variables contain indices to tgsi_shader_info semantics and those |
* indices are nothing else than input/output register numbers. */ |
/* Every slot holds a register index or ATTR_UNUSED;
 * r300_shader_semantics_reset() puts all slots into the ATTR_UNUSED state
 * and zeroes num_generic. */
struct r300_shader_semantics { |
int pos; |
int psize; |
int color[ATTR_COLOR_COUNT]; |
/* presumably the back-face colors -- TODO confirm against the users */
int bcolor[ATTR_COLOR_COUNT]; |
int face; |
int generic[ATTR_GENERIC_COUNT]; |
int fog; |
int wpos; |
/* presumably the number of generic[] slots in use -- TODO confirm */
int num_generic; |
}; |
static INLINE void r300_shader_semantics_reset( |
struct r300_shader_semantics* info) |
{ |
int i; |
info->pos = ATTR_UNUSED; |
info->psize = ATTR_UNUSED; |
info->face = ATTR_UNUSED; |
info->fog = ATTR_UNUSED; |
info->wpos = ATTR_UNUSED; |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
info->color[i] = ATTR_UNUSED; |
info->bcolor[i] = ATTR_UNUSED; |
} |
for (i = 0; i < ATTR_GENERIC_COUNT; i++) { |
info->generic[i] = ATTR_UNUSED; |
} |
info->num_generic = 0; |
} |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_state.c |
---|
0,0 → 1,2189 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "draw/draw_context.h" |
#include "util/u_framebuffer.h" |
#include "util/u_half.h" |
#include "util/u_helpers.h" |
#include "util/u_math.h" |
#include "util/u_mm.h" |
#include "util/u_memory.h" |
#include "util/u_pack_color.h" |
#include "util/u_transfer.h" |
#include "tgsi/tgsi_parse.h" |
#include "pipe/p_config.h" |
#include "r300_cb.h" |
#include "r300_context.h" |
#include "r300_emit.h" |
#include "r300_reg.h" |
#include "r300_screen.h" |
#include "r300_screen_buffer.h" |
#include "r300_state_inlines.h" |
#include "r300_fs.h" |
#include "r300_texture.h" |
#include "r300_vs.h" |
/* r300_state: Functions used to initialize state context by translating |
* Gallium state objects into semi-native r300 state objects. */ |
/* Bind `cso` to `atom` and mark the atom dirty, but only if the bound object
 * actually changes.
 *
 * The expansion refers to a variable named `r300`, so one must be in scope
 * at the point of use. Both arguments are evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: it can be followed by a semicolon inside an
 * unbraced if/else without capturing the `else`. */
#define UPDATE_STATE(cso, atom) \
    do { \
        if ((cso) != (atom).state) { \
            (atom).state = (cso); \
            r300_mark_atom_dirty(r300, &(atom)); \
        } \
    } while (0)
static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, |
unsigned dstRGB, unsigned dstA) |
{ |
/* If the blend equation is ADD or REVERSE_SUBTRACT, |
* SRC_ALPHA == 0, and the following state is set, the colorbuffer |
* will not be changed. |
* Notice that the dst factors are the src factors inverted. */ |
return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || |
srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || |
srcRGB == PIPE_BLENDFACTOR_ZERO) && |
(srcA == PIPE_BLENDFACTOR_SRC_COLOR || |
srcA == PIPE_BLENDFACTOR_SRC_ALPHA || |
srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || |
srcA == PIPE_BLENDFACTOR_ZERO) && |
(dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
dstRGB == PIPE_BLENDFACTOR_ONE) && |
(dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
dstA == PIPE_BLENDFACTOR_ONE); |
} |
static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA, |
unsigned dstRGB, unsigned dstA) |
{ |
/* If the blend equation is ADD or REVERSE_SUBTRACT, |
* SRC_ALPHA == 1, and the following state is set, the colorbuffer |
* will not be changed. |
* Notice that the dst factors are the src factors inverted. */ |
return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
srcRGB == PIPE_BLENDFACTOR_ZERO) && |
(srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
srcA == PIPE_BLENDFACTOR_ZERO) && |
(dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || |
dstRGB == PIPE_BLENDFACTOR_ONE) && |
(dstA == PIPE_BLENDFACTOR_SRC_COLOR || |
dstA == PIPE_BLENDFACTOR_SRC_ALPHA || |
dstA == PIPE_BLENDFACTOR_ONE); |
} |
static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA, |
unsigned dstRGB, unsigned dstA) |
{ |
/* If the blend equation is ADD or REVERSE_SUBTRACT, |
* SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer |
* will not be changed. |
* Notice that the dst factors are the src factors inverted. */ |
return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || |
srcRGB == PIPE_BLENDFACTOR_ZERO) && |
(srcA == PIPE_BLENDFACTOR_ZERO) && |
(dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
dstRGB == PIPE_BLENDFACTOR_ONE) && |
(dstA == PIPE_BLENDFACTOR_ONE); |
} |
static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA, |
unsigned dstRGB, unsigned dstA) |
{ |
/* If the blend equation is ADD or REVERSE_SUBTRACT, |
* SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer |
* will not be changed. |
* Notice that the dst factors are the src factors inverted. */ |
return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
srcRGB == PIPE_BLENDFACTOR_ZERO) && |
(srcA == PIPE_BLENDFACTOR_ZERO) && |
(dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || |
dstRGB == PIPE_BLENDFACTOR_ONE) && |
(dstA == PIPE_BLENDFACTOR_ONE); |
} |
static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA, |
unsigned dstRGB, unsigned dstA) |
{ |
/* If the blend equation is ADD or REVERSE_SUBTRACT, |
* SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set, |
* the colorbuffer will not be changed. |
* Notice that the dst factors are the src factors inverted. */ |
return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || |
srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || |
srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || |
srcRGB == PIPE_BLENDFACTOR_ZERO) && |
(srcA == PIPE_BLENDFACTOR_SRC_COLOR || |
srcA == PIPE_BLENDFACTOR_SRC_ALPHA || |
srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || |
srcA == PIPE_BLENDFACTOR_ZERO) && |
(dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
dstRGB == PIPE_BLENDFACTOR_ONE) && |
(dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
dstA == PIPE_BLENDFACTOR_ONE); |
} |
static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA, |
unsigned dstRGB, unsigned dstA) |
{ |
/* If the blend equation is ADD or REVERSE_SUBTRACT, |
* SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set, |
* the colorbuffer will not be changed. |
* Notice that the dst factors are the src factors inverted. */ |
return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
srcRGB == PIPE_BLENDFACTOR_ZERO) && |
(srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
srcA == PIPE_BLENDFACTOR_ZERO) && |
(dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || |
dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || |
dstRGB == PIPE_BLENDFACTOR_ONE) && |
(dstA == PIPE_BLENDFACTOR_SRC_COLOR || |
dstA == PIPE_BLENDFACTOR_SRC_ALPHA || |
dstA == PIPE_BLENDFACTOR_ONE); |
} |
static unsigned blend_discard_conditionally(unsigned eqRGB, unsigned eqA, |
unsigned dstRGB, unsigned dstA, |
unsigned srcRGB, unsigned srcA) |
{ |
unsigned blend_control = 0; |
/* Optimization: discard pixels which don't change the colorbuffer. |
* |
* The code below is non-trivial and some math is involved. |
* |
* Discarding pixels must be disabled when FP16 AA is enabled. |
* This is a hardware bug. Also, this implementation wouldn't work |
* with FP blending enabled and equation clamping disabled. |
* |
* Equations other than ADD are rarely used and therefore won't be |
* optimized. */ |
if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && |
(eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { |
/* ADD: X+Y |
* REVERSE_SUBTRACT: Y-X |
* |
* The idea is: |
* If X = src*srcFactor = 0 and Y = dst*dstFactor = 1, |
* then CB will not be changed. |
* |
* Given the srcFactor and dstFactor variables, we can derive |
* what src and dst should be equal to and discard appropriate |
* pixels. |
*/ |
if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { |
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; |
} else if (blend_discard_if_src_alpha_1(srcRGB, srcA, |
dstRGB, dstA)) { |
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; |
} else if (blend_discard_if_src_color_0(srcRGB, srcA, |
dstRGB, dstA)) { |
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; |
} else if (blend_discard_if_src_color_1(srcRGB, srcA, |
dstRGB, dstA)) { |
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; |
} else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, |
dstRGB, dstA)) { |
blend_control |= |
R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; |
} else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, |
dstRGB, dstA)) { |
blend_control |= |
R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; |
} |
} |
return blend_control; |
} |
/* The hardware colormask is clunky a must be swizzled depending on the format. |
* This was figured out by trial-and-error. */ |
static unsigned bgra_cmask(unsigned mask) |
{ |
return ((mask & PIPE_MASK_R) << 2) | |
((mask & PIPE_MASK_B) >> 2) | |
(mask & (PIPE_MASK_G | PIPE_MASK_A)); |
} |
static unsigned rgba_cmask(unsigned mask) |
{ |
return mask & PIPE_MASK_RGBA; |
} |
static unsigned rrrr_cmask(unsigned mask) |
{ |
return (mask & PIPE_MASK_R) | |
((mask & PIPE_MASK_R) << 1) | |
((mask & PIPE_MASK_R) << 2) | |
((mask & PIPE_MASK_R) << 3); |
} |
static unsigned aaaa_cmask(unsigned mask) |
{ |
return ((mask & PIPE_MASK_A) >> 3) | |
((mask & PIPE_MASK_A) >> 2) | |
((mask & PIPE_MASK_A) >> 1) | |
(mask & PIPE_MASK_A); |
} |
static unsigned grrg_cmask(unsigned mask) |
{ |
return ((mask & PIPE_MASK_R) << 1) | |
((mask & PIPE_MASK_R) << 2) | |
((mask & PIPE_MASK_G) >> 1) | |
((mask & PIPE_MASK_G) << 2); |
} |
static unsigned arra_cmask(unsigned mask) |
{ |
return ((mask & PIPE_MASK_R) << 1) | |
((mask & PIPE_MASK_R) << 2) | |
((mask & PIPE_MASK_A) >> 3) | |
(mask & PIPE_MASK_A); |
} |
static unsigned blend_read_enable(unsigned eqRGB, unsigned eqA, |
unsigned dstRGB, unsigned dstA, |
unsigned srcRGB, unsigned srcA, |
boolean src_alpha_optz) |
{ |
unsigned blend_control = 0; |
/* Optimization: some operations do not require the destination color. |
* |
* When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled, |
* otherwise blending gives incorrect results. It seems to be |
* a hardware bug. */ |
if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || |
eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || |
dstRGB != PIPE_BLENDFACTOR_ZERO || |
dstA != PIPE_BLENDFACTOR_ZERO || |
srcRGB == PIPE_BLENDFACTOR_DST_COLOR || |
srcRGB == PIPE_BLENDFACTOR_DST_ALPHA || |
srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR || |
srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA || |
srcA == PIPE_BLENDFACTOR_DST_COLOR || |
srcA == PIPE_BLENDFACTOR_DST_ALPHA || |
srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || |
srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || |
srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { |
/* Enable reading from the colorbuffer. */ |
blend_control |= R300_READ_ENABLE; |
if (src_alpha_optz) { |
/* Optimization: Depending on incoming pixels, we can |
* conditionally disable the reading in hardware... */ |
if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN && |
eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) { |
/* Disable reading if SRC_ALPHA == 0. */ |
if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || |
dstRGB == PIPE_BLENDFACTOR_ZERO) && |
(dstA == PIPE_BLENDFACTOR_SRC_COLOR || |
dstA == PIPE_BLENDFACTOR_SRC_ALPHA || |
dstA == PIPE_BLENDFACTOR_ZERO) && |
(srcRGB != PIPE_BLENDFACTOR_DST_COLOR && |
srcRGB != PIPE_BLENDFACTOR_DST_ALPHA && |
srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR && |
srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) { |
blend_control |= R500_SRC_ALPHA_0_NO_READ; |
} |
/* Disable reading if SRC_ALPHA == 1. */ |
if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
dstRGB == PIPE_BLENDFACTOR_ZERO) && |
(dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || |
dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || |
dstA == PIPE_BLENDFACTOR_ZERO) && |
(srcRGB != PIPE_BLENDFACTOR_DST_COLOR && |
srcRGB != PIPE_BLENDFACTOR_DST_ALPHA && |
srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR && |
srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) { |
blend_control |= R500_SRC_ALPHA_1_NO_READ; |
} |
} |
} |
} |
return blend_control; |
} |
/* Create a new blend state based on the CSO blend state. |
* |
* This encompasses alpha blending, logic/raster ops, and blend dithering. */ |
static void* r300_create_blend_state(struct pipe_context* pipe, |
const struct pipe_blend_state* state) |
{ |
struct r300_screen* r300screen = r300_screen(pipe->screen); |
struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); |
uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */ |
uint32_t blend_control_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */ |
uint32_t blend_control_noalpha = 0; /* R300_RB3D_CBLEND: 0x4e04 */ |
uint32_t blend_control_noalpha_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */ |
uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */ |
uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */ |
uint32_t alpha_blend_control_noalpha = 0; /* R300_RB3D_ABLEND: 0x4e08 */ |
uint32_t alpha_blend_control_noalpha_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */ |
uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */ |
uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */ |
int i; |
const unsigned eqRGB = state->rt[0].rgb_func; |
const unsigned srcRGB = state->rt[0].rgb_src_factor; |
const unsigned dstRGB = state->rt[0].rgb_dst_factor; |
const unsigned eqA = state->rt[0].alpha_func; |
const unsigned srcA = state->rt[0].alpha_src_factor; |
const unsigned dstA = state->rt[0].alpha_dst_factor; |
unsigned srcRGBX = srcRGB; |
unsigned dstRGBX = dstRGB; |
CB_LOCALS; |
blend->state = *state; |
/* force DST_ALPHA to ONE where we can */ |
switch (srcRGBX) { |
case PIPE_BLENDFACTOR_DST_ALPHA: |
srcRGBX = PIPE_BLENDFACTOR_ONE; |
break; |
case PIPE_BLENDFACTOR_INV_DST_ALPHA: |
srcRGBX = PIPE_BLENDFACTOR_ZERO; |
break; |
} |
switch (dstRGBX) { |
case PIPE_BLENDFACTOR_DST_ALPHA: |
dstRGBX = PIPE_BLENDFACTOR_ONE; |
break; |
case PIPE_BLENDFACTOR_INV_DST_ALPHA: |
dstRGBX = PIPE_BLENDFACTOR_ZERO; |
break; |
} |
/* Get blending register values. */ |
if (state->rt[0].blend_enable) { |
unsigned blend_eq, blend_eq_noclamp; |
/* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, |
* this is just the crappy D3D naming */ |
blend_control = blend_control_noclamp = |
R300_ALPHA_BLEND_ENABLE | |
( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | |
( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); |
blend_control_noalpha = blend_control_noalpha_noclamp = |
R300_ALPHA_BLEND_ENABLE | |
( r300_translate_blend_factor(srcRGBX) << R300_SRC_BLEND_SHIFT) | |
( r300_translate_blend_factor(dstRGBX) << R300_DST_BLEND_SHIFT); |
blend_eq = r300_translate_blend_function(eqRGB, TRUE); |
blend_eq_noclamp = r300_translate_blend_function(eqRGB, FALSE); |
blend_control |= blend_eq; |
blend_control_noalpha |= blend_eq; |
blend_control_noclamp |= blend_eq_noclamp; |
blend_control_noalpha_noclamp |= blend_eq_noclamp; |
/* Optimization: some operations do not require the destination color. */ |
blend_control |= blend_read_enable(eqRGB, eqA, dstRGB, dstA, |
srcRGB, srcA, r300screen->caps.is_r500); |
blend_control_noclamp |= blend_read_enable(eqRGB, eqA, dstRGB, dstA, |
srcRGB, srcA, FALSE); |
blend_control_noalpha |= blend_read_enable(eqRGB, eqA, dstRGBX, dstA, |
srcRGBX, srcA, r300screen->caps.is_r500); |
blend_control_noalpha_noclamp |= blend_read_enable(eqRGB, eqA, dstRGBX, dstA, |
srcRGBX, srcA, FALSE); |
/* Optimization: discard pixels which don't change the colorbuffer. |
* It cannot be used with FP16 AA. */ |
blend_control |= blend_discard_conditionally(eqRGB, eqA, dstRGB, dstA, |
srcRGB, srcA); |
blend_control_noalpha |= blend_discard_conditionally(eqRGB, eqA, dstRGBX, dstA, |
srcRGBX, srcA); |
/* separate alpha */ |
if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { |
blend_control |= R300_SEPARATE_ALPHA_ENABLE; |
blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE; |
alpha_blend_control = alpha_blend_control_noclamp = |
(r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | |
(r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); |
alpha_blend_control |= r300_translate_blend_function(eqA, TRUE); |
alpha_blend_control_noclamp |= r300_translate_blend_function(eqA, FALSE); |
} |
if (srcA != srcRGBX || dstA != dstRGBX || eqA != eqRGB) { |
blend_control_noalpha |= R300_SEPARATE_ALPHA_ENABLE; |
blend_control_noalpha_noclamp |= R300_SEPARATE_ALPHA_ENABLE; |
alpha_blend_control_noalpha = alpha_blend_control_noalpha_noclamp = |
(r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | |
(r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); |
alpha_blend_control_noalpha |= r300_translate_blend_function(eqA, TRUE); |
alpha_blend_control_noalpha_noclamp |= r300_translate_blend_function(eqA, FALSE); |
} |
} |
/* PIPE_LOGICOP_* don't need to be translated, fortunately. */ |
if (state->logicop_enable) { |
rop = R300_RB3D_ROPCNTL_ROP_ENABLE | |
(state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; |
} |
/* Neither fglrx nor classic r300 ever set this, regardless of dithering |
* state. Since it's an optional implementation detail, we can leave it |
* out and never dither. |
* |
* This could be revisited if we ever get quality or conformance hints. |
* |
if (state->dither) { |
dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | |
R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; |
} |
*/ |
/* Build a command buffer. */ |
{ |
unsigned (*func[COLORMASK_NUM_SWIZZLES])(unsigned) = { |
bgra_cmask, |
rgba_cmask, |
rrrr_cmask, |
aaaa_cmask, |
grrg_cmask, |
arra_cmask, |
bgra_cmask, |
rgba_cmask |
}; |
for (i = 0; i < COLORMASK_NUM_SWIZZLES; i++) { |
boolean has_alpha = i != COLORMASK_RGBX && i != COLORMASK_BGRX; |
BEGIN_CB(blend->cb_clamp[i], 8); |
OUT_CB_REG(R300_RB3D_ROPCNTL, rop); |
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); |
OUT_CB(has_alpha ? blend_control : blend_control_noalpha); |
OUT_CB(has_alpha ? alpha_blend_control : alpha_blend_control_noalpha); |
OUT_CB(func[i](state->rt[0].colormask)); |
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); |
END_CB; |
} |
} |
/* Build a command buffer (for RGBA16F). */ |
BEGIN_CB(blend->cb_noclamp, 8); |
OUT_CB_REG(R300_RB3D_ROPCNTL, rop); |
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); |
OUT_CB(blend_control_noclamp); |
OUT_CB(alpha_blend_control_noclamp); |
OUT_CB(rgba_cmask(state->rt[0].colormask)); |
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); |
END_CB; |
/* Build a command buffer (for RGB16F). */ |
BEGIN_CB(blend->cb_noclamp_noalpha, 8); |
OUT_CB_REG(R300_RB3D_ROPCNTL, rop); |
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); |
OUT_CB(blend_control_noalpha_noclamp); |
OUT_CB(alpha_blend_control_noalpha_noclamp); |
OUT_CB(rgba_cmask(state->rt[0].colormask)); |
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); |
END_CB; |
/* The same as above, but with no colorbuffer reads and writes. */ |
BEGIN_CB(blend->cb_no_readwrite, 8); |
OUT_CB_REG(R300_RB3D_ROPCNTL, rop); |
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); |
OUT_CB(0); |
OUT_CB(0); |
OUT_CB(0); |
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); |
END_CB; |
return (void*)blend; |
} |
/* Bind blend state. */ |
static void r300_bind_blend_state(struct pipe_context* pipe, |
void* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_blend_state *blend = (struct r300_blend_state*)state; |
boolean last_alpha_to_one = r300->alpha_to_one; |
boolean last_alpha_to_coverage = r300->alpha_to_coverage; |
UPDATE_STATE(state, r300->blend_state); |
if (!blend) |
return; |
r300->alpha_to_one = blend->state.alpha_to_one; |
r300->alpha_to_coverage = blend->state.alpha_to_coverage; |
if (r300->alpha_to_one != last_alpha_to_one && r300->msaa_enable && |
r300->fs_status == FRAGMENT_SHADER_VALID) { |
r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY; |
} |
if (r300->alpha_to_coverage != last_alpha_to_coverage && |
r300->msaa_enable) { |
r300_mark_atom_dirty(r300, &r300->dsa_state); |
} |
} |
/* Release a blend state object previously returned by
 * r300_create_blend_state(). The context is not consulted. */
static void r300_delete_blend_state(struct pipe_context* pipe,
                                    void* state)
{
    (void)pipe;

    FREE(state);
}
/* Convert a float to a 10-bit unsigned integer.
 *
 * The input is scaled by 1023.9 and truncated; the result is saturated to
 * [0, 1023]. Negative inputs and NaN yield 0, large inputs and +Inf yield
 * 1023.
 *
 * The saturation is done on the float *before* the integer conversion:
 * converting an out-of-range float to unsigned is undefined behaviour, so
 * clamping afterwards cannot rescue negative or huge values. */
static unsigned float_to_fixed10(float f)
{
    float scaled;

    /* Written as a negated comparison so that NaN also lands here. */
    if (!(f > 0.0f))
        return 0;

    scaled = f * 1023.9f;
    if (scaled >= 1023.0f)
        return 1023;

    return (unsigned)scaled;
}
/* Set blend color. |
* Setup both R300 and R500 registers, figure out later which one to write. */ |
static void r300_set_blend_color(struct pipe_context* pipe, |
const struct pipe_blend_color* color) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct pipe_framebuffer_state *fb = r300->fb_state.state; |
struct r300_blend_color_state *state = |
(struct r300_blend_color_state*)r300->blend_color_state.state; |
struct pipe_blend_color c; |
enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0; |
float tmp; |
CB_LOCALS; |
state->state = *color; /* Save it, so that we can reuse it in set_fb_state */ |
c = *color; |
/* The blend color is dependent on the colorbuffer format. */ |
if (fb->nr_cbufs) { |
switch (format) { |
case PIPE_FORMAT_R8_UNORM: |
case PIPE_FORMAT_L8_UNORM: |
case PIPE_FORMAT_I8_UNORM: |
c.color[1] = c.color[0]; |
break; |
case PIPE_FORMAT_A8_UNORM: |
c.color[1] = c.color[3]; |
break; |
case PIPE_FORMAT_R8G8_UNORM: |
c.color[2] = c.color[1]; |
break; |
case PIPE_FORMAT_L8A8_UNORM: |
case PIPE_FORMAT_R8A8_UNORM: |
c.color[2] = c.color[3]; |
break; |
case PIPE_FORMAT_R8G8B8A8_UNORM: |
case PIPE_FORMAT_R8G8B8X8_UNORM: |
tmp = c.color[0]; |
c.color[0] = c.color[2]; |
c.color[2] = tmp; |
break; |
default:; |
} |
} |
if (r300->screen->caps.is_r500) { |
BEGIN_CB(state->cb, 3); |
OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); |
switch (format) { |
case PIPE_FORMAT_R16G16B16A16_FLOAT: |
case PIPE_FORMAT_R16G16B16X16_FLOAT: |
OUT_CB(util_float_to_half(c.color[2]) | |
(util_float_to_half(c.color[3]) << 16)); |
OUT_CB(util_float_to_half(c.color[0]) | |
(util_float_to_half(c.color[1]) << 16)); |
break; |
default: |
OUT_CB(float_to_fixed10(c.color[0]) | |
(float_to_fixed10(c.color[3]) << 16)); |
OUT_CB(float_to_fixed10(c.color[2]) | |
(float_to_fixed10(c.color[1]) << 16)); |
} |
END_CB; |
} else { |
union util_color uc; |
util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); |
BEGIN_CB(state->cb, 2); |
OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui); |
END_CB; |
} |
r300_mark_atom_dirty(r300, &r300->blend_color_state); |
} |
static void r300_set_clip_state(struct pipe_context* pipe, |
const struct pipe_clip_state* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_clip_state *clip = |
(struct r300_clip_state*)r300->clip_state.state; |
CB_LOCALS; |
if (r300->screen->caps.has_tcl) { |
BEGIN_CB(clip->cb, r300->clip_state.size); |
OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, |
(r300->screen->caps.is_r500 ? |
R500_PVS_UCP_START : R300_PVS_UCP_START)); |
OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); |
OUT_CB_TABLE(state->ucp, 6 * 4); |
END_CB; |
r300_mark_atom_dirty(r300, &r300->clip_state); |
} else { |
draw_set_clip_state(r300->draw, state); |
} |
} |
/* Create a new depth, stencil, and alpha state based on the CSO dsa state. |
* |
* This contains the depth buffer, stencil buffer, alpha test, and such. |
* On the Radeon, depth and stencil buffer setup are intertwined, which is |
* the reason for some of the strange-looking assignments across registers. */ |
static void* r300_create_dsa_state(struct pipe_context* pipe, |
const struct pipe_depth_stencil_alpha_state* state) |
{ |
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; |
struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); |
CB_LOCALS; |
uint32_t alpha_value_fp16 = 0; |
uint32_t z_buffer_control = 0; |
uint32_t z_stencil_control = 0; |
uint32_t stencil_ref_mask = 0; |
uint32_t stencil_ref_bf = 0; |
dsa->dsa = *state; |
/* Depth test setup. - separate write mask depth for decomp flush */ |
if (state->depth.writemask) { |
z_buffer_control |= R300_Z_WRITE_ENABLE; |
} |
if (state->depth.enabled) { |
z_buffer_control |= R300_Z_ENABLE; |
z_stencil_control |= |
(r300_translate_depth_stencil_function(state->depth.func) << |
R300_Z_FUNC_SHIFT); |
} |
/* Stencil buffer setup. */ |
if (state->stencil[0].enabled) { |
z_buffer_control |= R300_STENCIL_ENABLE; |
z_stencil_control |= |
(r300_translate_depth_stencil_function(state->stencil[0].func) << |
R300_S_FRONT_FUNC_SHIFT) | |
(r300_translate_stencil_op(state->stencil[0].fail_op) << |
R300_S_FRONT_SFAIL_OP_SHIFT) | |
(r300_translate_stencil_op(state->stencil[0].zpass_op) << |
R300_S_FRONT_ZPASS_OP_SHIFT) | |
(r300_translate_stencil_op(state->stencil[0].zfail_op) << |
R300_S_FRONT_ZFAIL_OP_SHIFT); |
stencil_ref_mask = |
(state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) | |
(state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT); |
if (state->stencil[1].enabled) { |
dsa->two_sided = TRUE; |
z_buffer_control |= R300_STENCIL_FRONT_BACK; |
z_stencil_control |= |
(r300_translate_depth_stencil_function(state->stencil[1].func) << |
R300_S_BACK_FUNC_SHIFT) | |
(r300_translate_stencil_op(state->stencil[1].fail_op) << |
R300_S_BACK_SFAIL_OP_SHIFT) | |
(r300_translate_stencil_op(state->stencil[1].zpass_op) << |
R300_S_BACK_ZPASS_OP_SHIFT) | |
(r300_translate_stencil_op(state->stencil[1].zfail_op) << |
R300_S_BACK_ZFAIL_OP_SHIFT); |
stencil_ref_bf = |
(state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | |
(state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); |
if (is_r500) { |
z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; |
} else { |
dsa->two_sided_stencil_ref = |
(state->stencil[0].valuemask != state->stencil[1].valuemask || |
state->stencil[0].writemask != state->stencil[1].writemask); |
} |
} |
} |
/* Alpha test setup. */ |
if (state->alpha.enabled) { |
dsa->alpha_function = |
r300_translate_alpha_function(state->alpha.func) | |
R300_FG_ALPHA_FUNC_ENABLE; |
dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); |
alpha_value_fp16 = util_float_to_half(state->alpha.ref_value); |
} |
BEGIN_CB(&dsa->cb_begin, 8); |
OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); |
OUT_CB(z_buffer_control); |
OUT_CB(z_stencil_control); |
OUT_CB(stencil_ref_mask); |
OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, stencil_ref_bf); |
OUT_CB_REG(R500_FG_ALPHA_VALUE, alpha_value_fp16); |
END_CB; |
BEGIN_CB(dsa->cb_zb_no_readwrite, 8); |
OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); |
OUT_CB(0); |
OUT_CB(0); |
OUT_CB(0); |
OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); |
OUT_CB_REG(R500_FG_ALPHA_VALUE, alpha_value_fp16); |
END_CB; |
return (void*)dsa; |
} |
static void r300_dsa_inject_stencilref(struct r300_context *r300) |
{ |
struct r300_dsa_state *dsa = |
(struct r300_dsa_state*)r300->dsa_state.state; |
if (!dsa) |
return; |
dsa->stencil_ref_mask = |
(dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) | |
r300->stencil_ref.ref_value[0]; |
dsa->stencil_ref_bf = |
(dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) | |
r300->stencil_ref.ref_value[1]; |
} |
/* Bind DSA state. */ |
static void r300_bind_dsa_state(struct pipe_context* pipe, |
void* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
if (!state) { |
return; |
} |
UPDATE_STATE(state, r300->dsa_state); |
r300_mark_atom_dirty(r300, &r300->hyperz_state); /* Will be updated before the emission. */ |
r300_dsa_inject_stencilref(r300); |
} |
/* Release a DSA state object previously returned by
 * r300_create_dsa_state(). The context is not consulted. */
static void r300_delete_dsa_state(struct pipe_context* pipe,
                                  void* state)
{
    (void)pipe;

    FREE(state);
}
static void r300_set_stencil_ref(struct pipe_context* pipe, |
const struct pipe_stencil_ref* sr) |
{ |
struct r300_context* r300 = r300_context(pipe); |
r300->stencil_ref = *sr; |
r300_dsa_inject_stencilref(r300); |
r300_mark_atom_dirty(r300, &r300->dsa_state); |
} |
static void r300_tex_set_tiling_flags(struct r300_context *r300, |
struct r300_resource *tex, |
unsigned level) |
{ |
/* Check if the macrotile flag needs to be changed. |
* Skip changing the flags otherwise. */ |
if (tex->tex.macrotile[tex->surface_level] != |
tex->tex.macrotile[level]) { |
r300->rws->buffer_set_tiling(tex->buf, r300->cs, |
tex->tex.microtile, tex->tex.macrotile[level], |
0, 0, 0, 0, 0, |
tex->tex.stride_in_bytes[0]); |
tex->surface_level = level; |
} |
} |
/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ |
static void r300_fb_set_tiling_flags(struct r300_context *r300, |
const struct pipe_framebuffer_state *state) |
{ |
unsigned i; |
/* Set tiling flags for new surfaces. */ |
for (i = 0; i < state->nr_cbufs; i++) { |
r300_tex_set_tiling_flags(r300, |
r300_resource(state->cbufs[i]->texture), |
state->cbufs[i]->u.tex.level); |
} |
if (state->zsbuf) { |
r300_tex_set_tiling_flags(r300, |
r300_resource(state->zsbuf->texture), |
state->zsbuf->u.tex.level); |
} |
} |
static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, |
const char *binding) |
{ |
struct pipe_resource *tex = surf->texture; |
struct r300_resource *rtex = r300_resource(tex); |
fprintf(stderr, |
"r300: %s[%i] Dim: %ix%i, Firstlayer: %i, " |
"Lastlayer: %i, Level: %i, Format: %s\n" |
"r300: TEX: Macro: %s, Micro: %s, " |
"Dim: %ix%ix%i, LastLevel: %i, Format: %s\n", |
binding, index, surf->width, surf->height, |
surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level, |
util_format_short_name(surf->format), |
rtex->tex.macrotile[0] ? "YES" : " NO", |
rtex->tex.microtile ? "YES" : " NO", |
tex->width0, tex->height0, tex->depth0, |
tex->last_level, util_format_short_name(surf->format)); |
} |
void r300_mark_fb_state_dirty(struct r300_context *r300, |
enum r300_fb_state_change change) |
{ |
struct pipe_framebuffer_state *state = r300->fb_state.state; |
r300_mark_atom_dirty(r300, &r300->gpu_flush); |
r300_mark_atom_dirty(r300, &r300->fb_state); |
/* What is marked as dirty depends on the enum r300_fb_state_change. */ |
if (change == R300_CHANGED_FB_STATE) { |
r300_mark_atom_dirty(r300, &r300->aa_state); |
r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */ |
r300_set_blend_color(&r300->context, r300->blend_color_state.state); |
} |
if (change == R300_CHANGED_FB_STATE || |
change == R300_CHANGED_HYPERZ_FLAG) { |
r300_mark_atom_dirty(r300, &r300->hyperz_state); |
} |
if (change == R300_CHANGED_FB_STATE || |
change == R300_CHANGED_MULTIWRITE) { |
r300_mark_atom_dirty(r300, &r300->fb_state_pipelined); |
} |
/* Now compute the fb_state atom size. */ |
r300->fb_state.size = 2 + (8 * state->nr_cbufs); |
if (r300->cbzb_clear) |
r300->fb_state.size += 10; |
else if (state->zsbuf) { |
r300->fb_state.size += 10; |
if (r300->hyperz_enabled) |
r300->fb_state.size += 8; |
} |
if (r300->cmask_in_use) { |
r300->fb_state.size += 6; |
if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) { |
r300->fb_state.size += 3; |
} |
} |
/* The size of the rest of atoms stays the same. */ |
} |
static unsigned r300_get_num_samples(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state* fb = |
(struct pipe_framebuffer_state*)r300->fb_state.state; |
unsigned i, num_samples; |
if (!fb->nr_cbufs && !fb->zsbuf) |
return 1; |
num_samples = 6; |
for (i = 0; i < fb->nr_cbufs; i++) |
num_samples = MIN2(num_samples, fb->cbufs[i]->texture->nr_samples); |
if (fb->zsbuf) |
num_samples = MIN2(num_samples, fb->zsbuf->texture->nr_samples); |
if (!num_samples) |
num_samples = 1; |
return num_samples; |
} |
static void |
r300_set_framebuffer_state(struct pipe_context* pipe, |
const struct pipe_framebuffer_state* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; |
struct pipe_framebuffer_state *old_state = r300->fb_state.state; |
unsigned max_width, max_height, i; |
uint32_t zbuffer_bpp = 0; |
boolean unlock_zbuffer = FALSE; |
if (r300->screen->caps.is_r500) { |
max_width = max_height = 4096; |
} else if (r300->screen->caps.is_r400) { |
max_width = max_height = 4021; |
} else { |
max_width = max_height = 2560; |
} |
if (state->width > max_width || state->height > max_height) { |
fprintf(stderr, "r300: Implementation error: Render targets are too " |
"big in %s, refusing to bind framebuffer state!\n", __FUNCTION__); |
return; |
} |
if (old_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) { |
/* There is a zmask in use, what are we gonna do? */ |
if (state->zsbuf) { |
if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { |
/* Decompress the currently bound zbuffer before we bind another one. */ |
r300_decompress_zmask(r300); |
r300->hiz_in_use = FALSE; |
} |
} else { |
/* We don't bind another zbuffer, so lock the current one. */ |
pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); |
} |
} else if (r300->locked_zbuffer) { |
/* We have a locked zbuffer now, what are we gonna do? */ |
if (state->zsbuf) { |
if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { |
/* We are binding some other zbuffer, so decompress the locked one, |
* it gets unlocked automatically. */ |
r300_decompress_zmask_locked_unsafe(r300); |
r300->hiz_in_use = FALSE; |
} else { |
/* We are binding the locked zbuffer again, so unlock it. */ |
unlock_zbuffer = TRUE; |
} |
} |
} |
assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use); |
/* Set whether CMASK can be used. */ |
r300->cmask_in_use = |
state->nr_cbufs == 1 && |
r300->screen->cmask_resource == state->cbufs[0]->texture; |
/* Need to reset clamping or colormask. */ |
r300_mark_atom_dirty(r300, &r300->blend_state); |
/* Re-swizzle the blend color. */ |
r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state); |
/* If zsbuf is set from NULL to non-NULL or vice versa.. */ |
if (!!old_state->zsbuf != !!state->zsbuf) { |
r300_mark_atom_dirty(r300, &r300->dsa_state); |
} |
if (r300->screen->info.drm_minor < 12) { |
/* The tiling flags are dependent on the surface miplevel, unfortunately. |
* This workarounds a bad design decision in old kernels which were |
* rewriting tile fields in registers. */ |
r300_fb_set_tiling_flags(r300, state); |
} |
util_copy_framebuffer_state(r300->fb_state.state, state); |
if (unlock_zbuffer) { |
pipe_surface_reference(&r300->locked_zbuffer, NULL); |
} |
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); |
if (state->zsbuf) { |
switch (util_format_get_blocksize(state->zsbuf->format)) { |
case 2: |
zbuffer_bpp = 16; |
break; |
case 4: |
zbuffer_bpp = 24; |
break; |
} |
/* Polygon offset depends on the zbuffer bit depth. */ |
if (r300->zbuffer_bpp != zbuffer_bpp) { |
r300->zbuffer_bpp = zbuffer_bpp; |
if (r300->polygon_offset_enabled) |
r300_mark_atom_dirty(r300, &r300->rs_state); |
} |
} |
r300->num_samples = r300_get_num_samples(r300); |
/* Set up AA config. */ |
if (r300->num_samples > 1) { |
switch (r300->num_samples) { |
case 2: |
aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE | |
R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; |
break; |
case 4: |
aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE | |
R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; |
break; |
case 6: |
aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE | |
R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; |
break; |
} |
} else { |
aa->aa_config = 0; |
} |
if (DBG_ON(r300, DBG_FB)) { |
fprintf(stderr, "r300: set_framebuffer_state:\n"); |
for (i = 0; i < state->nr_cbufs; i++) { |
r300_print_fb_surf_info(state->cbufs[i], i, "CB"); |
} |
if (state->zsbuf) { |
r300_print_fb_surf_info(state->zsbuf, 0, "ZB"); |
} |
} |
} |
/* Create fragment shader state. */ |
static void* r300_create_fs_state(struct pipe_context* pipe, |
const struct pipe_shader_state* shader) |
{ |
struct r300_fragment_shader* fs = NULL; |
fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader); |
/* Copy state directly into shader. */ |
fs->state = *shader; |
fs->state.tokens = tgsi_dup_tokens(shader->tokens); |
return (void*)fs; |
} |
void r300_mark_fs_code_dirty(struct r300_context *r300) |
{ |
struct r300_fragment_shader* fs = r300_fs(r300); |
r300_mark_atom_dirty(r300, &r300->fs); |
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); |
r300_mark_atom_dirty(r300, &r300->fs_constants); |
r300->fs.size = fs->shader->cb_code_size; |
if (r300->screen->caps.is_r500) { |
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7; |
r300->fs_constants.size = fs->shader->externals_count * 4 + 3; |
} else { |
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5; |
r300->fs_constants.size = fs->shader->externals_count * 4 + 1; |
} |
((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table = |
fs->shader->code.constants_remap_table; |
} |
/* Bind fragment shader state. */ |
static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; |
if (fs == NULL) { |
r300->fs.state = NULL; |
return; |
} |
r300->fs.state = fs; |
r300->fs_status = FRAGMENT_SHADER_DIRTY; |
r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */ |
} |
/* Delete fragment shader state. */ |
static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) |
{ |
struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; |
struct r300_fragment_shader_code *tmp, *ptr = fs->first; |
while (ptr) { |
tmp = ptr; |
ptr = ptr->next; |
rc_constants_destroy(&tmp->code.constants); |
FREE(tmp->cb_code); |
FREE(tmp); |
} |
FREE((void*)fs->state.tokens); |
FREE(shader); |
} |
/* Polygon stipple hook. Intentionally does nothing: the stipple pattern is
 * ignored by this driver.
 * XXX no idea how to set this up, but not terribly important */
static void r300_set_polygon_stipple(struct pipe_context* pipe,
                                     const struct pipe_poly_stipple* state)
{
    (void)pipe;
    (void)state;
}
/* Create a new rasterizer state based on the CSO rasterizer state. |
* |
* This is a very large chunk of state, and covers most of the graphics |
* backend (GB), geometry assembly (GA), and setup unit (SU) blocks. |
* |
* In a not entirely unironic sidenote, this state has nearly nothing to do |
* with the actual block on the Radeon called the rasterizer (RS). */ |
static void* r300_create_rs_state(struct pipe_context* pipe, |
const struct pipe_rasterizer_state* state) |
{ |
struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); |
uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ |
uint32_t vap_clip_cntl; /* R300_VAP_CLIP_CNTL: 0x221C */ |
uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ |
uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ |
uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ |
uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ |
uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ |
uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ |
uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ |
uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ |
uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ |
uint32_t round_mode; /* R300_GA_ROUND_MODE: 0x428c */ |
/* Point sprites texture coordinates, 0: lower left, 1: upper right */ |
float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */ |
float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ |
float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */ |
float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */ |
boolean vclamp = !r300_context(pipe)->screen->caps.is_r500; |
CB_LOCALS; |
/* Copy rasterizer state. */ |
rs->rs = *state; |
rs->rs_draw = *state; |
rs->rs.sprite_coord_enable = state->point_quad_rasterization * |
state->sprite_coord_enable; |
/* Override some states for Draw. */ |
rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ |
rs->rs_draw.offset_point = 0; |
rs->rs_draw.offset_line = 0; |
rs->rs_draw.offset_tri = 0; |
rs->rs_draw.offset_clamp = 0; |
#ifdef PIPE_ARCH_LITTLE_ENDIAN |
vap_control_status = R300_VC_NO_SWAP; |
#else |
vap_control_status = R300_VC_32BIT_SWAP; |
#endif |
/* If no TCL engine is present, turn off the HW TCL. */ |
if (!r300_screen(pipe->screen)->caps.has_tcl) { |
vap_control_status |= R300_VAP_TCL_BYPASS; |
} |
/* Point size width and height. */ |
point_size = |
pack_float_16_6x(state->point_size) | |
(pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); |
/* Point size clamping. */ |
if (state->point_size_per_vertex) { |
/* Per-vertex point size. |
* Clamp to [0, max FB size] */ |
float min_psiz = util_get_min_point_size(state); |
float max_psiz = pipe->screen->get_paramf(pipe->screen, |
PIPE_CAPF_MAX_POINT_WIDTH); |
point_minmax = |
(pack_float_16_6x(min_psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) | |
(pack_float_16_6x(max_psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT); |
} else { |
/* We cannot disable the point-size vertex output, |
* so clamp it. */ |
float psiz = state->point_size; |
point_minmax = |
(pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) | |
(pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT); |
} |
/* Line control. */ |
line_control = pack_float_16_6x(state->line_width) | |
R300_GA_LINE_CNTL_END_TYPE_COMP; |
/* Enable polygon mode */ |
polygon_mode = 0; |
if (state->fill_front != PIPE_POLYGON_MODE_FILL || |
state->fill_back != PIPE_POLYGON_MODE_FILL) { |
polygon_mode = R300_GA_POLY_MODE_DUAL; |
} |
/* Front face */ |
if (state->front_ccw) |
cull_mode = R300_FRONT_FACE_CCW; |
else |
cull_mode = R300_FRONT_FACE_CW; |
/* Polygon offset */ |
polygon_offset_enable = 0; |
if (util_get_offset(state, state->fill_front)) { |
polygon_offset_enable |= R300_FRONT_ENABLE; |
} |
if (util_get_offset(state, state->fill_back)) { |
polygon_offset_enable |= R300_BACK_ENABLE; |
} |
rs->polygon_offset_enable = polygon_offset_enable != 0; |
/* Polygon mode */ |
if (polygon_mode) { |
polygon_mode |= |
r300_translate_polygon_mode_front(state->fill_front); |
polygon_mode |= |
r300_translate_polygon_mode_back(state->fill_back); |
} |
if (state->cull_face & PIPE_FACE_FRONT) { |
cull_mode |= R300_CULL_FRONT; |
} |
if (state->cull_face & PIPE_FACE_BACK) { |
cull_mode |= R300_CULL_BACK; |
} |
if (state->line_stipple_enable) { |
line_stipple_config = |
R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | |
(fui((float)state->line_stipple_factor) & |
R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); |
/* XXX this might need to be scaled up */ |
line_stipple_value = state->line_stipple_pattern; |
} else { |
line_stipple_config = 0; |
line_stipple_value = 0; |
} |
if (state->flatshade) { |
rs->color_control = R300_SHADE_MODEL_FLAT; |
} else { |
rs->color_control = R300_SHADE_MODEL_SMOOTH; |
} |
clip_rule = state->scissor ? 0xAAAA : 0xFFFF; |
/* Point sprites coord mode */ |
if (rs->rs.sprite_coord_enable) { |
switch (state->sprite_coord_mode) { |
case PIPE_SPRITE_COORD_UPPER_LEFT: |
point_texcoord_top = 0.0f; |
point_texcoord_bottom = 1.0f; |
break; |
case PIPE_SPRITE_COORD_LOWER_LEFT: |
point_texcoord_top = 1.0f; |
point_texcoord_bottom = 0.0f; |
break; |
} |
} |
if (r300_screen(pipe->screen)->caps.has_tcl) { |
vap_clip_cntl = (state->clip_plane_enable & 63) | |
R300_PS_UCP_MODE_CLIP_AS_TRIFAN; |
} else { |
vap_clip_cntl = R300_CLIP_DISABLE; |
} |
/* Vertex color clamping. FP20 means no clamping. */ |
round_mode = |
R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST | |
(!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 | |
R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0); |
/* Build the main command buffer. */ |
BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE); |
OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); |
OUT_CB_REG(R300_VAP_CLIP_CNTL, vap_clip_cntl); |
OUT_CB_REG(R300_GA_POINT_SIZE, point_size); |
OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2); |
OUT_CB(point_minmax); |
OUT_CB(line_control); |
OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); |
OUT_CB(polygon_offset_enable); |
rs->cull_mode_index = 11; |
OUT_CB(cull_mode); |
OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config); |
OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); |
OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); |
OUT_CB_REG(R300_GA_ROUND_MODE, round_mode); |
OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); |
OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); |
OUT_CB_32F(point_texcoord_left); |
OUT_CB_32F(point_texcoord_bottom); |
OUT_CB_32F(point_texcoord_right); |
OUT_CB_32F(point_texcoord_top); |
END_CB; |
/* Build the two command buffers for polygon offset setup. */ |
if (polygon_offset_enable) { |
float scale = state->offset_scale * 12; |
float offset = state->offset_units * 4; |
BEGIN_CB(rs->cb_poly_offset_zb16, 5); |
OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); |
OUT_CB_32F(scale); |
OUT_CB_32F(offset); |
OUT_CB_32F(scale); |
OUT_CB_32F(offset); |
END_CB; |
offset = state->offset_units * 2; |
BEGIN_CB(rs->cb_poly_offset_zb24, 5); |
OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); |
OUT_CB_32F(scale); |
OUT_CB_32F(offset); |
OUT_CB_32F(scale); |
OUT_CB_32F(offset); |
END_CB; |
} |
return (void*)rs; |
} |
/* Bind rasterizer state. */ |
static void r300_bind_rs_state(struct pipe_context* pipe, void* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_rs_state* rs = (struct r300_rs_state*)state; |
int last_sprite_coord_enable = r300->sprite_coord_enable; |
boolean last_two_sided_color = r300->two_sided_color; |
boolean last_msaa_enable = r300->msaa_enable; |
boolean last_flatshade = r300->flatshade; |
if (r300->draw && rs) { |
draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); |
} |
if (rs) { |
r300->polygon_offset_enabled = rs->polygon_offset_enable; |
r300->sprite_coord_enable = rs->rs.sprite_coord_enable; |
r300->two_sided_color = rs->rs.light_twoside; |
r300->msaa_enable = rs->rs.multisample; |
r300->flatshade = rs->rs.flatshade; |
} else { |
r300->polygon_offset_enabled = FALSE; |
r300->sprite_coord_enable = 0; |
r300->two_sided_color = FALSE; |
r300->msaa_enable = FALSE; |
r300->flatshade = FALSE; |
} |
UPDATE_STATE(state, r300->rs_state); |
r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0); |
if (last_sprite_coord_enable != r300->sprite_coord_enable || |
last_two_sided_color != r300->two_sided_color || |
last_flatshade != r300->flatshade) { |
r300_mark_atom_dirty(r300, &r300->rs_block_state); |
} |
if (last_msaa_enable != r300->msaa_enable) { |
if (r300->alpha_to_coverage) { |
r300_mark_atom_dirty(r300, &r300->dsa_state); |
} |
if (r300->alpha_to_one && |
r300->fs_status == FRAGMENT_SHADER_VALID) { |
r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY; |
} |
} |
} |
/* Free rasterizer state.
 * The state object is a single allocation, so one FREE releases it. */
static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
{
    (void)pipe;
    FREE(state);
}
static void* |
r300_create_sampler_state(struct pipe_context* pipe, |
const struct pipe_sampler_state* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); |
boolean is_r500 = r300->screen->caps.is_r500; |
int lod_bias; |
sampler->state = *state; |
/* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG |
* or MIN filter is NEAREST. Since texwrap produces same results |
* for CLAMP and CLAMP_TO_EDGE, we use them instead. */ |
if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST || |
sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) { |
/* Wrap S. */ |
if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP) |
sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; |
else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP) |
sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; |
/* Wrap T. */ |
if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP) |
sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; |
else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP) |
sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; |
/* Wrap R. */ |
if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP) |
sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; |
else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP) |
sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; |
} |
sampler->filter0 |= |
(r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) | |
(r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) | |
(r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT); |
sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, |
state->mag_img_filter, |
state->min_mip_filter, |
state->max_anisotropy > 1); |
sampler->filter0 |= r300_anisotropy(state->max_anisotropy); |
/* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ |
/* We must pass these to the merge function to clamp them properly. */ |
sampler->min_lod = (unsigned)MAX2(state->min_lod, 0); |
sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0); |
lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1); |
sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; |
/* This is very high quality anisotropic filtering for R5xx. |
* It's good for benchmarking the performance of texturing but |
* in practice we don't want to slow down the driver because it's |
* a pretty good performance killer. Feel free to play with it. */ |
if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) { |
sampler->filter1 |= r500_anisotropy(state->max_anisotropy); |
} |
/* R500-specific fixups and optimizations */ |
if (r300->screen->caps.is_r500) { |
sampler->filter1 |= R500_BORDER_FIX; |
} |
return (void*)sampler; |
} |
static void r300_bind_sampler_states(struct pipe_context* pipe, |
unsigned count, |
void** states) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_textures_state* state = |
(struct r300_textures_state*)r300->textures_state.state; |
unsigned tex_units = r300->screen->caps.num_tex_units; |
if (count > tex_units) { |
return; |
} |
memcpy(state->sampler_states, states, sizeof(void*) * count); |
state->sampler_state_count = count; |
r300_mark_atom_dirty(r300, &r300->textures_state); |
} |
/* No-op hook with the same signature as r300_bind_sampler_states;
 * every argument is ignored. */
static void r300_lacks_vertex_textures(struct pipe_context* pipe,
                                       unsigned count,
                                       void** states)
{
    (void)pipe;
    (void)count;
    (void)states;
}
/* Free a sampler state object; it is a single allocation. */
static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
{
    (void)pipe;
    FREE(state);
}
static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num) |
{ |
/* This looks like a hack, but I believe it's suppose to work like |
* that. To illustrate how this works, let's assume you have 5 textures. |
* From docs, 5 and the successive numbers are: |
* |
* FOURTH_1 = 5 |
* FOURTH_2 = 6 |
* FOURTH_3 = 7 |
* EIGHTH_0 = 8 |
* EIGHTH_1 = 9 |
* |
* First 3 textures will get 3/4 of size of the cache, divived evenly |
* between them. The last 1/4 of the cache must be divided between |
* the last 2 textures, each will therefore get 1/8 of the cache. |
* Why not just to use "5 + texture_index" ? |
* |
* This simple trick works for all "num" <= 16. |
*/ |
if (num <= 1) |
return R300_TX_CACHE(R300_TX_CACHE_WHOLE); |
else |
return R300_TX_CACHE(num + index); |
} |
static void r300_set_fragment_sampler_views(struct pipe_context* pipe, |
unsigned count, |
struct pipe_sampler_view** views) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_textures_state* state = |
(struct r300_textures_state*)r300->textures_state.state; |
struct r300_resource *texture; |
unsigned i, real_num_views = 0, view_index = 0; |
unsigned tex_units = r300->screen->caps.num_tex_units; |
boolean dirty_tex = FALSE; |
if (count > tex_units) { |
return; |
} |
/* Calculate the real number of views. */ |
for (i = 0; i < count; i++) { |
if (views[i]) |
real_num_views++; |
} |
for (i = 0; i < count; i++) { |
pipe_sampler_view_reference( |
(struct pipe_sampler_view**)&state->sampler_views[i], |
views[i]); |
if (!views[i]) { |
continue; |
} |
/* A new sampler view (= texture)... */ |
dirty_tex = TRUE; |
/* Set the texrect factor in the fragment shader. |
* Needed for RECT and NPOT fallback. */ |
texture = r300_resource(views[i]->texture); |
if (texture->tex.is_npot) { |
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); |
} |
state->sampler_views[i]->texcache_region = |
r300_assign_texture_cache_region(view_index, real_num_views); |
view_index++; |
} |
for (i = count; i < tex_units; i++) { |
if (state->sampler_views[i]) { |
pipe_sampler_view_reference( |
(struct pipe_sampler_view**)&state->sampler_views[i], |
NULL); |
} |
} |
state->sampler_view_count = count; |
r300_mark_atom_dirty(r300, &r300->textures_state); |
if (dirty_tex) { |
r300_mark_atom_dirty(r300, &r300->texture_cache_inval); |
} |
} |
struct pipe_sampler_view * |
r300_create_sampler_view_custom(struct pipe_context *pipe, |
struct pipe_resource *texture, |
const struct pipe_sampler_view *templ, |
unsigned width0_override, |
unsigned height0_override) |
{ |
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); |
struct r300_resource *tex = r300_resource(texture); |
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; |
boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; |
if (view) { |
unsigned hwformat; |
view->base = *templ; |
view->base.reference.count = 1; |
view->base.context = pipe; |
view->base.texture = NULL; |
pipe_resource_reference(&view->base.texture, texture); |
view->width0_override = width0_override; |
view->height0_override = height0_override; |
view->swizzle[0] = templ->swizzle_r; |
view->swizzle[1] = templ->swizzle_g; |
view->swizzle[2] = templ->swizzle_b; |
view->swizzle[3] = templ->swizzle_a; |
hwformat = r300_translate_texformat(templ->format, |
view->swizzle, |
is_r500, |
dxtc_swizzle); |
if (hwformat == ~0) { |
fprintf(stderr, "r300: Ooops. Got unsupported format %s in %s.\n", |
util_format_short_name(templ->format), __func__); |
} |
assert(hwformat != ~0); |
r300_texture_setup_format_state(r300_screen(pipe->screen), tex, |
templ->format, 0, |
width0_override, height0_override, |
&view->format); |
view->format.format1 |= hwformat; |
if (is_r500) { |
view->format.format2 |= r500_tx_format_msb_bit(templ->format); |
} |
} |
return (struct pipe_sampler_view*)view; |
} |
static struct pipe_sampler_view * |
r300_create_sampler_view(struct pipe_context *pipe, |
struct pipe_resource *texture, |
const struct pipe_sampler_view *templ) |
{ |
return r300_create_sampler_view_custom(pipe, texture, templ, |
r300_resource(texture)->tex.width0, |
r300_resource(texture)->tex.height0); |
} |
static void |
r300_sampler_view_destroy(struct pipe_context *pipe, |
struct pipe_sampler_view *view) |
{ |
pipe_resource_reference(&view->texture, NULL); |
FREE(view); |
} |
static void r300_set_sample_mask(struct pipe_context *pipe, |
unsigned mask) |
{ |
struct r300_context* r300 = r300_context(pipe); |
*((unsigned*)r300->sample_mask.state) = mask; |
r300_mark_atom_dirty(r300, &r300->sample_mask); |
} |
static void r300_set_scissor_states(struct pipe_context* pipe, |
unsigned start_slot, |
unsigned num_scissors, |
const struct pipe_scissor_state* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
memcpy(r300->scissor_state.state, state, |
sizeof(struct pipe_scissor_state)); |
r300_mark_atom_dirty(r300, &r300->scissor_state); |
} |
static void r300_set_viewport_states(struct pipe_context* pipe, |
unsigned start_slot, |
unsigned num_viewports, |
const struct pipe_viewport_state* state) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_viewport_state* viewport = |
(struct r300_viewport_state*)r300->viewport_state.state; |
r300->viewport = *state; |
if (r300->draw) { |
draw_set_viewport_states(r300->draw, start_slot, num_viewports, state); |
viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT; |
return; |
} |
/* Do the transform in HW. */ |
viewport->vte_control = R300_VTX_W0_FMT; |
if (state->scale[0] != 1.0f) { |
viewport->xscale = state->scale[0]; |
viewport->vte_control |= R300_VPORT_X_SCALE_ENA; |
} |
if (state->scale[1] != 1.0f) { |
viewport->yscale = state->scale[1]; |
viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; |
} |
if (state->scale[2] != 1.0f) { |
viewport->zscale = state->scale[2]; |
viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; |
} |
if (state->translate[0] != 0.0f) { |
viewport->xoffset = state->translate[0]; |
viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; |
} |
if (state->translate[1] != 0.0f) { |
viewport->yoffset = state->translate[1]; |
viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; |
} |
if (state->translate[2] != 0.0f) { |
viewport->zoffset = state->translate[2]; |
viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; |
} |
r300_mark_atom_dirty(r300, &r300->viewport_state); |
if (r300->fs.state && r300_fs(r300)->shader && |
r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { |
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); |
} |
} |
static void r300_set_vertex_buffers_hwtcl(struct pipe_context* pipe, |
unsigned start_slot, unsigned count, |
const struct pipe_vertex_buffer* buffers) |
{ |
struct r300_context* r300 = r300_context(pipe); |
util_set_vertex_buffers_count(r300->vertex_buffer, |
&r300->nr_vertex_buffers, |
buffers, start_slot, count); |
/* There must be at least one vertex buffer set, otherwise it locks up. */ |
if (!r300->nr_vertex_buffers) { |
util_set_vertex_buffers_count(r300->vertex_buffer, |
&r300->nr_vertex_buffers, |
&r300->dummy_vb, 0, 1); |
} |
r300->vertex_arrays_dirty = TRUE; |
} |
static void r300_set_vertex_buffers_swtcl(struct pipe_context* pipe, |
unsigned start_slot, unsigned count, |
const struct pipe_vertex_buffer* buffers) |
{ |
struct r300_context* r300 = r300_context(pipe); |
unsigned i; |
util_set_vertex_buffers_count(r300->vertex_buffer, |
&r300->nr_vertex_buffers, |
buffers, start_slot, count); |
draw_set_vertex_buffers(r300->draw, start_slot, count, buffers); |
if (!buffers) |
return; |
for (i = 0; i < count; i++) { |
if (buffers[i].user_buffer) { |
draw_set_mapped_vertex_buffer(r300->draw, start_slot + i, |
buffers[i].user_buffer, ~0); |
} else if (buffers[i].buffer) { |
draw_set_mapped_vertex_buffer(r300->draw, start_slot + i, |
r300_resource(buffers[i].buffer)->malloced_buffer, ~0); |
} |
} |
} |
static void r300_set_index_buffer_hwtcl(struct pipe_context* pipe, |
const struct pipe_index_buffer *ib) |
{ |
struct r300_context* r300 = r300_context(pipe); |
if (ib) { |
pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); |
memcpy(&r300->index_buffer, ib, sizeof(*ib)); |
} else { |
pipe_resource_reference(&r300->index_buffer.buffer, NULL); |
} |
} |
static void r300_set_index_buffer_swtcl(struct pipe_context* pipe, |
const struct pipe_index_buffer *ib) |
{ |
struct r300_context* r300 = r300_context(pipe); |
if (ib) { |
const void *buf = NULL; |
if (ib->user_buffer) { |
buf = ib->user_buffer; |
} else if (ib->buffer) { |
buf = r300_resource(ib->buffer)->malloced_buffer; |
} |
draw_set_indexes(r300->draw, |
(const ubyte *) buf + ib->offset, |
ib->index_size, ~0); |
} |
} |
/* Initialize the PSC tables. */ |
static void r300_vertex_psc(struct r300_vertex_element_state *velems) |
{ |
struct r300_vertex_stream_state *vstream = &velems->vertex_stream; |
uint16_t type, swizzle; |
enum pipe_format format; |
unsigned i; |
/* Vertex shaders have no semantics on their inputs, |
* so PSC should just route stuff based on the vertex elements, |
* and not on attrib information. */ |
for (i = 0; i < velems->count; i++) { |
format = velems->velem[i].src_format; |
type = r300_translate_vertex_data_type(format); |
if (type == R300_INVALID_FORMAT) { |
fprintf(stderr, "r300: Bad vertex format %s.\n", |
util_format_short_name(format)); |
assert(0); |
abort(); |
} |
type |= i << R300_DST_VEC_LOC_SHIFT; |
swizzle = r300_translate_vertex_data_swizzle(format); |
if (i & 1) { |
vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; |
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; |
} else { |
vstream->vap_prog_stream_cntl[i >> 1] |= type; |
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; |
} |
} |
/* Set the last vector in the PSC. */ |
if (i) { |
i -= 1; |
} |
vstream->vap_prog_stream_cntl[i >> 1] |= |
(R300_LAST_VEC << (i & 1 ? 16 : 0)); |
vstream->count = (i >> 1) + 1; |
} |
static void* r300_create_vertex_elements_state(struct pipe_context* pipe, |
unsigned count, |
const struct pipe_vertex_element* attribs) |
{ |
struct r300_vertex_element_state *velems; |
unsigned i; |
struct pipe_vertex_element dummy_attrib = {0}; |
/* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */ |
if (!count) { |
dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM; |
attribs = &dummy_attrib; |
count = 1; |
} else if (count > 16) { |
fprintf(stderr, "r300: More than 16 vertex elements are not supported," |
" requested %i, using 16.\n", count); |
count = 16; |
} |
velems = CALLOC_STRUCT(r300_vertex_element_state); |
if (!velems) |
return NULL; |
velems->count = count; |
memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); |
if (r300_screen(pipe->screen)->caps.has_tcl) { |
/* Setup PSC. |
* The unused components will be replaced by (..., 0, 1). */ |
r300_vertex_psc(velems); |
for (i = 0; i < count; i++) { |
velems->format_size[i] = |
align(util_format_get_blocksize(velems->velem[i].src_format), 4); |
velems->vertex_size_dwords += velems->format_size[i] / 4; |
} |
} |
return velems; |
} |
static void r300_bind_vertex_elements_state(struct pipe_context *pipe, |
void *state) |
{ |
struct r300_context *r300 = r300_context(pipe); |
struct r300_vertex_element_state *velems = state; |
if (velems == NULL) { |
return; |
} |
r300->velems = velems; |
if (r300->draw) { |
draw_set_vertex_elements(r300->draw, velems->count, velems->velem); |
return; |
} |
UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); |
r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; |
r300->vertex_arrays_dirty = TRUE; |
} |
/* Free a vertex elements state object; it is a single allocation. */
static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
{
    (void)pipe;
    FREE(state);
}
static void* r300_create_vs_state(struct pipe_context* pipe, |
const struct pipe_shader_state* shader) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); |
/* Copy state directly into shader. */ |
vs->state = *shader; |
vs->state.tokens = tgsi_dup_tokens(shader->tokens); |
if (r300->screen->caps.has_tcl) { |
r300_init_vs_outputs(r300, vs); |
r300_translate_vertex_shader(r300, vs); |
} else { |
r300_draw_init_vertex_shader(r300, vs); |
} |
return vs; |
} |
static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; |
if (vs == NULL) { |
r300->vs_state.state = NULL; |
return; |
} |
if (vs == r300->vs_state.state) { |
return; |
} |
r300->vs_state.state = vs; |
/* The majority of the RS block bits is dependent on the vertex shader. */ |
r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */ |
if (r300->screen->caps.has_tcl) { |
unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2; |
r300_mark_atom_dirty(r300, &r300->vs_state); |
r300->vs_state.size = vs->code.length + 9 + |
(R300_VS_MAX_FC_OPS * fc_op_dwords + 4); |
r300_mark_atom_dirty(r300, &r300->vs_constants); |
r300->vs_constants.size = |
2 + |
(vs->externals_count ? vs->externals_count * 4 + 3 : 0) + |
(vs->immediates_count ? vs->immediates_count * 4 + 3 : 0); |
((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table = |
vs->code.constants_remap_table; |
r300_mark_atom_dirty(r300, &r300->pvs_flush); |
} else { |
draw_bind_vertex_shader(r300->draw, |
(struct draw_vertex_shader*)vs->draw_vs); |
} |
} |
static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) |
{ |
struct r300_context* r300 = r300_context(pipe); |
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; |
if (r300->screen->caps.has_tcl) { |
rc_constants_destroy(&vs->code.constants); |
FREE(vs->code.constants_remap_table); |
} else { |
draw_delete_vertex_shader(r300->draw, |
(struct draw_vertex_shader*)vs->draw_vs); |
} |
FREE((void*)vs->state.tokens); |
FREE(shader); |
} |
/* Set the constant buffer of the vertex or fragment shader stage.
 *
 * The call is ignored when no buffer is given, when the shader stage is
 * neither vertex nor fragment, or when a non-user buffer has no malloc'ed
 * storage. The "index" argument is not used by this function. */
static void r300_set_constant_buffer(struct pipe_context *pipe,
                                     uint shader, uint index,
                                     struct pipe_constant_buffer *cb)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_constant_buffer *cbuf;
    struct r300_vertex_shader *vs;
    uint32_t *mapped;

    if (!cb) {
        return;
    }
    if (!cb->buffer && !cb->user_buffer) {
        return;
    }

    /* Pick the per-stage constant buffer state. */
    if (shader == PIPE_SHADER_VERTEX) {
        cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
    } else if (shader == PIPE_SHADER_FRAGMENT) {
        cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
    } else {
        return;
    }

    /* Find the CPU-visible storage of the constants. */
    if (cb->user_buffer) {
        mapped = (uint32_t*)cb->user_buffer;
    } else {
        struct r300_resource *rbuf = r300_resource(cb->buffer);

        if (!rbuf || !rbuf->malloced_buffer) {
            return;
        }
        mapped = (uint32_t*)rbuf->malloced_buffer;
    }

    if (shader == PIPE_SHADER_FRAGMENT) {
        cbuf->ptr = mapped;
        r300_mark_atom_dirty(r300, &r300->fs_constants);
        return;
    }

    /* Vertex shader constants from here on. */
    if (!r300->screen->caps.has_tcl) {
        /* Software TCL: the constants go to Draw, if it exists. */
        if (r300->draw) {
            draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
                                            0, mapped, cb->buffer_size);
        }
        return;
    }

    cbuf->ptr = mapped;

    vs = (struct r300_vertex_shader*)r300->vs_state.state;
    if (!vs) {
        cbuf->buffer_base = 0;
        return;
    }

    /* Place this buffer after the previously used constants. When the
     * running base would exceed the constant space, start again from the
     * beginning and request a PVS flush. */
    cbuf->buffer_base = r300->vs_const_base;
    r300->vs_const_base += vs->code.constants.Count;
    if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) {
        r300->vs_const_base = vs->code.constants.Count;
        cbuf->buffer_base = 0;
        r300_mark_atom_dirty(r300, &r300->pvs_flush);
    }
    r300_mark_atom_dirty(r300, &r300->vs_constants);
}
static void r300_texture_barrier(struct pipe_context *pipe) |
{ |
struct r300_context *r300 = r300_context(pipe); |
r300_mark_atom_dirty(r300, &r300->gpu_flush); |
r300_mark_atom_dirty(r300, &r300->texture_cache_inval); |
} |
void r300_init_state_functions(struct r300_context* r300) |
{ |
r300->context.create_blend_state = r300_create_blend_state; |
r300->context.bind_blend_state = r300_bind_blend_state; |
r300->context.delete_blend_state = r300_delete_blend_state; |
r300->context.set_blend_color = r300_set_blend_color; |
r300->context.set_clip_state = r300_set_clip_state; |
r300->context.set_sample_mask = r300_set_sample_mask; |
r300->context.set_constant_buffer = r300_set_constant_buffer; |
r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state; |
r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state; |
r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state; |
r300->context.set_stencil_ref = r300_set_stencil_ref; |
r300->context.set_framebuffer_state = r300_set_framebuffer_state; |
r300->context.create_fs_state = r300_create_fs_state; |
r300->context.bind_fs_state = r300_bind_fs_state; |
r300->context.delete_fs_state = r300_delete_fs_state; |
r300->context.set_polygon_stipple = r300_set_polygon_stipple; |
r300->context.create_rasterizer_state = r300_create_rs_state; |
r300->context.bind_rasterizer_state = r300_bind_rs_state; |
r300->context.delete_rasterizer_state = r300_delete_rs_state; |
r300->context.create_sampler_state = r300_create_sampler_state; |
r300->context.bind_fragment_sampler_states = r300_bind_sampler_states; |
r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures; |
r300->context.delete_sampler_state = r300_delete_sampler_state; |
r300->context.set_fragment_sampler_views = r300_set_fragment_sampler_views; |
r300->context.create_sampler_view = r300_create_sampler_view; |
r300->context.sampler_view_destroy = r300_sampler_view_destroy; |
r300->context.set_scissor_states = r300_set_scissor_states; |
r300->context.set_viewport_states = r300_set_viewport_states; |
if (r300->screen->caps.has_tcl) { |
r300->context.set_vertex_buffers = r300_set_vertex_buffers_hwtcl; |
r300->context.set_index_buffer = r300_set_index_buffer_hwtcl; |
} else { |
r300->context.set_vertex_buffers = r300_set_vertex_buffers_swtcl; |
r300->context.set_index_buffer = r300_set_index_buffer_swtcl; |
} |
r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; |
r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; |
r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state; |
r300->context.create_vs_state = r300_create_vs_state; |
r300->context.bind_vs_state = r300_bind_vs_state; |
r300->context.delete_vs_state = r300_delete_vs_state; |
r300->context.texture_barrier = r300_texture_barrier; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_state_derived.c |
---|
0,0 → 1,1089 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "draw/draw_context.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_pack_color.h" |
#include "r300_context.h" |
#include "r300_fs.h" |
#include "r300_screen.h" |
#include "r300_shader_semantics.h" |
#include "r300_state_inlines.h" |
#include "r300_texture.h" |
#include "r300_vs.h" |
/* r300_state_derived: Various bits of state which are dependent upon |
* currently bound CSO data. */ |
/* Swizzles that the RS setup helpers in this file can be asked for.
 * The names spell out the four selected components; 0 and 1 are
 * constants. */
enum r300_rs_swizzle {
    SWIZ_XYZW = 0,
    SWIZ_X001 = 1,
    SWIZ_XY01 = 2,
    SWIZ_0001 = 3
};
/* What a color RS instruction writes to the fragment shader input:
 * a regular color, or the face value (only handled by the r500 helper). */
enum r300_rs_col_write_type {
    WRITE_COLOR = 0,
    WRITE_FACE = 1
};
static void r300_draw_emit_attrib(struct r300_context* r300, |
enum attrib_emit emit, |
enum interp_mode interp, |
int index) |
{ |
struct r300_vertex_shader* vs = r300->vs_state.state; |
struct tgsi_shader_info* info = &vs->info; |
int output; |
output = draw_find_shader_output(r300->draw, |
info->output_semantic_name[index], |
info->output_semantic_index[index]); |
draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output); |
} |
static void r300_draw_emit_all_attribs(struct r300_context* r300) |
{ |
struct r300_vertex_shader* vs = r300->vs_state.state; |
struct r300_shader_semantics* vs_outputs = &vs->outputs; |
int i, gen_count; |
/* Position. */ |
if (vs_outputs->pos != ATTR_UNUSED) { |
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, |
vs_outputs->pos); |
} else { |
assert(0); |
} |
/* Point size. */ |
if (vs_outputs->psize != ATTR_UNUSED) { |
r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, |
vs_outputs->psize); |
} |
/* Colors. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
if (vs_outputs->color[i] != ATTR_UNUSED) { |
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, |
vs_outputs->color[i]); |
} |
} |
/* Back-face colors. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
if (vs_outputs->bcolor[i] != ATTR_UNUSED) { |
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, |
vs_outputs->bcolor[i]); |
} |
} |
/* Texture coordinates. */ |
/* Only 8 generic vertex attributes can be used. If there are more, |
* they won't be rasterized. */ |
gen_count = 0; |
for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { |
if (vs_outputs->generic[i] != ATTR_UNUSED && |
!(r300->sprite_coord_enable & (1 << i))) { |
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, |
vs_outputs->generic[i]); |
gen_count++; |
} |
} |
/* Fog coordinates. */ |
if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) { |
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, |
vs_outputs->fog); |
gen_count++; |
} |
/* WPOS. */ |
if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) { |
DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n", |
vs_outputs->wpos); |
r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, |
vs_outputs->wpos); |
} |
} |
/* Update the PSC tables for SW TCL, using Draw. */ |
static void r300_swtcl_vertex_psc(struct r300_context *r300) |
{ |
struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; |
struct vertex_info *vinfo = &r300->vertex_info; |
uint16_t type, swizzle; |
enum pipe_format format; |
unsigned i, attrib_count; |
int* vs_output_tab = r300->stream_loc_notcl; |
memset(vstream, 0, sizeof(struct r300_vertex_stream_state)); |
/* For each Draw attribute, route it to the fragment shader according |
* to the vs_output_tab. */ |
attrib_count = vinfo->num_attribs; |
DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count); |
for (i = 0; i < attrib_count; i++) { |
if (vs_output_tab[i] == -1) { |
assert(0); |
abort(); |
} |
format = draw_translate_vinfo_format(vinfo->attrib[i].emit); |
DBG(r300, DBG_SWTCL, |
"r300: swtcl_vertex_psc [%i] <- %s\n", |
vs_output_tab[i], util_format_short_name(format)); |
/* Obtain the type of data in this attribute. */ |
type = r300_translate_vertex_data_type(format); |
if (type == R300_INVALID_FORMAT) { |
fprintf(stderr, "r300: Bad vertex format %s.\n", |
util_format_short_name(format)); |
assert(0); |
abort(); |
} |
type |= vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; |
/* Obtain the swizzle for this attribute. Note that the default |
* swizzle in the hardware is not XYZW! */ |
swizzle = r300_translate_vertex_data_swizzle(format); |
/* Add the attribute to the PSC table. */ |
if (i & 1) { |
vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; |
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; |
} else { |
vstream->vap_prog_stream_cntl[i >> 1] |= type; |
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; |
} |
} |
/* Set the last vector in the PSC. */ |
if (i) { |
i -= 1; |
} |
vstream->vap_prog_stream_cntl[i >> 1] |= |
(R300_LAST_VEC << (i & 1 ? 16 : 0)); |
vstream->count = (i >> 1) + 1; |
r300_mark_atom_dirty(r300, &r300->vertex_stream_state); |
r300->vertex_stream_state.size = (1 + vstream->count) * 2; |
} |
static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, |
enum r300_rs_swizzle swiz) |
{ |
rs->ip[id] |= R300_RS_COL_PTR(ptr); |
if (swiz == SWIZ_0001) { |
rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); |
} else { |
rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); |
} |
rs->inst[id] |= R300_RS_INST_COL_ID(id); |
} |
static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, |
enum r300_rs_col_write_type type) |
{ |
assert(type == WRITE_COLOR); |
rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | |
R300_RS_INST_COL_ADDR(fp_offset); |
} |
static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, |
enum r300_rs_swizzle swiz) |
{ |
if (swiz == SWIZ_X001) { |
rs->ip[id] |= R300_RS_TEX_PTR(ptr) | |
R300_RS_SEL_S(R300_RS_SEL_C0) | |
R300_RS_SEL_T(R300_RS_SEL_K0) | |
R300_RS_SEL_R(R300_RS_SEL_K0) | |
R300_RS_SEL_Q(R300_RS_SEL_K1); |
} else if (swiz == SWIZ_XY01) { |
rs->ip[id] |= R300_RS_TEX_PTR(ptr) | |
R300_RS_SEL_S(R300_RS_SEL_C0) | |
R300_RS_SEL_T(R300_RS_SEL_C1) | |
R300_RS_SEL_R(R300_RS_SEL_K0) | |
R300_RS_SEL_Q(R300_RS_SEL_K1); |
} else { |
rs->ip[id] |= R300_RS_TEX_PTR(ptr) | |
R300_RS_SEL_S(R300_RS_SEL_C0) | |
R300_RS_SEL_T(R300_RS_SEL_C1) | |
R300_RS_SEL_R(R300_RS_SEL_C2) | |
R300_RS_SEL_Q(R300_RS_SEL_C3); |
} |
rs->inst[id] |= R300_RS_INST_TEX_ID(id); |
} |
static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) |
{ |
rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE | |
R300_RS_INST_TEX_ADDR(fp_offset); |
} |
static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, |
enum r300_rs_swizzle swiz) |
{ |
rs->ip[id] |= R500_RS_COL_PTR(ptr); |
if (swiz == SWIZ_0001) { |
rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); |
} else { |
rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); |
} |
rs->inst[id] |= R500_RS_INST_COL_ID(id); |
} |
static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, |
enum r300_rs_col_write_type type) |
{ |
if (type == WRITE_FACE) |
rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE | |
R500_RS_INST_COL_ADDR(fp_offset); |
else |
rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | |
R500_RS_INST_COL_ADDR(fp_offset); |
} |
static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, |
enum r300_rs_swizzle swiz) |
{ |
if (swiz == SWIZ_X001) { |
rs->ip[id] |= R500_RS_SEL_S(ptr) | |
R500_RS_SEL_T(R500_RS_IP_PTR_K0) | |
R500_RS_SEL_R(R500_RS_IP_PTR_K0) | |
R500_RS_SEL_Q(R500_RS_IP_PTR_K1); |
} else if (swiz == SWIZ_XY01) { |
rs->ip[id] |= R500_RS_SEL_S(ptr) | |
R500_RS_SEL_T(ptr + 1) | |
R500_RS_SEL_R(R500_RS_IP_PTR_K0) | |
R500_RS_SEL_Q(R500_RS_IP_PTR_K1); |
} else { |
rs->ip[id] |= R500_RS_SEL_S(ptr) | |
R500_RS_SEL_T(ptr + 1) | |
R500_RS_SEL_R(ptr + 2) | |
R500_RS_SEL_Q(ptr + 3); |
} |
rs->inst[id] |= R500_RS_INST_TEX_ID(id); |
} |
static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) |
{ |
rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE | |
R500_RS_INST_TEX_ADDR(fp_offset); |
} |
/* Set up the RS block. |
* |
* This is the part of the chipset that is responsible for linking vertex |
* and fragment shaders and stuffed texture coordinates. |
* |
* The rasterizer reads data from VAP, which produces vertex shader outputs, |
* and GA, which produces stuffed texture coordinates. VAP outputs have |
* precedence over GA. All outputs must be rasterized otherwise it locks up. |
* If there are more outputs rasterized than is set in VAP/GA, it locks up |
* too. The funky part is that this info has been pretty much obtained by trial |
* and error. */ |
static void r300_update_rs_block(struct r300_context *r300) |
{ |
struct r300_vertex_shader *vs = r300->vs_state.state; |
struct r300_shader_semantics *vs_outputs = &vs->outputs; |
struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; |
struct r300_rs_block rs = {0}; |
int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; |
int gen_offset = 0; |
void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); |
void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type); |
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); |
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); |
boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || |
vs_outputs->bcolor[1] != ATTR_UNUSED; |
int *stream_loc_notcl = r300->stream_loc_notcl; |
uint32_t stuffing_enable = 0; |
if (r300->screen->caps.is_r500) { |
rX00_rs_col = r500_rs_col; |
rX00_rs_col_write = r500_rs_col_write; |
rX00_rs_tex = r500_rs_tex; |
rX00_rs_tex_write = r500_rs_tex_write; |
} else { |
rX00_rs_col = r300_rs_col; |
rX00_rs_col_write = r300_rs_col_write; |
rX00_rs_tex = r300_rs_tex; |
rX00_rs_tex_write = r300_rs_tex_write; |
} |
/* 0x5555 copied from classic, which means: |
* Select user color 0 for COLOR0 up to COLOR7. |
* What the hell does that mean? */ |
rs.vap_vtx_state_cntl = 0x5555; |
/* The position is always present in VAP. */ |
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; |
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; |
stream_loc_notcl[loc++] = 0; |
/* Set up the point size in VAP. */ |
if (vs_outputs->psize != ATTR_UNUSED) { |
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; |
stream_loc_notcl[loc++] = 1; |
} |
/* Set up and rasterize colors. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || |
vs_outputs->color[1] != ATTR_UNUSED) { |
/* Set up the color in VAP. */ |
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; |
rs.vap_out_vtx_fmt[0] |= |
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; |
stream_loc_notcl[loc++] = 2 + i; |
/* Rasterize it. */ |
rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); |
/* Write it to the FS input register if it's needed by the FS. */ |
if (fs_inputs->color[i] != ATTR_UNUSED) { |
rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR); |
fp_offset++; |
DBG(r300, DBG_RS, |
"r300: Rasterized color %i written to FS.\n", i); |
} else { |
DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i); |
} |
col_count++; |
} else { |
/* Skip the FS input register, leave it uninitialized. */ |
/* If we try to set it to (0,0,0,1), it will lock up. */ |
if (fs_inputs->color[i] != ATTR_UNUSED) { |
fp_offset++; |
DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n", |
i); |
} |
} |
} |
/* Set up back-face colors. The rasterizer will do the color selection |
* automatically. */ |
if (any_bcolor_used) { |
if (r300->two_sided_color) { |
/* Rasterize as back-face colors. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; |
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); |
stream_loc_notcl[loc++] = 4 + i; |
} |
} else { |
/* Rasterize two fake texcoords to prevent from the two-sided color |
* selection. */ |
/* XXX Consider recompiling the vertex shader to save 2 RS units. */ |
for (i = 0; i < 2; i++) { |
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); |
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); |
stream_loc_notcl[loc++] = 6 + tex_count; |
/* Rasterize it. */ |
rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); |
tex_count++; |
tex_ptr += 4; |
} |
} |
} |
/* gl_FrontFacing. |
* Note that we can use either the two-sided color selection based on |
* the front and back vertex shader colors, or gl_FrontFacing, |
* but not both! It locks up otherwise. |
* |
* In Direct3D 9, the two-sided color selection can be used |
* with shaders 2.0 only, while gl_FrontFacing can be used |
* with shaders 3.0 only. The hardware apparently hasn't been designed |
* to support both at the same time. */ |
if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED && |
!(any_bcolor_used && r300->two_sided_color)) { |
rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); |
rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE); |
fp_offset++; |
col_count++; |
DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n"); |
} else if (fs_inputs->face != ATTR_UNUSED) { |
fprintf(stderr, "r300: ERROR: FS input FACE unassigned.\n"); |
} |
/* Re-use color varyings for texcoords if possible. |
* |
* The colors are interpolated as 20-bit floats (reduced precision), |
* Use this hack only if there are too many generic varyings. |
* (number of generic varyings + fog + wpos > 8) */ |
if (r300->screen->caps.is_r500 && !any_bcolor_used && !r300->flatshade && |
fs_inputs->face == ATTR_UNUSED && |
vs_outputs->num_generic + (vs_outputs->fog != ATTR_UNUSED) + |
(fs_inputs->wpos != ATTR_UNUSED) > 8) { |
for (i = 0; i < ATTR_GENERIC_COUNT && col_count < 2; i++) { |
/* Cannot use color varyings for sprite coords. */ |
if (fs_inputs->generic[i] != ATTR_UNUSED && |
(r300->sprite_coord_enable & (1 << i))) { |
break; |
} |
if (vs_outputs->generic[i] != ATTR_UNUSED) { |
/* Set up the color in VAP. */ |
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; |
rs.vap_out_vtx_fmt[0] |= |
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << col_count; |
stream_loc_notcl[loc++] = 2 + col_count; |
/* Rasterize it. */ |
rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); |
/* Write it to the FS input register if it's needed by the FS. */ |
if (fs_inputs->generic[i] != ATTR_UNUSED) { |
rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR); |
fp_offset++; |
DBG(r300, DBG_RS, |
"r300: Rasterized generic %i redirected to color %i and written to FS.\n", |
i, col_count); |
} else { |
DBG(r300, DBG_RS, "r300: Rasterized generic %i redirected to color %i unused.\n", |
i, col_count); |
} |
col_count++; |
} else { |
/* Skip the FS input register, leave it uninitialized. */ |
/* If we try to set it to (0,0,0,1), it will lock up. */ |
if (fs_inputs->generic[i] != ATTR_UNUSED) { |
fp_offset++; |
DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", i); |
} |
} |
} |
gen_offset = i; |
} |
/* Rasterize texture coordinates. */ |
for (i = gen_offset; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { |
boolean sprite_coord = false; |
if (fs_inputs->generic[i] != ATTR_UNUSED) { |
sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); |
} |
if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { |
if (!sprite_coord) { |
/* Set up the texture coordinates in VAP. */ |
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); |
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); |
stream_loc_notcl[loc++] = 6 + tex_count; |
} else |
stuffing_enable |= |
R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (tex_count*2)); |
/* Rasterize it. */ |
rX00_rs_tex(&rs, tex_count, tex_ptr, |
sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); |
/* Write it to the FS input register if it's needed by the FS. */ |
if (fs_inputs->generic[i] != ATTR_UNUSED) { |
rX00_rs_tex_write(&rs, tex_count, fp_offset); |
fp_offset++; |
DBG(r300, DBG_RS, |
"r300: Rasterized generic %i written to FS%s in texcoord %d.\n", |
i, sprite_coord ? " (sprite coord)" : "", tex_count); |
} else { |
DBG(r300, DBG_RS, |
"r300: Rasterized generic %i unused%s.\n", |
i, sprite_coord ? " (sprite coord)" : ""); |
} |
tex_count++; |
tex_ptr += sprite_coord ? 2 : 4; |
} else { |
/* Skip the FS input register, leave it uninitialized. */ |
/* If we try to set it to (0,0,0,1), it will lock up. */ |
if (fs_inputs->generic[i] != ATTR_UNUSED) { |
fp_offset++; |
DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", |
i, sprite_coord ? " (sprite coord)" : ""); |
} |
} |
} |
for (; i < ATTR_GENERIC_COUNT; i++) { |
if (fs_inputs->generic[i] != ATTR_UNUSED) { |
fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " |
"not enough hardware slots (it's not a bug, do not " |
"report it).\n", i); |
} |
} |
/* Rasterize fog coordinates. */ |
if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) { |
/* Set up the fog coordinates in VAP. */ |
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); |
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); |
stream_loc_notcl[loc++] = 6 + tex_count; |
/* Rasterize it. */ |
rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001); |
/* Write it to the FS input register if it's needed by the FS. */ |
if (fs_inputs->fog != ATTR_UNUSED) { |
rX00_rs_tex_write(&rs, tex_count, fp_offset); |
fp_offset++; |
DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n"); |
} else { |
DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); |
} |
tex_count++; |
tex_ptr += 4; |
} else { |
/* Skip the FS input register, leave it uninitialized. */ |
/* If we try to set it to (0,0,0,1), it will lock up. */ |
if (fs_inputs->fog != ATTR_UNUSED) { |
fp_offset++; |
if (tex_count < 8) { |
DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); |
} else { |
fprintf(stderr, "r300: ERROR: FS input fog unassigned, " |
"not enough hardware slots. (it's not a bug, " |
"do not report it)\n"); |
} |
} |
} |
/* Rasterize WPOS. */ |
/* Don't set it in VAP if the FS doesn't need it. */ |
if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) { |
/* Set up the WPOS coordinates in VAP. */ |
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); |
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); |
stream_loc_notcl[loc++] = 6 + tex_count; |
/* Rasterize it. */ |
rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); |
/* Write it to the FS input register. */ |
rX00_rs_tex_write(&rs, tex_count, fp_offset); |
DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n"); |
fp_offset++; |
tex_count++; |
tex_ptr += 4; |
} else { |
if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) { |
fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, " |
"not enough hardware slots. (it's not a bug, do not " |
"report it)\n"); |
} |
} |
/* Invalidate the rest of the no-TCL (GA) stream locations. */ |
for (; loc < 16;) { |
stream_loc_notcl[loc++] = -1; |
} |
/* Rasterize at least one color, or bad things happen. */ |
if (col_count == 0 && tex_count == 0) { |
rX00_rs_col(&rs, 0, 0, SWIZ_0001); |
col_count++; |
DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n"); |
} |
DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " |
"generics: %i.\n", col_count, tex_count); |
rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) | |
R300_HIRES_EN; |
count = MAX3(col_count, tex_count, 1); |
rs.inst_count = count - 1; |
/* set the GB enable flags */ |
if (r300->sprite_coord_enable) |
stuffing_enable |= R300_GB_POINT_STUFF_ENABLE; |
rs.gb_enable = stuffing_enable; |
/* Now, after all that, see if we actually need to update the state. */ |
if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { |
memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); |
r300->rs_block_state.size = 13 + count*2; |
} |
} |
/* Convert a color from RGBA to BGRA order in place by exchanging the
 * first and the third component. */
static void rgba_to_bgra(float color[4])
{
    const float red = color[0];
    const float blue = color[2];

    color[0] = blue;
    color[2] = red;
}
static uint32_t r300_get_border_color(enum pipe_format format, |
const float border[4], |
boolean is_r500) |
{ |
const struct util_format_description *desc; |
float border_swizzled[4] = {0}; |
union util_color uc = {0}; |
desc = util_format_description(format); |
/* Do depth formats first. */ |
if (util_format_is_depth_or_stencil(format)) { |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]); |
case PIPE_FORMAT_X8Z24_UNORM: |
case PIPE_FORMAT_S8_UINT_Z24_UNORM: |
if (is_r500) { |
return util_pack_z(PIPE_FORMAT_X8Z24_UNORM, border[0]); |
} else { |
return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]) << 16; |
} |
default: |
assert(0); |
return 0; |
} |
} |
/* Apply inverse swizzle of the format. */ |
util_format_unswizzle_4f(border_swizzled, border, desc->swizzle); |
/* Compressed formats. */ |
if (util_format_is_compressed(format)) { |
switch (format) { |
case PIPE_FORMAT_RGTC1_SNORM: |
case PIPE_FORMAT_LATC1_SNORM: |
border_swizzled[0] = border_swizzled[0] < 0 ? |
border_swizzled[0]*0.5+1 : |
border_swizzled[0]*0.5; |
/* Pass through. */ |
case PIPE_FORMAT_RGTC1_UNORM: |
case PIPE_FORMAT_LATC1_UNORM: |
/* Add 1/32 to round the border color instead of truncating. */ |
/* The Y component is used for the border color. */ |
border_swizzled[1] = border_swizzled[0] + 1.0f/32; |
util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); |
return uc.ui; |
case PIPE_FORMAT_RGTC2_SNORM: |
case PIPE_FORMAT_LATC2_SNORM: |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); |
return uc.ui; |
case PIPE_FORMAT_RGTC2_UNORM: |
case PIPE_FORMAT_LATC2_UNORM: |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); |
return uc.ui; |
case PIPE_FORMAT_DXT1_SRGB: |
case PIPE_FORMAT_DXT1_SRGBA: |
case PIPE_FORMAT_DXT3_SRGBA: |
case PIPE_FORMAT_DXT5_SRGBA: |
util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_SRGB, &uc); |
return uc.ui; |
default: |
util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); |
return uc.ui; |
} |
} |
switch (desc->channel[0].size) { |
case 2: |
rgba_to_bgra(border_swizzled); |
util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc); |
break; |
case 4: |
rgba_to_bgra(border_swizzled); |
util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); |
break; |
case 5: |
rgba_to_bgra(border_swizzled); |
if (desc->channel[1].size == 5) { |
util_pack_color(border_swizzled, PIPE_FORMAT_B5G5R5A1_UNORM, &uc); |
} else if (desc->channel[1].size == 6) { |
util_pack_color(border_swizzled, PIPE_FORMAT_B5G6R5_UNORM, &uc); |
} else { |
assert(0); |
} |
break; |
default: |
case 8: |
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); |
} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { |
if (desc->nr_channels == 2) { |
border_swizzled[3] = border_swizzled[1]; |
util_pack_color(border_swizzled, PIPE_FORMAT_L8A8_SRGB, &uc); |
} else { |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SRGB, &uc); |
} |
} else { |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); |
} |
break; |
case 10: |
util_pack_color(border_swizzled, PIPE_FORMAT_R10G10B10A2_UNORM, &uc); |
break; |
case 16: |
if (desc->nr_channels <= 2) { |
if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { |
util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc); |
} else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { |
util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_SNORM, &uc); |
} else { |
util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); |
} |
} else { |
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); |
} else { |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); |
} |
} |
break; |
case 32: |
if (desc->nr_channels == 1) { |
util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc); |
} else { |
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); |
} |
break; |
} |
return uc.ui; |
} |
static void r300_merge_textures_and_samplers(struct r300_context* r300) |
{ |
struct r300_textures_state *state = |
(struct r300_textures_state*)r300->textures_state.state; |
struct r300_texture_sampler_state *texstate; |
struct r300_sampler_state *sampler; |
struct r300_sampler_view *view; |
struct r300_resource *tex; |
unsigned base_level, min_level, level_count, i, j, size; |
unsigned count = MIN2(state->sampler_view_count, |
state->sampler_state_count); |
boolean has_us_format = r300->screen->caps.has_us_format; |
/* The KIL opcode fix, see below. */ |
if (!count && !r300->screen->caps.is_r500) |
count = 1; |
state->tx_enable = 0; |
state->count = 0; |
size = 2; |
for (i = 0; i < count; i++) { |
if (state->sampler_views[i] && state->sampler_states[i]) { |
state->tx_enable |= 1 << i; |
view = state->sampler_views[i]; |
tex = r300_resource(view->base.texture); |
sampler = state->sampler_states[i]; |
texstate = &state->regs[i]; |
texstate->format = view->format; |
texstate->filter0 = sampler->filter0; |
texstate->filter1 = sampler->filter1; |
/* Set the border color. */ |
texstate->border_color = |
r300_get_border_color(view->base.format, |
sampler->state.border_color.f, |
r300->screen->caps.is_r500); |
/* determine min/max levels */ |
base_level = view->base.u.tex.first_level; |
min_level = sampler->min_lod; |
level_count = MIN3(sampler->max_lod, |
tex->b.b.last_level - base_level, |
view->base.u.tex.last_level - base_level); |
if (base_level + min_level) { |
unsigned offset; |
if (tex->tex.is_npot) { |
/* Even though we do not implement mipmapping for NPOT |
* textures, we should at least honor the minimum level |
* which is allowed to be displayed. We do this by setting up |
* an i-th mipmap level as the zero level. */ |
base_level += min_level; |
} |
offset = tex->tex.offset_in_bytes[base_level]; |
r300_texture_setup_format_state(r300->screen, tex, |
view->base.format, |
base_level, |
view->width0_override, |
view->height0_override, |
&texstate->format); |
texstate->format.tile_config |= offset & 0xffffffe0; |
assert((offset & 0x1f) == 0); |
} |
/* Assign a texture cache region. */ |
texstate->format.format1 |= view->texcache_region; |
/* Depth textures are kinda special. */ |
if (util_format_is_depth_or_stencil(view->base.format)) { |
unsigned char depth_swizzle[4]; |
if (!r300->screen->caps.is_r500 && |
util_format_get_blocksizebits(view->base.format) == 32) { |
/* X24x8 is sampled as Y16X16 on r3xx-r4xx. |
* The depth here is at the Y component. */ |
for (j = 0; j < 4; j++) |
depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y; |
} else { |
for (j = 0; j < 4; j++) |
depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X; |
} |
/* If compare mode is disabled, sampler view swizzles |
* are stored in the format. |
* Otherwise, the swizzles must be applied after the compare |
* mode in the fragment shader. */ |
if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { |
texstate->format.format1 |= |
r300_get_swizzle_combined(depth_swizzle, |
view->swizzle, FALSE); |
} else { |
texstate->format.format1 |= |
r300_get_swizzle_combined(depth_swizzle, 0, FALSE); |
} |
} |
if (r300->screen->caps.dxtc_swizzle && |
util_format_is_compressed(view->base.format)) { |
texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; |
} |
/* to emulate 1D textures through 2D ones correctly */ |
if (tex->b.b.target == PIPE_TEXTURE_1D) { |
texstate->filter0 &= ~R300_TX_WRAP_T_MASK; |
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); |
} |
/* The hardware doesn't like CLAMP and CLAMP_TO_BORDER |
* for the 3rd coordinate if the texture isn't 3D. */ |
if (tex->b.b.target != PIPE_TEXTURE_3D) { |
texstate->filter0 &= ~R300_TX_WRAP_R_MASK; |
} |
if (tex->tex.is_npot) { |
/* NPOT textures don't support mip filter, unfortunately. |
* This prevents incorrect rendering. */ |
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; |
/* Mask out the mirrored flag. */ |
if (texstate->filter0 & R300_TX_WRAP_S(R300_TX_MIRRORED)) { |
texstate->filter0 &= ~R300_TX_WRAP_S(R300_TX_MIRRORED); |
} |
if (texstate->filter0 & R300_TX_WRAP_T(R300_TX_MIRRORED)) { |
texstate->filter0 &= ~R300_TX_WRAP_T(R300_TX_MIRRORED); |
} |
/* Change repeat to clamp-to-edge. |
* (the repeat bit has a value of 0, no masking needed). */ |
if ((texstate->filter0 & R300_TX_WRAP_S_MASK) == |
R300_TX_WRAP_S(R300_TX_REPEAT)) { |
texstate->filter0 |= R300_TX_WRAP_S(R300_TX_CLAMP_TO_EDGE); |
} |
if ((texstate->filter0 & R300_TX_WRAP_T_MASK) == |
R300_TX_WRAP_T(R300_TX_REPEAT)) { |
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); |
} |
} else { |
/* the MAX_MIP level is the largest (finest) one */ |
texstate->format.format0 |= R300_TX_NUM_LEVELS(level_count); |
texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); |
} |
/* Float textures only support nearest and mip-nearest filtering. */ |
if (util_format_is_float(view->base.format)) { |
/* No MAG linear filtering. */ |
if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) == |
R300_TX_MAG_FILTER_LINEAR) { |
texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK; |
texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST; |
} |
/* No MIN linear filtering. */ |
if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) == |
R300_TX_MIN_FILTER_LINEAR) { |
texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK; |
texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST; |
} |
/* No mipmap linear filtering. */ |
if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) == |
R300_TX_MIN_FILTER_MIP_LINEAR) { |
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; |
texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST; |
} |
/* No anisotropic filtering. */ |
texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK; |
texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK; |
texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY; |
} |
texstate->filter0 |= i << 28; |
size += 16 + (has_us_format ? 2 : 0); |
state->count = i+1; |
} else { |
/* For the KIL opcode to work on r3xx-r4xx, the texture unit |
* assigned to this opcode (it's always the first one) must be |
* enabled. Otherwise the opcode doesn't work. |
* |
* In order to not depend on the fragment shader, we just make |
* the first unit enabled all the time. */ |
if (i == 0 && !r300->screen->caps.is_r500) { |
pipe_sampler_view_reference( |
(struct pipe_sampler_view**)&state->sampler_views[i], |
&r300->texkill_sampler->base); |
state->tx_enable |= 1 << i; |
texstate = &state->regs[i]; |
/* Just set some valid state. */ |
texstate->format = r300->texkill_sampler->format; |
texstate->filter0 = |
r300_translate_tex_filters(PIPE_TEX_FILTER_NEAREST, |
PIPE_TEX_FILTER_NEAREST, |
PIPE_TEX_FILTER_NEAREST, |
FALSE); |
texstate->filter1 = 0; |
texstate->border_color = 0; |
texstate->filter0 |= i << 28; |
size += 16 + (has_us_format ? 2 : 0); |
state->count = i+1; |
} |
} |
} |
r300->textures_state.size = size; |
/* Pick a fragment shader based on either the texture compare state |
* or the uses_pitch flag or some other external state. */ |
if (count && |
r300->fs_status == FRAGMENT_SHADER_VALID) { |
r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY; |
} |
} |
static void r300_decompress_depth_textures(struct r300_context *r300) |
{ |
struct r300_textures_state *state = |
(struct r300_textures_state*)r300->textures_state.state; |
struct pipe_resource *tex; |
unsigned count = MIN2(state->sampler_view_count, |
state->sampler_state_count); |
unsigned i; |
if (!r300->locked_zbuffer) { |
return; |
} |
for (i = 0; i < count; i++) { |
if (state->sampler_views[i] && state->sampler_states[i]) { |
tex = state->sampler_views[i]->base.texture; |
if (tex == r300->locked_zbuffer->texture) { |
r300_decompress_zmask_locked(r300); |
return; |
} |
} |
} |
} |
static void r300_validate_fragment_shader(struct r300_context *r300) |
{ |
struct pipe_framebuffer_state *fb = r300->fb_state.state; |
if (r300->fs.state && r300->fs_status != FRAGMENT_SHADER_VALID) { |
/* Pick the fragment shader based on external states. |
* Then mark the state dirty if the fragment shader is either dirty |
* or the function r300_pick_fragment_shader changed the shader. */ |
if (r300_pick_fragment_shader(r300) || |
r300->fs_status == FRAGMENT_SHADER_DIRTY) { |
/* Mark the state atom as dirty. */ |
r300_mark_fs_code_dirty(r300); |
/* Does Multiwrite need to be changed? */ |
if (fb->nr_cbufs > 1) { |
boolean new_multiwrite = |
r300_fragment_shader_writes_all(r300_fs(r300)); |
if (r300->fb_multiwrite != new_multiwrite) { |
r300->fb_multiwrite = new_multiwrite; |
r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE); |
} |
} |
} |
r300->fs_status = FRAGMENT_SHADER_VALID; |
} |
} |
void r300_update_derived_state(struct r300_context* r300) |
{ |
if (r300->textures_state.dirty) { |
r300_decompress_depth_textures(r300); |
r300_merge_textures_and_samplers(r300); |
} |
r300_validate_fragment_shader(r300); |
if (r300->rs_block_state.dirty) { |
r300_update_rs_block(r300); |
if (r300->draw) { |
memset(&r300->vertex_info, 0, sizeof(struct vertex_info)); |
r300_draw_emit_all_attribs(r300); |
draw_compute_vertex_size(&r300->vertex_info); |
r300_swtcl_vertex_psc(r300); |
} |
} |
r300_update_hyperz_state(r300); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_state_inlines.h |
---|
0,0 → 1,439 |
/* |
* Copyright 2009 Joakim Sindholt <opensource@zhasha.com> |
* Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_STATE_INLINES_H |
#define R300_STATE_INLINES_H |
#include "draw/draw_vertex.h" |
#include "pipe/p_format.h" |
#include "util/u_format.h" |
#include "r300_reg.h" |
#include <stdio.h> |
/* Some maths. These should probably find their way to u_math, if needed. */ |
/* Multiply f by 6, truncate the product to an int and keep its low 16 bits. */
static INLINE int pack_float_16_6x(float f)
{
    const int scaled = (int)(f * 6.0);

    return scaled & 0xffff;
}
/* Blend state. */ |
/* Map a PIPE_BLEND_* function to the matching R300_COMB_FCN_* value.
 *
 * For ADD, SUBTRACT and REVERSE_SUBTRACT, `clamp` chooses between the
 * _CLAMP and _NOCLAMP variants; MIN and MAX ignore it.
 * An unknown function is reported on stderr, asserts, and yields 0. */
static INLINE uint32_t r300_translate_blend_function(int blend_func,
                                                     boolean clamp)
{
    switch (blend_func) {
    case PIPE_BLEND_MIN:
        return R300_COMB_FCN_MIN;

    case PIPE_BLEND_MAX:
        return R300_COMB_FCN_MAX;

    case PIPE_BLEND_ADD:
        if (clamp)
            return R300_COMB_FCN_ADD_CLAMP;
        return R300_COMB_FCN_ADD_NOCLAMP;

    case PIPE_BLEND_SUBTRACT:
        if (clamp)
            return R300_COMB_FCN_SUB_CLAMP;
        return R300_COMB_FCN_SUB_NOCLAMP;

    case PIPE_BLEND_REVERSE_SUBTRACT:
        if (clamp)
            return R300_COMB_FCN_RSUB_CLAMP;
        return R300_COMB_FCN_RSUB_NOCLAMP;

    default:
        break;
    }

    fprintf(stderr, "r300: Unknown blend function %d\n", blend_func);
    assert(0);
    return 0;
}
/* XXX we can also offer the D3D versions of some of these... */ |
/* Map a PIPE_BLENDFACTOR_* value to the matching R300_BLEND_GL_* value.
 *
 * The SRC1 factors are recognised but not supported; they and any unknown
 * value are reported on stderr, assert, and yield 0. */
static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
{
    switch (blend_fact) {
    /* Constants. */
    case PIPE_BLENDFACTOR_ZERO:
        return R300_BLEND_GL_ZERO;
    case PIPE_BLENDFACTOR_ONE:
        return R300_BLEND_GL_ONE;

    /* Source factors and their inverses. */
    case PIPE_BLENDFACTOR_SRC_COLOR:
        return R300_BLEND_GL_SRC_COLOR;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
        return R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
    case PIPE_BLENDFACTOR_SRC_ALPHA:
        return R300_BLEND_GL_SRC_ALPHA;
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
        return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
        return R300_BLEND_GL_SRC_ALPHA_SATURATE;

    /* Destination factors and their inverses. */
    case PIPE_BLENDFACTOR_DST_COLOR:
        return R300_BLEND_GL_DST_COLOR;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
        return R300_BLEND_GL_ONE_MINUS_DST_COLOR;
    case PIPE_BLENDFACTOR_DST_ALPHA:
        return R300_BLEND_GL_DST_ALPHA;
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
        return R300_BLEND_GL_ONE_MINUS_DST_ALPHA;

    /* Blend-constant factors and their inverses. */
    case PIPE_BLENDFACTOR_CONST_COLOR:
        return R300_BLEND_GL_CONST_COLOR;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
        return R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
    case PIPE_BLENDFACTOR_CONST_ALPHA:
        return R300_BLEND_GL_CONST_ALPHA;
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
        return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;

    /* Known factors this driver does not implement. */
    case PIPE_BLENDFACTOR_SRC1_COLOR:
    case PIPE_BLENDFACTOR_SRC1_ALPHA:
    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
        fprintf(stderr, "r300: Implementation error: "
            "Bad blend factor %d not supported!\n", blend_fact);
        break;

    default:
        fprintf(stderr, "r300: Unknown blend factor %d\n", blend_fact);
        break;
    }

    /* Only the two error paths reach this point. */
    assert(0);
    return 0;
}
/* DSA state. */ |
/* Map a PIPE_FUNC_* comparison to the matching R300_ZS_* value.
 * An unknown function is reported on stderr, asserts, and yields 0. */
static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
{
    switch (zs_func) {
    case PIPE_FUNC_NEVER:    return R300_ZS_NEVER;
    case PIPE_FUNC_LESS:     return R300_ZS_LESS;
    case PIPE_FUNC_EQUAL:    return R300_ZS_EQUAL;
    case PIPE_FUNC_LEQUAL:   return R300_ZS_LEQUAL;
    case PIPE_FUNC_GREATER:  return R300_ZS_GREATER;
    case PIPE_FUNC_NOTEQUAL: return R300_ZS_NOTEQUAL;
    case PIPE_FUNC_GEQUAL:   return R300_ZS_GEQUAL;
    case PIPE_FUNC_ALWAYS:   return R300_ZS_ALWAYS;
    default:
        break;
    }

    fprintf(stderr, "r300: Unknown depth/stencil function %d\n",
            zs_func);
    assert(0);
    return 0;
}
/* Map a PIPE_STENCIL_OP_* value to the matching R300_ZS_* stencil operation.
 *
 * An unknown operation is reported on stderr, asserts, and yields 0.
 * The diagnostic ends with a newline, like the other translators in this
 * file, so consecutive messages do not run together. */
static INLINE uint32_t r300_translate_stencil_op(int s_op)
{
    switch (s_op) {
        case PIPE_STENCIL_OP_KEEP:
            return R300_ZS_KEEP;
        case PIPE_STENCIL_OP_ZERO:
            return R300_ZS_ZERO;
        case PIPE_STENCIL_OP_REPLACE:
            return R300_ZS_REPLACE;
        case PIPE_STENCIL_OP_INCR:
            return R300_ZS_INCR;
        case PIPE_STENCIL_OP_DECR:
            return R300_ZS_DECR;
        case PIPE_STENCIL_OP_INCR_WRAP:
            return R300_ZS_INCR_WRAP;
        case PIPE_STENCIL_OP_DECR_WRAP:
            return R300_ZS_DECR_WRAP;
        case PIPE_STENCIL_OP_INVERT:
            return R300_ZS_INVERT;
        default:
            fprintf(stderr, "r300: Unknown stencil op %d\n", s_op);
            assert(0);
            break;
    }
    return 0;
}
/* Map a PIPE_FUNC_* comparison to the matching R300_FG_ALPHA_FUNC_* value.
 *
 * An unknown function is reported on stderr, asserts, and yields 0.
 * The diagnostic ends with a newline, like the other translators in this
 * file, so consecutive messages do not run together. */
static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
{
    switch (alpha_func) {
        case PIPE_FUNC_NEVER:
            return R300_FG_ALPHA_FUNC_NEVER;
        case PIPE_FUNC_LESS:
            return R300_FG_ALPHA_FUNC_LESS;
        case PIPE_FUNC_EQUAL:
            return R300_FG_ALPHA_FUNC_EQUAL;
        case PIPE_FUNC_LEQUAL:
            return R300_FG_ALPHA_FUNC_LE;
        case PIPE_FUNC_GREATER:
            return R300_FG_ALPHA_FUNC_GREATER;
        case PIPE_FUNC_NOTEQUAL:
            return R300_FG_ALPHA_FUNC_NOTEQUAL;
        case PIPE_FUNC_GEQUAL:
            return R300_FG_ALPHA_FUNC_GE;
        case PIPE_FUNC_ALWAYS:
            return R300_FG_ALPHA_FUNC_ALWAYS;
        default:
            fprintf(stderr, "r300: Unknown alpha function %d\n", alpha_func);
            assert(0);
            break;
    }
    return 0;
}
static INLINE uint32_t |
r300_translate_polygon_mode_front(unsigned mode) { |
switch (mode) |
{ |
case PIPE_POLYGON_MODE_FILL: |
return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; |
case PIPE_POLYGON_MODE_LINE: |
return R300_GA_POLY_MODE_FRONT_PTYPE_LINE; |
case PIPE_POLYGON_MODE_POINT: |
return R300_GA_POLY_MODE_FRONT_PTYPE_POINT; |
default: |
fprintf(stderr, "r300: Bad polygon mode %i in %s\n", mode, |
__FUNCTION__); |
return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; |
} |
} |
static INLINE uint32_t |
r300_translate_polygon_mode_back(unsigned mode) { |
switch (mode) |
{ |
case PIPE_POLYGON_MODE_FILL: |
return R300_GA_POLY_MODE_BACK_PTYPE_TRI; |
case PIPE_POLYGON_MODE_LINE: |
return R300_GA_POLY_MODE_BACK_PTYPE_LINE; |
case PIPE_POLYGON_MODE_POINT: |
return R300_GA_POLY_MODE_BACK_PTYPE_POINT; |
default: |
fprintf(stderr, "r300: Bad polygon mode %i in %s\n", mode, |
__FUNCTION__); |
return R300_GA_POLY_MODE_BACK_PTYPE_TRI; |
} |
} |
/* Texture sampler state. */ |
/* Map a PIPE_TEX_WRAP_* mode to the matching R300_TX_* wrap value.
 *
 * The MIRROR_* modes are the corresponding base mode with R300_TX_MIRRORED
 * OR'ed in. An unknown mode is reported on stderr, asserts, and yields 0.
 * The diagnostic ends with a newline, like the other translators in this
 * file, so consecutive messages do not run together. */
static INLINE uint32_t r300_translate_wrap(int wrap)
{
    switch (wrap) {
        case PIPE_TEX_WRAP_REPEAT:
            return R300_TX_REPEAT;
        case PIPE_TEX_WRAP_CLAMP:
            return R300_TX_CLAMP;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
            return R300_TX_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
            return R300_TX_CLAMP_TO_BORDER;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
            return R300_TX_REPEAT | R300_TX_MIRRORED;
        case PIPE_TEX_WRAP_MIRROR_CLAMP:
            return R300_TX_CLAMP | R300_TX_MIRRORED;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
            return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
            return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
        default:
            fprintf(stderr, "r300: Unknown texture wrap %d\n", wrap);
            assert(0);
            return 0;
    }
}
/* Build the filter bits from the minification, magnification and mip
 * filters.
 *
 * min, mag       - PIPE_TEX_FILTER_* values.
 * mip            - a PIPE_TEX_MIPFILTER_* value.
 * is_anisotropic - when set, a LINEAR min/mag filter is emitted as the
 *                  corresponding ANISO filter instead.
 *
 * An unknown filter is reported on stderr and asserts; it contributes no
 * bits to the result. */
static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
                                                  boolean is_anisotropic)
{
    uint32_t bits = 0;

    /* Minification. */
    if (min == PIPE_TEX_FILTER_NEAREST) {
        bits |= R300_TX_MIN_FILTER_NEAREST;
    } else if (min == PIPE_TEX_FILTER_LINEAR) {
        if (is_anisotropic)
            bits |= R300_TX_MIN_FILTER_ANISO;
        else
            bits |= R300_TX_MIN_FILTER_LINEAR;
    } else {
        fprintf(stderr, "r300: Unknown texture filter %d\n", min);
        assert(0);
    }

    /* Magnification. */
    if (mag == PIPE_TEX_FILTER_NEAREST) {
        bits |= R300_TX_MAG_FILTER_NEAREST;
    } else if (mag == PIPE_TEX_FILTER_LINEAR) {
        if (is_anisotropic)
            bits |= R300_TX_MAG_FILTER_ANISO;
        else
            bits |= R300_TX_MAG_FILTER_LINEAR;
    } else {
        fprintf(stderr, "r300: Unknown texture filter %d\n", mag);
        assert(0);
    }

    /* Mipmapping. */
    if (mip == PIPE_TEX_MIPFILTER_NONE) {
        bits |= R300_TX_MIN_FILTER_MIP_NONE;
    } else if (mip == PIPE_TEX_MIPFILTER_NEAREST) {
        bits |= R300_TX_MIN_FILTER_MIP_NEAREST;
    } else if (mip == PIPE_TEX_MIPFILTER_LINEAR) {
        bits |= R300_TX_MIN_FILTER_MIP_LINEAR;
    } else {
        fprintf(stderr, "r300: Unknown texture filter %d\n", mip);
        assert(0);
    }

    return bits;
}
/* Pick the R300_TX_MAX_ANISO_* setting for max_aniso: the largest of the
 * 16, 8, 4 and 2 thresholds that max_aniso reaches, or 1:1 below 2. */
static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
{
    if (max_aniso >= 16)
        return R300_TX_MAX_ANISO_16_TO_1;

    if (max_aniso >= 8)
        return R300_TX_MAX_ANISO_8_TO_1;

    if (max_aniso >= 4)
        return R300_TX_MAX_ANISO_4_TO_1;

    if (max_aniso >= 2)
        return R300_TX_MAX_ANISO_2_TO_1;

    return R300_TX_MAX_ANISO_1_TO_1;
}
/* Compute the R500 anisotropy bits for max_aniso.
 *
 * Zero yields 0. Otherwise (max_aniso - 1) is scaled by 4.2001, truncated,
 * capped at 63 and placed with R500_TX_MAX_ANISO(), with
 * R500_TX_ANISO_HIGH_QUALITY always set. */
static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
{
    unsigned level;

    if (max_aniso == 0)
        return 0;

    /* Map the range [0, 15] to [0, 63]. */
    level = (unsigned)((max_aniso - 1) * 4.2001);

    return R500_TX_MAX_ANISO(MIN2(level, 63)) |
           R500_TX_ANISO_HIGH_QUALITY;
}
/* Translate pipe_formats into PSC vertex types. */ |
static INLINE uint16_t |
r300_translate_vertex_data_type(enum pipe_format format) { |
uint32_t result = 0; |
const struct util_format_description *desc; |
unsigned i; |
desc = util_format_description(format); |
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { |
return R300_INVALID_FORMAT; |
} |
/* Find the first non-VOID channel. */ |
for (i = 0; i < 4; i++) { |
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { |
break; |
} |
} |
switch (desc->channel[i].type) { |
/* Half-floats, floats, doubles */ |
case UTIL_FORMAT_TYPE_FLOAT: |
switch (desc->channel[i].size) { |
case 16: |
/* Supported only on RV350 and later. */ |
if (desc->nr_channels > 2) { |
result = R300_DATA_TYPE_FLT16_4; |
} else { |
result = R300_DATA_TYPE_FLT16_2; |
} |
break; |
case 32: |
result = R300_DATA_TYPE_FLOAT_1 + (desc->nr_channels - 1); |
break; |
default: |
return R300_INVALID_FORMAT; |
} |
break; |
/* Unsigned ints */ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
/* Signed ints */ |
case UTIL_FORMAT_TYPE_SIGNED: |
switch (desc->channel[i].size) { |
case 8: |
result = R300_DATA_TYPE_BYTE; |
break; |
case 16: |
if (desc->nr_channels > 2) { |
result = R300_DATA_TYPE_SHORT_4; |
} else { |
result = R300_DATA_TYPE_SHORT_2; |
} |
break; |
default: |
return R300_INVALID_FORMAT; |
} |
break; |
default: |
return R300_INVALID_FORMAT; |
} |
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { |
result |= R300_SIGNED; |
} |
if (desc->channel[i].normalized) { |
result |= R300_NORMALIZE; |
} |
return result; |
} |
static INLINE uint16_t |
r300_translate_vertex_data_swizzle(enum pipe_format format) { |
const struct util_format_description *desc = util_format_description(format); |
unsigned i, swizzle = 0; |
assert(format); |
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { |
fprintf(stderr, "r300: Bad format %s in %s:%d\n", |
util_format_short_name(format), __FUNCTION__, __LINE__); |
return 0; |
} |
for (i = 0; i < desc->nr_channels; i++) { |
swizzle |= |
MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i); |
} |
/* Set (0,0,0,1) in unused components. */ |
for (; i < 3; i++) { |
swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i); |
} |
for (; i < 4; i++) { |
swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i); |
} |
return swizzle | (0xf << R300_WRITE_ENA_SHIFT); |
} |
#endif /* R300_STATE_INLINES_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture.c |
---|
0,0 → 1,1232 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
/* Always include headers in the reverse order!! ~ M. */ |
#include "r300_texture.h" |
#include "r300_context.h" |
#include "r300_reg.h" |
#include "r300_texture_desc.h" |
#include "r300_transfer.h" |
#include "r300_screen.h" |
#include "util/u_format.h" |
#include "util/u_format_s3tc.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_mm.h" |
#include "pipe/p_screen.h" |
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, |
const unsigned char *swizzle_view, |
boolean dxtc_swizzle) |
{ |
unsigned i; |
unsigned char swizzle[4]; |
unsigned result = 0; |
const uint32_t swizzle_shift[4] = { |
R300_TX_FORMAT_R_SHIFT, |
R300_TX_FORMAT_G_SHIFT, |
R300_TX_FORMAT_B_SHIFT, |
R300_TX_FORMAT_A_SHIFT |
}; |
uint32_t swizzle_bit[4] = { |
dxtc_swizzle ? R300_TX_FORMAT_Z : R300_TX_FORMAT_X, |
R300_TX_FORMAT_Y, |
dxtc_swizzle ? R300_TX_FORMAT_X : R300_TX_FORMAT_Z, |
R300_TX_FORMAT_W |
}; |
if (swizzle_view) { |
/* Combine two sets of swizzles. */ |
util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle); |
} else { |
memcpy(swizzle, swizzle_format, 4); |
} |
/* Get swizzle. */ |
for (i = 0; i < 4; i++) { |
switch (swizzle[i]) { |
case UTIL_FORMAT_SWIZZLE_Y: |
result |= swizzle_bit[1] << swizzle_shift[i]; |
break; |
case UTIL_FORMAT_SWIZZLE_Z: |
result |= swizzle_bit[2] << swizzle_shift[i]; |
break; |
case UTIL_FORMAT_SWIZZLE_W: |
result |= swizzle_bit[3] << swizzle_shift[i]; |
break; |
case UTIL_FORMAT_SWIZZLE_0: |
result |= R300_TX_FORMAT_ZERO << swizzle_shift[i]; |
break; |
case UTIL_FORMAT_SWIZZLE_1: |
result |= R300_TX_FORMAT_ONE << swizzle_shift[i]; |
break; |
default: /* UTIL_FORMAT_SWIZZLE_X */ |
result |= swizzle_bit[0] << swizzle_shift[i]; |
} |
} |
return result; |
} |
/* Translate a pipe_format into a useful texture format for sampling. |
* |
* Some special formats are translated directly using R300_EASY_TX_FORMAT, |
* but the majority of them are translated in a generic way, automatically |
* supporting all the formats hw can support. |
* |
* R300_EASY_TX_FORMAT swizzles the texture. |
* Note the signature of R300_EASY_TX_FORMAT: |
* R300_EASY_TX_FORMAT(B, G, R, A, FORMAT); |
* |
* The FORMAT specifies how the texture sampler will treat the texture, and |
* makes available X, Y, Z, W, ZERO, and ONE for swizzling. |
* |
* Parameters: |
* format       - the format to translate. |
* swizzle_view - optional view swizzle that is combined with the format's |
*                own swizzle by r300_get_swizzle_combined; may be NULL. |
* is_r500      - selects R500_TX_FORMAT_Y8X24 instead of |
*                R300_TX_FORMAT_Y16X16 for the 24-bit depth formats. |
* dxtc_swizzle - when TRUE, compressed formats other than RGTC2/LATC2 are |
*                swizzled with the dxtc variant of r300_get_swizzle_combined. |
* |
* Returns the texture format bits, or ~0 if the format is unsupported. */ |
uint32_t r300_translate_texformat(enum pipe_format format, |
const unsigned char *swizzle_view, |
boolean is_r500, |
boolean dxtc_swizzle) |
{ |
uint32_t result = 0; |
const struct util_format_description *desc; |
unsigned i; |
boolean uniform = TRUE; |
/* Sign bit for each channel index: channel 0 uses SIGNED_W, channel 3 |
* uses SIGNED_X. */ |
const uint32_t sign_bit[4] = { |
R300_TX_FORMAT_SIGNED_W, |
R300_TX_FORMAT_SIGNED_Z, |
R300_TX_FORMAT_SIGNED_Y, |
R300_TX_FORMAT_SIGNED_X, |
}; |
desc = util_format_description(format); |
/* Colorspace (return non-RGB formats directly). */ |
switch (desc->colorspace) { |
/* Depth stencil formats. |
* Swizzles are added in r300_merge_textures_and_samplers. */ |
case UTIL_FORMAT_COLORSPACE_ZS: |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
return R300_TX_FORMAT_X16; |
case PIPE_FORMAT_X8Z24_UNORM: |
case PIPE_FORMAT_S8_UINT_Z24_UNORM: |
if (is_r500) |
return R500_TX_FORMAT_Y8X24; |
else |
return R300_TX_FORMAT_Y16X16; |
default: |
return ~0; /* Unsupported. */ |
} |
/* YUV formats. */ |
case UTIL_FORMAT_COLORSPACE_YUV: |
result |= R300_TX_FORMAT_YUV_TO_RGB; |
switch (format) { |
case PIPE_FORMAT_UYVY: |
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result; |
case PIPE_FORMAT_YUYV: |
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result; |
default: |
return ~0; /* Unsupported/unknown. */ |
} |
/* Add gamma correction. */ |
case UTIL_FORMAT_COLORSPACE_SRGB: |
result |= R300_TX_FORMAT_GAMMA; |
break; |
default: |
switch (format) { |
/* Same as YUV but without the YUV->RGB conversion. */ |
case PIPE_FORMAT_R8G8_B8G8_UNORM: |
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result; |
case PIPE_FORMAT_G8R8_G8B8_UNORM: |
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result; |
default:; |
} |
} |
/* Add swizzling. */ |
/* The RGTC1_SNORM and LATC1_SNORM swizzle is done in the shader. */ |
if (format != PIPE_FORMAT_RGTC1_SNORM && |
format != PIPE_FORMAT_LATC1_SNORM) { |
if (util_format_is_compressed(format) && |
dxtc_swizzle && |
format != PIPE_FORMAT_RGTC2_UNORM && |
format != PIPE_FORMAT_RGTC2_SNORM && |
format != PIPE_FORMAT_LATC2_UNORM && |
format != PIPE_FORMAT_LATC2_SNORM) { |
result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, |
TRUE); |
} else { |
result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, |
FALSE); |
} |
} |
/* S3TC formats. */ |
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { |
if (!util_format_s3tc_enabled) { |
return ~0; /* Unsupported. */ |
} |
switch (format) { |
case PIPE_FORMAT_DXT1_RGB: |
case PIPE_FORMAT_DXT1_RGBA: |
case PIPE_FORMAT_DXT1_SRGB: |
case PIPE_FORMAT_DXT1_SRGBA: |
return R300_TX_FORMAT_DXT1 | result; |
case PIPE_FORMAT_DXT3_RGBA: |
case PIPE_FORMAT_DXT3_SRGBA: |
return R300_TX_FORMAT_DXT3 | result; |
case PIPE_FORMAT_DXT5_RGBA: |
case PIPE_FORMAT_DXT5_SRGBA: |
return R300_TX_FORMAT_DXT5 | result; |
default: |
return ~0; /* Unsupported/unknown. */ |
} |
} |
/* RGTC formats. */ |
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { |
switch (format) { |
case PIPE_FORMAT_RGTC1_SNORM: |
case PIPE_FORMAT_LATC1_SNORM: |
case PIPE_FORMAT_LATC1_UNORM: |
case PIPE_FORMAT_RGTC1_UNORM: |
return R500_TX_FORMAT_ATI1N | result; |
case PIPE_FORMAT_RGTC2_SNORM: |
case PIPE_FORMAT_LATC2_SNORM: |
result |= sign_bit[1] | sign_bit[0]; |
/* Fall through: the signed variants share the ATI2N format. */ |
case PIPE_FORMAT_RGTC2_UNORM: |
case PIPE_FORMAT_LATC2_UNORM: |
return R400_TX_FORMAT_ATI2N | result; |
default: |
return ~0; /* Unsupported/unknown. */ |
} |
} |
/* This is truly a special format. |
* It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) |
* in the sampler unit. Also known as D3DFMT_CxV8U8. */ |
if (format == PIPE_FORMAT_R8G8Bx_SNORM) { |
return R300_TX_FORMAT_CxV8U8 | result; |
} |
/* Integer and fixed-point 16.16 textures are not supported. */ |
for (i = 0; i < 4; i++) { |
if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || |
((desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || |
desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) && |
(!desc->channel[i].normalized || |
desc->channel[i].pure_integer))) { |
return ~0; /* Unsupported/unknown. */ |
} |
} |
/* Add sign. */ |
for (i = 0; i < desc->nr_channels; i++) { |
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { |
result |= sign_bit[i]; |
} |
} |
/* See whether the components are of the same size. */ |
for (i = 1; i < desc->nr_channels; i++) { |
uniform = uniform && desc->channel[0].size == desc->channel[i].size; |
} |
/* Non-uniform formats. */ |
if (!uniform) { |
switch (desc->nr_channels) { |
case 3: |
if (desc->channel[0].size == 5 && |
desc->channel[1].size == 6 && |
desc->channel[2].size == 5) { |
return R300_TX_FORMAT_Z5Y6X5 | result; |
} |
if (desc->channel[0].size == 5 && |
desc->channel[1].size == 5 && |
desc->channel[2].size == 6) { |
return R300_TX_FORMAT_Z6Y5X5 | result; |
} |
if (desc->channel[0].size == 2 && |
desc->channel[1].size == 3 && |
desc->channel[2].size == 3) { |
return R300_TX_FORMAT_Z3Y3X2 | result; |
} |
return ~0; /* Unsupported/unknown. */ |
case 4: |
if (desc->channel[0].size == 5 && |
desc->channel[1].size == 5 && |
desc->channel[2].size == 5 && |
desc->channel[3].size == 1) { |
return R300_TX_FORMAT_W1Z5Y5X5 | result; |
} |
if (desc->channel[0].size == 10 && |
desc->channel[1].size == 10 && |
desc->channel[2].size == 10 && |
desc->channel[3].size == 2) { |
return R300_TX_FORMAT_W2Z10Y10X10 | result; |
} |
} |
/* Reached for unmatched 4-channel layouts and other channel counts. */ |
return ~0; /* Unsupported/unknown. */ |
} |
/* Find the first non-VOID channel. */ |
for (i = 0; i < 4; i++) { |
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { |
break; |
} |
} |
if (i == 4) |
return ~0; /* Unsupported/unknown. */ |
/* And finally, uniform formats. */ |
switch (desc->channel[i].type) { |
case UTIL_FORMAT_TYPE_UNSIGNED: |
case UTIL_FORMAT_TYPE_SIGNED: |
if (!desc->channel[i].normalized && |
desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { |
return ~0; |
} |
switch (desc->channel[i].size) { |
case 4: |
switch (desc->nr_channels) { |
case 2: |
return R300_TX_FORMAT_Y4X4 | result; |
case 4: |
return R300_TX_FORMAT_W4Z4Y4X4 | result; |
} |
return ~0; |
case 8: |
switch (desc->nr_channels) { |
case 1: |
return R300_TX_FORMAT_X8 | result; |
case 2: |
return R300_TX_FORMAT_Y8X8 | result; |
case 4: |
return R300_TX_FORMAT_W8Z8Y8X8 | result; |
} |
return ~0; |
case 16: |
switch (desc->nr_channels) { |
case 1: |
return R300_TX_FORMAT_X16 | result; |
case 2: |
return R300_TX_FORMAT_Y16X16 | result; |
case 4: |
return R300_TX_FORMAT_W16Z16Y16X16 | result; |
} |
} |
/* Unmatched channel size or channel count. */ |
return ~0; |
case UTIL_FORMAT_TYPE_FLOAT: |
switch (desc->channel[i].size) { |
case 16: |
switch (desc->nr_channels) { |
case 1: |
return R300_TX_FORMAT_16F | result; |
case 2: |
return R300_TX_FORMAT_16F_16F | result; |
case 4: |
return R300_TX_FORMAT_16F_16F_16F_16F | result; |
} |
return ~0; |
case 32: |
switch (desc->nr_channels) { |
case 1: |
return R300_TX_FORMAT_32F | result; |
case 2: |
return R300_TX_FORMAT_32F_32F | result; |
case 4: |
return R300_TX_FORMAT_32F_32F_32F_32F | result; |
} |
} |
} |
return ~0; /* Unsupported/unknown. */ |
} |
uint32_t r500_tx_format_msb_bit(enum pipe_format format) |
{ |
switch (format) { |
case PIPE_FORMAT_RGTC1_UNORM: |
case PIPE_FORMAT_RGTC1_SNORM: |
case PIPE_FORMAT_LATC1_UNORM: |
case PIPE_FORMAT_LATC1_SNORM: |
case PIPE_FORMAT_X8Z24_UNORM: |
case PIPE_FORMAT_S8_UINT_Z24_UNORM: |
return R500_TXFORMAT_MSB; |
default: |
return 0; |
} |
} |
/* Buffer formats. */ |
/* Colorbuffer formats. This is the unswizzled format of the RB3D block's |
* output. For the swizzling of the targets, check the shader's format. */ |
static uint32_t r300_translate_colorformat(enum pipe_format format) |
{ |
switch (format) { |
/* 8-bit buffers. */ |
case PIPE_FORMAT_A8_UNORM: |
case PIPE_FORMAT_A8_SNORM: |
case PIPE_FORMAT_I8_UNORM: |
case PIPE_FORMAT_I8_SNORM: |
case PIPE_FORMAT_L8_UNORM: |
case PIPE_FORMAT_L8_SNORM: |
case PIPE_FORMAT_R8_UNORM: |
case PIPE_FORMAT_R8_SNORM: |
return R300_COLOR_FORMAT_I8; |
/* 16-bit buffers. */ |
case PIPE_FORMAT_L8A8_UNORM: |
case PIPE_FORMAT_L8A8_SNORM: |
case PIPE_FORMAT_R8G8_UNORM: |
case PIPE_FORMAT_R8G8_SNORM: |
case PIPE_FORMAT_R8A8_UNORM: |
case PIPE_FORMAT_R8A8_SNORM: |
/* These formats work fine with UV88 if US_OUT_FMT is set correctly. */ |
case PIPE_FORMAT_A16_UNORM: |
case PIPE_FORMAT_A16_SNORM: |
case PIPE_FORMAT_A16_FLOAT: |
case PIPE_FORMAT_L16_UNORM: |
case PIPE_FORMAT_L16_SNORM: |
case PIPE_FORMAT_L16_FLOAT: |
case PIPE_FORMAT_I16_UNORM: |
case PIPE_FORMAT_I16_SNORM: |
case PIPE_FORMAT_I16_FLOAT: |
case PIPE_FORMAT_R16_UNORM: |
case PIPE_FORMAT_R16_SNORM: |
case PIPE_FORMAT_R16_FLOAT: |
return R300_COLOR_FORMAT_UV88; |
case PIPE_FORMAT_B5G6R5_UNORM: |
return R300_COLOR_FORMAT_RGB565; |
case PIPE_FORMAT_B5G5R5A1_UNORM: |
case PIPE_FORMAT_B5G5R5X1_UNORM: |
return R300_COLOR_FORMAT_ARGB1555; |
case PIPE_FORMAT_B4G4R4A4_UNORM: |
case PIPE_FORMAT_B4G4R4X4_UNORM: |
return R300_COLOR_FORMAT_ARGB4444; |
/* 32-bit buffers. */ |
case PIPE_FORMAT_B8G8R8A8_UNORM: |
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ |
case PIPE_FORMAT_B8G8R8X8_UNORM: |
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ |
case PIPE_FORMAT_R8G8B8A8_UNORM: |
case PIPE_FORMAT_R8G8B8A8_SNORM: |
case PIPE_FORMAT_R8G8B8X8_UNORM: |
case PIPE_FORMAT_R8G8B8X8_SNORM: |
/* These formats work fine with ARGB8888 if US_OUT_FMT is set |
* correctly. */ |
case PIPE_FORMAT_R16G16_UNORM: |
case PIPE_FORMAT_R16G16_SNORM: |
case PIPE_FORMAT_R16G16_FLOAT: |
case PIPE_FORMAT_L16A16_UNORM: |
case PIPE_FORMAT_L16A16_SNORM: |
case PIPE_FORMAT_L16A16_FLOAT: |
case PIPE_FORMAT_R16A16_UNORM: |
case PIPE_FORMAT_R16A16_SNORM: |
case PIPE_FORMAT_R16A16_FLOAT: |
case PIPE_FORMAT_A32_FLOAT: |
case PIPE_FORMAT_L32_FLOAT: |
case PIPE_FORMAT_I32_FLOAT: |
case PIPE_FORMAT_R32_FLOAT: |
return R300_COLOR_FORMAT_ARGB8888; |
case PIPE_FORMAT_R10G10B10A2_UNORM: |
case PIPE_FORMAT_R10G10B10X2_SNORM: |
case PIPE_FORMAT_B10G10R10A2_UNORM: |
case PIPE_FORMAT_B10G10R10X2_UNORM: |
return R500_COLOR_FORMAT_ARGB2101010; /* R5xx-only? */ |
/* 64-bit buffers. */ |
case PIPE_FORMAT_R16G16B16A16_UNORM: |
case PIPE_FORMAT_R16G16B16A16_SNORM: |
case PIPE_FORMAT_R16G16B16A16_FLOAT: |
case PIPE_FORMAT_R16G16B16X16_UNORM: |
case PIPE_FORMAT_R16G16B16X16_SNORM: |
case PIPE_FORMAT_R16G16B16X16_FLOAT: |
/* These formats work fine with ARGB16161616 if US_OUT_FMT is set |
* correctly. */ |
case PIPE_FORMAT_R32G32_FLOAT: |
case PIPE_FORMAT_L32A32_FLOAT: |
case PIPE_FORMAT_R32A32_FLOAT: |
return R300_COLOR_FORMAT_ARGB16161616; |
/* 128-bit buffers. */ |
case PIPE_FORMAT_R32G32B32A32_FLOAT: |
case PIPE_FORMAT_R32G32B32X32_FLOAT: |
return R300_COLOR_FORMAT_ARGB32323232; |
/* YUV buffers. */ |
case PIPE_FORMAT_UYVY: |
return R300_COLOR_FORMAT_YVYU; |
case PIPE_FORMAT_YUYV: |
return R300_COLOR_FORMAT_VYUY; |
default: |
return ~0; /* Unsupported. */ |
} |
} |
/* Depthbuffer and stencilbuffer. Thankfully, we only support two flavors. */ |
static uint32_t r300_translate_zsformat(enum pipe_format format) |
{ |
switch (format) { |
/* 16-bit depth, no stencil */ |
case PIPE_FORMAT_Z16_UNORM: |
return R300_DEPTHFORMAT_16BIT_INT_Z; |
/* 24-bit depth, ignored stencil */ |
case PIPE_FORMAT_X8Z24_UNORM: |
/* 24-bit depth, 8-bit stencil */ |
case PIPE_FORMAT_S8_UINT_Z24_UNORM: |
return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; |
default: |
return ~0; /* Unsupported. */ |
} |
} |
/* Shader output formats. This is essentially the swizzle from the shader |
* to the RB3D block. |
* |
* Note that formats are stored from C3 to C0. */ |
/* The returned value is built by OR-ing three parts together:
 *   1. an R300_US_OUT_FMT_* code selected from the type and size of the first
 *      non-VOID channel and from desc->nr_channels,
 *   2. R300_OUT_SIGN(0xf) when every channel is UTIL_FORMAT_TYPE_SIGNED,
 *   3. the per-format R300_Cn_SEL_* channel selects from the final switch.
 * Returns ~0 when all four channels are VOID or when the format is not
 * listed in the final switch. */
static uint32_t r300_translate_out_fmt(enum pipe_format format) |
{ |
uint32_t modifier = 0; |
unsigned i; |
const struct util_format_description *desc; |
boolean uniform_sign; |
desc = util_format_description(format); |
/* Find the first non-VOID channel. */ |
for (i = 0; i < 4; i++) { |
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { |
break; |
} |
} |
if (i == 4) |
return ~0; /* Unsupported/unknown. */ |
/* Specifies how the shader output is written to the fog unit. */ |
/* The nested switches below only handle nr_channels of 1, 2 and 4 (and, for
 * floats, only sizes 16 and 32); any other combination leaves modifier
 * without an R300_US_OUT_FMT_* code. */
switch (desc->channel[i].type) { |
case UTIL_FORMAT_TYPE_FLOAT: |
switch (desc->channel[i].size) { |
case 32: |
switch (desc->nr_channels) { |
case 1: |
modifier |= R300_US_OUT_FMT_C_32_FP; |
break; |
case 2: |
modifier |= R300_US_OUT_FMT_C2_32_FP; |
break; |
case 4: |
modifier |= R300_US_OUT_FMT_C4_32_FP; |
break; |
} |
break; |
case 16: |
switch (desc->nr_channels) { |
case 1: |
modifier |= R300_US_OUT_FMT_C_16_FP; |
break; |
case 2: |
modifier |= R300_US_OUT_FMT_C2_16_FP; |
break; |
case 4: |
modifier |= R300_US_OUT_FMT_C4_16_FP; |
break; |
} |
break; |
} |
break; |
default: |
switch (desc->channel[i].size) { |
case 16: |
switch (desc->nr_channels) { |
case 1: |
modifier |= R300_US_OUT_FMT_C_16; |
break; |
case 2: |
modifier |= R300_US_OUT_FMT_C2_16; |
break; |
case 4: |
modifier |= R300_US_OUT_FMT_C4_16; |
break; |
} |
break; |
case 10: |
modifier |= R300_US_OUT_FMT_C4_10; |
break; |
default: |
/* C4_8 seems to be used for the formats whose pixel size |
* is <= 32 bits. */ |
modifier |= R300_US_OUT_FMT_C4_8; |
break; |
} |
} |
/* Add sign. */ |
/* The sign bits are only set when all desc->nr_channels channels are SIGNED;
 * a single channel of any other type (VOID included) clears uniform_sign. */
uniform_sign = TRUE; |
for (i = 0; i < desc->nr_channels; i++) |
if (desc->channel[i].type != UTIL_FORMAT_TYPE_SIGNED) |
uniform_sign = FALSE; |
if (uniform_sign) |
modifier |= R300_OUT_SIGN(0xf); |
/* Add swizzles and return. */ |
switch (format) { |
/*** Special cases (non-standard channel mapping) ***/ |
/* X8 |
* COLORFORMAT_I8 stores the Z component (C2). */ |
case PIPE_FORMAT_A8_UNORM: |
case PIPE_FORMAT_A8_SNORM: |
return modifier | R300_C2_SEL_A; |
case PIPE_FORMAT_I8_UNORM: |
case PIPE_FORMAT_I8_SNORM: |
case PIPE_FORMAT_L8_UNORM: |
case PIPE_FORMAT_L8_SNORM: |
case PIPE_FORMAT_R8_UNORM: |
case PIPE_FORMAT_R8_SNORM: |
return modifier | R300_C2_SEL_R; |
/* X8Y8 |
* COLORFORMAT_UV88 stores ZX (C2 and C0). */ |
case PIPE_FORMAT_L8A8_SNORM: |
case PIPE_FORMAT_L8A8_UNORM: |
case PIPE_FORMAT_R8A8_SNORM: |
case PIPE_FORMAT_R8A8_UNORM: |
return modifier | R300_C0_SEL_A | R300_C2_SEL_R; |
case PIPE_FORMAT_R8G8_SNORM: |
case PIPE_FORMAT_R8G8_UNORM: |
return modifier | R300_C0_SEL_G | R300_C2_SEL_R; |
/* X32Y32 |
* ARGB16161616 stores XZ for RG32F */ |
case PIPE_FORMAT_R32G32_FLOAT: |
return modifier | R300_C0_SEL_R | R300_C2_SEL_G; |
/*** Generic cases (standard channel mapping) ***/ |
/* BGRA outputs. */ |
case PIPE_FORMAT_B5G6R5_UNORM: |
case PIPE_FORMAT_B5G5R5A1_UNORM: |
case PIPE_FORMAT_B5G5R5X1_UNORM: |
case PIPE_FORMAT_B4G4R4A4_UNORM: |
case PIPE_FORMAT_B4G4R4X4_UNORM: |
case PIPE_FORMAT_B8G8R8A8_UNORM: |
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ |
case PIPE_FORMAT_B8G8R8X8_UNORM: |
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ |
case PIPE_FORMAT_B10G10R10A2_UNORM: |
case PIPE_FORMAT_B10G10R10X2_UNORM: |
return modifier | |
R300_C0_SEL_B | R300_C1_SEL_G | |
R300_C2_SEL_R | R300_C3_SEL_A; |
/* ARGB outputs. */ |
case PIPE_FORMAT_A16_UNORM: |
case PIPE_FORMAT_A16_SNORM: |
case PIPE_FORMAT_A16_FLOAT: |
case PIPE_FORMAT_A32_FLOAT: |
return modifier | |
R300_C0_SEL_A | R300_C1_SEL_R | |
R300_C2_SEL_G | R300_C3_SEL_B; |
/* RGBA outputs. */ |
case PIPE_FORMAT_R8G8B8X8_UNORM: |
case PIPE_FORMAT_R8G8B8X8_SNORM: |
case PIPE_FORMAT_R8G8B8A8_UNORM: |
case PIPE_FORMAT_R8G8B8A8_SNORM: |
case PIPE_FORMAT_R10G10B10A2_UNORM: |
case PIPE_FORMAT_R10G10B10X2_SNORM: |
case PIPE_FORMAT_R16_UNORM: |
case PIPE_FORMAT_R16G16_UNORM: |
case PIPE_FORMAT_R16G16B16A16_UNORM: |
case PIPE_FORMAT_R16_SNORM: |
case PIPE_FORMAT_R16G16_SNORM: |
case PIPE_FORMAT_R16G16B16A16_SNORM: |
case PIPE_FORMAT_R16_FLOAT: |
case PIPE_FORMAT_R16G16_FLOAT: |
case PIPE_FORMAT_R16G16B16A16_FLOAT: |
case PIPE_FORMAT_R32_FLOAT: |
case PIPE_FORMAT_R32G32B32A32_FLOAT: |
case PIPE_FORMAT_R32G32B32X32_FLOAT: |
case PIPE_FORMAT_L16_UNORM: |
case PIPE_FORMAT_L16_SNORM: |
case PIPE_FORMAT_L16_FLOAT: |
case PIPE_FORMAT_L32_FLOAT: |
case PIPE_FORMAT_I16_UNORM: |
case PIPE_FORMAT_I16_SNORM: |
case PIPE_FORMAT_I16_FLOAT: |
case PIPE_FORMAT_I32_FLOAT: |
case PIPE_FORMAT_R16G16B16X16_UNORM: |
case PIPE_FORMAT_R16G16B16X16_SNORM: |
case PIPE_FORMAT_R16G16B16X16_FLOAT: |
return modifier | |
R300_C0_SEL_R | R300_C1_SEL_G | |
R300_C2_SEL_B | R300_C3_SEL_A; |
/* LA outputs. */ |
/* Only C0 and C1 receive a channel select for these two-channel formats. */
case PIPE_FORMAT_L16A16_UNORM: |
case PIPE_FORMAT_L16A16_SNORM: |
case PIPE_FORMAT_L16A16_FLOAT: |
case PIPE_FORMAT_R16A16_UNORM: |
case PIPE_FORMAT_R16A16_SNORM: |
case PIPE_FORMAT_R16A16_FLOAT: |
case PIPE_FORMAT_L32A32_FLOAT: |
case PIPE_FORMAT_R32A32_FLOAT: |
return modifier | |
R300_C0_SEL_R | R300_C1_SEL_A; |
default: |
return ~0; /* Unsupported. */ |
} |
} |
/* Maps a colorbuffer format to one of the COLORMASK_* swizzle constants.
 * Formats are grouped by which shader channels end up in the buffer
 * (alpha-only, red-only, two-channel, BGR(A/X), RGB(A/X)).
 * Returns ~0 for any format that has no entry in the switch. */
static uint32_t r300_translate_colormask_swizzle(enum pipe_format format) |
{ |
switch (format) { |
case PIPE_FORMAT_A8_UNORM: |
case PIPE_FORMAT_A8_SNORM: |
case PIPE_FORMAT_A16_UNORM: |
case PIPE_FORMAT_A16_SNORM: |
case PIPE_FORMAT_A16_FLOAT: |
case PIPE_FORMAT_A32_FLOAT: |
return COLORMASK_AAAA; |
case PIPE_FORMAT_I8_UNORM: |
case PIPE_FORMAT_I8_SNORM: |
case PIPE_FORMAT_L8_UNORM: |
case PIPE_FORMAT_L8_SNORM: |
case PIPE_FORMAT_R8_UNORM: |
case PIPE_FORMAT_R8_SNORM: |
case PIPE_FORMAT_R32_FLOAT: |
case PIPE_FORMAT_L32_FLOAT: |
case PIPE_FORMAT_I32_FLOAT: |
return COLORMASK_RRRR; |
case PIPE_FORMAT_L8A8_SNORM: |
case PIPE_FORMAT_L8A8_UNORM: |
case PIPE_FORMAT_R8A8_UNORM: |
case PIPE_FORMAT_R8A8_SNORM: |
case PIPE_FORMAT_L16A16_UNORM: |
case PIPE_FORMAT_L16A16_SNORM: |
case PIPE_FORMAT_L16A16_FLOAT: |
case PIPE_FORMAT_R16A16_UNORM: |
case PIPE_FORMAT_R16A16_SNORM: |
case PIPE_FORMAT_R16A16_FLOAT: |
case PIPE_FORMAT_L32A32_FLOAT: |
case PIPE_FORMAT_R32A32_FLOAT: |
return COLORMASK_ARRA; |
case PIPE_FORMAT_R8G8_SNORM: |
case PIPE_FORMAT_R8G8_UNORM: |
case PIPE_FORMAT_R16G16_UNORM: |
case PIPE_FORMAT_R16G16_SNORM: |
case PIPE_FORMAT_R16G16_FLOAT: |
case PIPE_FORMAT_R32G32_FLOAT: |
return COLORMASK_GRRG; |
case PIPE_FORMAT_B5G5R5X1_UNORM: |
case PIPE_FORMAT_B4G4R4X4_UNORM: |
case PIPE_FORMAT_B8G8R8X8_UNORM: |
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ |
case PIPE_FORMAT_B10G10R10X2_UNORM: |
return COLORMASK_BGRX; |
case PIPE_FORMAT_B5G6R5_UNORM: |
case PIPE_FORMAT_B5G5R5A1_UNORM: |
case PIPE_FORMAT_B4G4R4A4_UNORM: |
case PIPE_FORMAT_B8G8R8A8_UNORM: |
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ |
case PIPE_FORMAT_B10G10R10A2_UNORM: |
return COLORMASK_BGRA; |
case PIPE_FORMAT_R8G8B8X8_UNORM: |
/* RGBX_SNORM formats are broken for an unknown reason */ |
/* Because the three SNORM cases below are commented out, those formats fall
 * through to the default branch and are reported as unsupported here. */
/*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ |
/*case PIPE_FORMAT_R10G10B10X2_SNORM:*/ |
case PIPE_FORMAT_R16G16B16X16_UNORM: |
/*case PIPE_FORMAT_R16G16B16X16_SNORM:*/ |
case PIPE_FORMAT_R16G16B16X16_FLOAT: |
case PIPE_FORMAT_R32G32B32X32_FLOAT: |
return COLORMASK_RGBX; |
case PIPE_FORMAT_R8G8B8A8_UNORM: |
case PIPE_FORMAT_R8G8B8A8_SNORM: |
case PIPE_FORMAT_R10G10B10A2_UNORM: |
case PIPE_FORMAT_R16_UNORM: |
case PIPE_FORMAT_R16G16B16A16_UNORM: |
case PIPE_FORMAT_R16_SNORM: |
case PIPE_FORMAT_R16G16B16A16_SNORM: |
case PIPE_FORMAT_R16_FLOAT: |
case PIPE_FORMAT_R16G16B16A16_FLOAT: |
case PIPE_FORMAT_R32G32B32A32_FLOAT: |
case PIPE_FORMAT_L16_UNORM: |
case PIPE_FORMAT_L16_SNORM: |
case PIPE_FORMAT_L16_FLOAT: |
case PIPE_FORMAT_I16_UNORM: |
case PIPE_FORMAT_I16_SNORM: |
case PIPE_FORMAT_I16_FLOAT: |
return COLORMASK_RGBA; |
default: |
return ~0; /* Unsupported. */ |
} |
} |
boolean r300_is_colorbuffer_format_supported(enum pipe_format format) |
{ |
return r300_translate_colorformat(format) != ~0 && |
r300_translate_out_fmt(format) != ~0 && |
r300_translate_colormask_swizzle(format) != ~0; |
} |
/* A depth/stencil format is supported when r300_translate_zsformat() does
 * not return the ~0 "unsupported" marker for it. */
boolean r300_is_zs_format_supported(enum pipe_format format)
{
    uint32_t hw_format = r300_translate_zsformat(format);

    return hw_format != ~0;
}
boolean r300_is_sampler_format_supported(enum pipe_format format) |
{ |
return r300_translate_texformat(format, 0, TRUE, FALSE) != ~0; |
} |
void r300_texture_setup_format_state(struct r300_screen *screen, |
struct r300_resource *tex, |
enum pipe_format format, |
unsigned level, |
unsigned width0_override, |
unsigned height0_override, |
struct r300_texture_format_state *out) |
{ |
struct pipe_resource *pt = &tex->b.b; |
struct r300_texture_desc *desc = &tex->tex; |
boolean is_r500 = screen->caps.is_r500; |
unsigned width, height, depth; |
unsigned txwidth, txheight, txdepth; |
width = u_minify(width0_override, level); |
height = u_minify(height0_override, level); |
depth = u_minify(desc->depth0, level); |
txwidth = (width - 1) & 0x7ff; |
txheight = (height - 1) & 0x7ff; |
txdepth = util_logbase2(depth) & 0xf; |
/* Mask out all the fields we change. */ |
out->format0 = 0; |
out->format1 &= ~R300_TX_FORMAT_TEX_COORD_TYPE_MASK; |
out->format2 &= R500_TXFORMAT_MSB; |
out->tile_config = 0; |
/* Set sampler state. */ |
out->format0 = |
R300_TX_WIDTH(txwidth) | |
R300_TX_HEIGHT(txheight) | |
R300_TX_DEPTH(txdepth); |
if (desc->uses_stride_addressing) { |
unsigned stride = |
r300_stride_to_width(format, desc->stride_in_bytes[level]); |
/* rectangles love this */ |
out->format0 |= R300_TX_PITCH_EN; |
out->format2 = (stride - 1) & 0x1fff; |
} |
if (pt->target == PIPE_TEXTURE_CUBE) { |
out->format1 |= R300_TX_FORMAT_CUBIC_MAP; |
} |
if (pt->target == PIPE_TEXTURE_3D) { |
out->format1 |= R300_TX_FORMAT_3D; |
} |
/* large textures on r500 */ |
if (is_r500) |
{ |
unsigned us_width = txwidth; |
unsigned us_height = txheight; |
unsigned us_depth = txdepth; |
if (width > 2048) { |
out->format2 |= R500_TXWIDTH_BIT11; |
} |
if (height > 2048) { |
out->format2 |= R500_TXHEIGHT_BIT11; |
} |
/* The US_FORMAT register fixes an R500 TX addressing bug. |
* Don't ask why it must be set like this. I don't know it either. */ |
if (width > 2048) { |
us_width = (0x000007FF + us_width) >> 1; |
us_depth |= 0x0000000D; |
} |
if (height > 2048) { |
us_height = (0x000007FF + us_height) >> 1; |
us_depth |= 0x0000000E; |
} |
out->us_format0 = |
R300_TX_WIDTH(us_width) | |
R300_TX_HEIGHT(us_height) | |
R300_TX_DEPTH(us_depth); |
} |
out->tile_config = R300_TXO_MACRO_TILE(desc->macrotile[level]) | |
R300_TXO_MICRO_TILE(desc->microtile); |
} |
static void r300_texture_setup_fb_state(struct r300_surface *surf) |
{ |
struct r300_resource *tex = r300_resource(surf->base.texture); |
unsigned level = surf->base.u.tex.level; |
unsigned stride = |
r300_stride_to_width(surf->base.format, tex->tex.stride_in_bytes[level]); |
/* Set framebuffer state. */ |
if (util_format_is_depth_or_stencil(surf->base.format)) { |
surf->pitch = |
stride | |
R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | |
R300_DEPTHMICROTILE(tex->tex.microtile); |
surf->format = r300_translate_zsformat(surf->base.format); |
surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; |
surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; |
} else { |
surf->pitch = |
stride | |
r300_translate_colorformat(surf->base.format) | |
R300_COLOR_TILE(tex->tex.macrotile[level]) | |
R300_COLOR_MICROTILE(tex->tex.microtile); |
surf->format = r300_translate_out_fmt(surf->base.format); |
surf->colormask_swizzle = |
r300_translate_colormask_swizzle(surf->base.format); |
surf->pitch_cmask = tex->tex.cmask_stride_in_pixels; |
} |
} |
static void r300_texture_destroy(struct pipe_screen *screen, |
struct pipe_resource* texture) |
{ |
struct r300_screen *rscreen = r300_screen(screen); |
struct r300_resource* tex = (struct r300_resource*)texture; |
if (tex->tex.cmask_dwords) { |
pipe_mutex_lock(rscreen->cmask_mutex); |
if (texture == rscreen->cmask_resource) { |
rscreen->cmask_resource = NULL; |
} |
pipe_mutex_unlock(rscreen->cmask_mutex); |
} |
pb_reference(&tex->buf, NULL); |
FREE(tex); |
} |
boolean r300_resource_get_handle(struct pipe_screen* screen, |
struct pipe_resource *texture, |
struct winsys_handle *whandle) |
{ |
struct radeon_winsys *rws = r300_screen(screen)->rws; |
struct r300_resource* tex = (struct r300_resource*)texture; |
if (!tex) { |
return FALSE; |
} |
return rws->buffer_get_handle(tex->buf, |
tex->tex.stride_in_bytes[0], whandle); |
} |
/* Resource vtable installed on every texture by r300_texture_create_object().
 * Only destroy, transfer_map and transfer_unmap are provided; the get_handle,
 * transfer_flush_region and transfer_inline_write slots are left NULL.
 * The initializer is positional, so the entry order must match the field
 * order of struct u_resource_vtbl. */
static const struct u_resource_vtbl r300_texture_vtbl = |
{ |
NULL, /* get_handle */ |
r300_texture_destroy, /* resource_destroy */ |
r300_texture_transfer_map, /* transfer_map */ |
NULL, /* transfer_flush_region */ |
r300_texture_transfer_unmap, /* transfer_unmap */ |
NULL /* transfer_inline_write */ |
}; |
/* The common texture constructor. */ |
static struct r300_resource* |
r300_texture_create_object(struct r300_screen *rscreen, |
const struct pipe_resource *base, |
enum radeon_bo_layout microtile, |
enum radeon_bo_layout macrotile, |
unsigned stride_in_bytes_override, |
struct pb_buffer *buffer) |
{ |
struct radeon_winsys *rws = rscreen->rws; |
struct r300_resource *tex = NULL; |
tex = CALLOC_STRUCT(r300_resource); |
if (!tex) { |
goto fail; |
} |
pipe_reference_init(&tex->b.b.reference, 1); |
tex->b.b.screen = &rscreen->screen; |
tex->b.b.usage = base->usage; |
tex->b.b.bind = base->bind; |
tex->b.b.flags = base->flags; |
tex->b.vtbl = &r300_texture_vtbl; |
tex->tex.microtile = microtile; |
tex->tex.macrotile[0] = macrotile; |
tex->tex.stride_in_bytes_override = stride_in_bytes_override; |
tex->domain = (base->flags & R300_RESOURCE_FLAG_TRANSFER || |
base->usage == PIPE_USAGE_STAGING) ? RADEON_DOMAIN_GTT : |
base->nr_samples > 1 ? RADEON_DOMAIN_VRAM : |
RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT; |
tex->buf = buffer; |
r300_texture_desc_init(rscreen, tex, base); |
/* Figure out the ideal placement for the texture.. */ |
if (tex->domain & RADEON_DOMAIN_VRAM && |
tex->tex.size_in_bytes >= rscreen->info.vram_size) { |
tex->domain &= ~RADEON_DOMAIN_VRAM; |
tex->domain |= RADEON_DOMAIN_GTT; |
} |
if (tex->domain & RADEON_DOMAIN_GTT && |
tex->tex.size_in_bytes >= rscreen->info.gart_size) { |
tex->domain &= ~RADEON_DOMAIN_GTT; |
} |
/* Just fail if the texture is too large. */ |
if (!tex->domain) { |
goto fail; |
} |
/* Create the backing buffer if needed. */ |
if (!tex->buf) { |
tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE, |
tex->domain); |
if (!tex->buf) { |
goto fail; |
} |
} |
if (SCREEN_DBG_ON(rscreen, DBG_MSAA) && base->nr_samples > 1) { |
fprintf(stderr, "r300: %ix MSAA %s buffer created\n", |
base->nr_samples, |
util_format_is_depth_or_stencil(base->format) ? "depth" : "color"); |
} |
tex->cs_buf = rws->buffer_get_cs_handle(tex->buf); |
rws->buffer_set_tiling(tex->buf, NULL, |
tex->tex.microtile, tex->tex.macrotile[0], |
0, 0, 0, 0, 0, |
tex->tex.stride_in_bytes[0]); |
return tex; |
fail: |
FREE(tex); |
if (buffer) |
pb_reference(&buffer, NULL); |
return NULL; |
} |
/* Create a new texture. */ |
struct pipe_resource *r300_texture_create(struct pipe_screen *screen, |
const struct pipe_resource *base) |
{ |
struct r300_screen *rscreen = r300_screen(screen); |
enum radeon_bo_layout microtile, macrotile; |
if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || |
(base->bind & PIPE_BIND_SCANOUT)) { |
microtile = RADEON_LAYOUT_LINEAR; |
macrotile = RADEON_LAYOUT_LINEAR; |
} else { |
/* This will make the texture_create_function select the layout. */ |
microtile = RADEON_LAYOUT_UNKNOWN; |
macrotile = RADEON_LAYOUT_UNKNOWN; |
} |
return (struct pipe_resource*) |
r300_texture_create_object(rscreen, base, microtile, macrotile, |
0, NULL); |
} |
struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, |
const struct pipe_resource *base, |
struct winsys_handle *whandle) |
{ |
struct r300_screen *rscreen = r300_screen(screen); |
struct radeon_winsys *rws = rscreen->rws; |
struct pb_buffer *buffer; |
enum radeon_bo_layout microtile, macrotile; |
unsigned stride; |
/* Support only 2D textures without mipmaps */ |
if ((base->target != PIPE_TEXTURE_2D && |
base->target != PIPE_TEXTURE_RECT) || |
base->depth0 != 1 || |
base->last_level != 0) { |
return NULL; |
} |
buffer = rws->buffer_from_handle(rws, whandle, &stride); |
if (!buffer) |
return NULL; |
rws->buffer_get_tiling(buffer, µtile, ¯otile, NULL, NULL, NULL, NULL, NULL); |
/* Enforce a microtiled zbuffer. */ |
if (util_format_is_depth_or_stencil(base->format) && |
microtile == RADEON_LAYOUT_LINEAR) { |
switch (util_format_get_blocksize(base->format)) { |
case 4: |
microtile = RADEON_LAYOUT_TILED; |
break; |
case 2: |
microtile = RADEON_LAYOUT_SQUARETILED; |
break; |
} |
} |
return (struct pipe_resource*) |
r300_texture_create_object(rscreen, base, microtile, macrotile, |
stride, buffer); |
} |
/* Not required to implement u_resource_vtbl, consider moving to another file: |
*/ |
struct pipe_surface* r300_create_surface_custom(struct pipe_context * ctx, |
struct pipe_resource* texture, |
const struct pipe_surface *surf_tmpl, |
unsigned width0_override, |
unsigned height0_override) |
{ |
struct r300_resource* tex = r300_resource(texture); |
struct r300_surface* surface = CALLOC_STRUCT(r300_surface); |
unsigned level = surf_tmpl->u.tex.level; |
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); |
if (surface) { |
uint32_t offset, tile_height; |
pipe_reference_init(&surface->base.reference, 1); |
pipe_resource_reference(&surface->base.texture, texture); |
surface->base.context = ctx; |
surface->base.format = surf_tmpl->format; |
surface->base.width = u_minify(width0_override, level); |
surface->base.height = u_minify(height0_override, level); |
surface->base.u.tex.level = level; |
surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; |
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; |
surface->buf = tex->buf; |
surface->cs_buf = tex->cs_buf; |
/* Prefer VRAM if there are multiple domains to choose from. */ |
surface->domain = tex->domain; |
if (surface->domain & RADEON_DOMAIN_VRAM) |
surface->domain &= ~RADEON_DOMAIN_GTT; |
surface->offset = r300_texture_get_offset(tex, level, |
surf_tmpl->u.tex.first_layer); |
r300_texture_setup_fb_state(surface); |
/* Parameters for the CBZB clear. */ |
surface->cbzb_allowed = tex->tex.cbzb_allowed[level]; |
surface->cbzb_width = align(surface->base.width, 64); |
/* Height must be aligned to the size of a tile. */ |
tile_height = r300_get_pixel_alignment(surface->base.format, |
tex->b.b.nr_samples, |
tex->tex.microtile, |
tex->tex.macrotile[level], |
DIM_HEIGHT, 0); |
surface->cbzb_height = align((surface->base.height + 1) / 2, |
tile_height); |
/* Offset must be aligned to 2K and must point at the beginning |
* of a scanline. */ |
offset = surface->offset + |
tex->tex.stride_in_bytes[level] * surface->cbzb_height; |
surface->cbzb_midpoint_offset = offset & ~2047; |
surface->cbzb_pitch = surface->pitch & 0x1ffffc; |
if (util_format_get_blocksizebits(surface->base.format) == 32) |
surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; |
else |
surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; |
DBG(r300_context(ctx), DBG_CBZB, |
"CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n", |
surface->cbzb_allowed ? "YES" : " NO", |
surface->cbzb_width, surface->cbzb_height, |
offset & 2047, |
tex->tex.microtile ? "YES" : " NO", |
tex->tex.macrotile[level] ? "YES" : " NO"); |
} |
return &surface->base; |
} |
struct pipe_surface* r300_create_surface(struct pipe_context * ctx, |
struct pipe_resource* texture, |
const struct pipe_surface *surf_tmpl) |
{ |
return r300_create_surface_custom(ctx, texture, surf_tmpl, |
texture->width0, |
texture->height0); |
} |
/* Not required to implement u_resource_vtbl, consider moving to another file: |
*/ |
void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s) |
{ |
pipe_resource_reference(&s->texture, NULL); |
FREE(s); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture.h |
---|
0,0 → 1,88 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_TEXTURE_H |
#define R300_TEXTURE_H |
#include "pipe/p_compiler.h" |
#include "pipe/p_format.h" |
struct pipe_screen; |
struct pipe_context; |
struct pipe_resource; |
struct winsys_handle; |
struct r300_texture_format_state; |
struct r300_texture_desc; |
struct r300_resource; |
struct r300_screen; |
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, |
const unsigned char *swizzle_view, |
boolean dxtc_swizzle); |
uint32_t r300_translate_texformat(enum pipe_format format, |
const unsigned char *swizzle_view, |
boolean is_r500, |
boolean dxtc_swizzle); |
uint32_t r500_tx_format_msb_bit(enum pipe_format format); |
boolean r300_is_colorbuffer_format_supported(enum pipe_format format); |
boolean r300_is_zs_format_supported(enum pipe_format format); |
boolean r300_is_sampler_format_supported(enum pipe_format format); |
void r300_texture_setup_format_state(struct r300_screen *screen, |
struct r300_resource *tex, |
enum pipe_format format, |
unsigned level, |
unsigned width0_override, |
unsigned height0_override, |
struct r300_texture_format_state *out); |
boolean r300_resource_get_handle(struct pipe_screen* screen, |
struct pipe_resource *texture, |
struct winsys_handle *whandle); |
struct pipe_resource* |
r300_texture_from_handle(struct pipe_screen* screen, |
const struct pipe_resource* base, |
struct winsys_handle *whandle); |
struct pipe_resource* |
r300_texture_create(struct pipe_screen* screen, |
const struct pipe_resource* templ); |
struct pipe_surface* r300_create_surface_custom(struct pipe_context * ctx, |
struct pipe_resource* texture, |
const struct pipe_surface *surf_tmpl, |
unsigned width0_override, |
unsigned height0_override); |
struct pipe_surface* r300_create_surface(struct pipe_context *ctx, |
struct pipe_resource* texture, |
const struct pipe_surface *surf_tmpl); |
void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s); |
#endif /* R300_TEXTURE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture_desc.c |
---|
0,0 → 1,642 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_texture_desc.h" |
#include "r300_context.h" |
#include "util/u_format.h" |
/* Returns the number of pixels that the texture should be aligned to |
* in the given dimension. */ |
unsigned r300_get_pixel_alignment(enum pipe_format format, |
unsigned num_samples, |
enum radeon_bo_layout microtile, |
enum radeon_bo_layout macrotile, |
enum r300_dim dim, boolean is_rs690) |
{ |
static const unsigned table[2][5][3][2] = |
{ |
{ |
/* Macro: linear linear linear |
Micro: linear tiled square-tiled */ |
{{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ |
{{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ |
{{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ |
{{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */ |
{{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ |
}, |
{ |
/* Macro: tiled tiled tiled |
Micro: linear tiled square-tiled */ |
{{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ |
{{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ |
{{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ |
{{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */ |
{{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ |
} |
}; |
unsigned tile = 0; |
unsigned pixsize = util_format_get_blocksize(format); |
assert(macrotile <= RADEON_LAYOUT_TILED); |
assert(microtile <= RADEON_LAYOUT_SQUARETILED); |
assert(pixsize <= 16); |
assert(dim <= DIM_HEIGHT); |
tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; |
if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { |
int align; |
int h_tile; |
h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; |
align = 64 / (pixsize * h_tile); |
if (tile < align) |
tile = align; |
} |
assert(tile); |
return tile; |
} |
/* Return true if macrotiling should be enabled on the miplevel. */ |
static boolean r300_texture_macro_switch(struct r300_resource *tex, |
unsigned level, |
boolean rv350_mode, |
enum r300_dim dim) |
{ |
unsigned tile, texdim; |
if (tex->b.b.nr_samples > 1) { |
return TRUE; |
} |
tile = r300_get_pixel_alignment(tex->b.b.format, tex->b.b.nr_samples, |
tex->tex.microtile, RADEON_LAYOUT_TILED, dim, 0); |
if (dim == DIM_WIDTH) { |
texdim = u_minify(tex->tex.width0, level); |
} else { |
texdim = u_minify(tex->tex.height0, level); |
} |
/* See TX_FILTER1_n.MACRO_SWITCH. */ |
if (rv350_mode) { |
return texdim >= tile; |
} else { |
return texdim > tile; |
} |
} |
/** |
* Return the stride, in bytes, of the texture image of the given texture |
* at the given level. |
*/ |
static unsigned r300_texture_get_stride(struct r300_screen *screen, |
struct r300_resource *tex, |
unsigned level) |
{ |
unsigned tile_width, width, stride; |
boolean is_rs690 = (screen->caps.family == CHIP_RS600 || |
screen->caps.family == CHIP_RS690 || |
screen->caps.family == CHIP_RS740); |
if (tex->tex.stride_in_bytes_override) |
return tex->tex.stride_in_bytes_override; |
/* Check the level. */ |
if (level > tex->b.b.last_level) { |
SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", |
__FUNCTION__, level, tex->b.b.last_level); |
return 0; |
} |
width = u_minify(tex->tex.width0, level); |
if (util_format_is_plain(tex->b.b.format)) { |
tile_width = r300_get_pixel_alignment(tex->b.b.format, |
tex->b.b.nr_samples, |
tex->tex.microtile, |
tex->tex.macrotile[level], |
DIM_WIDTH, is_rs690); |
width = align(width, tile_width); |
stride = util_format_get_stride(tex->b.b.format, width); |
/* The alignment to 32 bytes is sort of implied by the layout... */ |
return stride; |
} else { |
return align(util_format_get_stride(tex->b.b.format, width), is_rs690 ? 64 : 32); |
} |
} |
static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, |
unsigned level, |
boolean *out_aligned_for_cbzb) |
{ |
unsigned height, tile_height; |
height = u_minify(tex->tex.height0, level); |
/* Mipmapped and 3D textures must have their height aligned to POT. */ |
if ((tex->b.b.target != PIPE_TEXTURE_1D && |
tex->b.b.target != PIPE_TEXTURE_2D && |
tex->b.b.target != PIPE_TEXTURE_RECT) || |
tex->b.b.last_level != 0) { |
height = util_next_power_of_two(height); |
} |
if (util_format_is_plain(tex->b.b.format)) { |
tile_height = r300_get_pixel_alignment(tex->b.b.format, |
tex->b.b.nr_samples, |
tex->tex.microtile, |
tex->tex.macrotile[level], |
DIM_HEIGHT, 0); |
height = align(height, tile_height); |
/* See if the CBZB clear can be used on the buffer, |
* taking the texture size into account. */ |
if (out_aligned_for_cbzb) { |
if (tex->tex.macrotile[level]) { |
/* When clearing, the layer (width*height) is horizontally split |
* into two, and the upper and lower halves are cleared by the CB |
* and ZB units, respectively. Therefore, the number of macrotiles |
* in the Y direction must be even. */ |
/* Align the height so that there is an even number of macrotiles. |
* Do so for 3 or more macrotiles in the Y direction. */ |
if (level == 0 && tex->b.b.last_level == 0 && |
(tex->b.b.target == PIPE_TEXTURE_1D || |
tex->b.b.target == PIPE_TEXTURE_2D || |
tex->b.b.target == PIPE_TEXTURE_RECT) && |
height >= tile_height * 3) { |
height = align(height, tile_height * 2); |
} |
*out_aligned_for_cbzb = height % (tile_height * 2) == 0; |
} else { |
*out_aligned_for_cbzb = FALSE; |
} |
} |
} |
return util_format_get_nblocksy(tex->b.b.format, height); |
} |
/* Get a width in pixels from a stride in bytes. */ |
unsigned r300_stride_to_width(enum pipe_format format, |
unsigned stride_in_bytes) |
{ |
return (stride_in_bytes / util_format_get_blocksize(format)) * |
util_format_get_blockwidth(format); |
} |
static void r300_setup_miptree(struct r300_screen *screen, |
struct r300_resource *tex, |
boolean align_for_cbzb) |
{ |
struct pipe_resource *base = &tex->b.b; |
unsigned stride, size, layer_size, nblocksy, i; |
boolean rv350_mode = screen->caps.family >= CHIP_R350; |
boolean aligned_for_cbzb; |
tex->tex.size_in_bytes = 0; |
SCREEN_DBG(screen, DBG_TEXALLOC, |
"r300: Making miptree for texture, format %s\n", |
util_format_short_name(base->format)); |
for (i = 0; i <= base->last_level; i++) { |
/* Let's see if this miplevel can be macrotiled. */ |
tex->tex.macrotile[i] = |
(tex->tex.macrotile[0] == RADEON_LAYOUT_TILED && |
r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && |
r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? |
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; |
stride = r300_texture_get_stride(screen, tex, i); |
/* Compute the number of blocks in Y, see if the CBZB clear can be |
* used on the texture. */ |
aligned_for_cbzb = FALSE; |
if (align_for_cbzb && tex->tex.cbzb_allowed[i]) |
nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb); |
else |
nblocksy = r300_texture_get_nblocksy(tex, i, NULL); |
layer_size = stride * nblocksy; |
if (base->nr_samples > 1) { |
layer_size *= base->nr_samples; |
} |
if (base->target == PIPE_TEXTURE_CUBE) |
size = layer_size * 6; |
else |
size = layer_size * u_minify(tex->tex.depth0, i); |
tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes; |
tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size; |
tex->tex.layer_size_in_bytes[i] = layer_size; |
tex->tex.stride_in_bytes[i] = stride; |
tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb; |
SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " |
"(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", |
i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i), |
u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes, |
tex->tex.macrotile[i] ? "TRUE" : "FALSE"); |
} |
} |
static void r300_setup_flags(struct r300_resource *tex) |
{ |
tex->tex.uses_stride_addressing = |
!util_is_power_of_two(tex->b.b.width0) || |
(tex->tex.stride_in_bytes_override && |
r300_stride_to_width(tex->b.b.format, |
tex->tex.stride_in_bytes_override) != tex->b.b.width0); |
tex->tex.is_npot = |
tex->tex.uses_stride_addressing || |
!util_is_power_of_two(tex->b.b.height0) || |
!util_is_power_of_two(tex->b.b.depth0); |
} |
static void r300_setup_cbzb_flags(struct r300_screen *rscreen, |
struct r300_resource *tex) |
{ |
unsigned i, bpp; |
boolean first_level_valid; |
bpp = util_format_get_blocksizebits(tex->b.b.format); |
/* 1) The texture must be point-sampled, |
* 2) The depth must be 16 or 32 bits. |
* 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage |
* with certain texture sizes. Macrotiling ensures the alignment. */ |
first_level_valid = tex->b.b.nr_samples <= 1 && |
(bpp == 16 || bpp == 32) && |
tex->tex.macrotile[0]; |
if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB)) |
first_level_valid = FALSE; |
for (i = 0; i <= tex->b.b.last_level; i++) |
tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; |
} |
/* Convert a pixel area into a dword count, given that one dword covers a
 * block of xblock * yblock pixels.
 *
 * The stride is rounded up to a multiple of xblock with util_align_npot()
 * and the height is rounded up to a multiple of yblock with align() before
 * the area is divided by the block area.
 */
static unsigned r300_pixels_to_dwords(unsigned stride,
                                      unsigned height,
                                      unsigned xblock, unsigned yblock)
{
    const unsigned pixels_per_dword = xblock * yblock;

    return (util_align_npot(stride, xblock) * align(height, yblock)) /
           pixels_per_dword;
}
static void r300_setup_hyperz_properties(struct r300_screen *screen, |
struct r300_resource *tex) |
{ |
/* The tile size of 1 DWORD in ZMASK RAM is: |
* |
* GPU Pipes 4x4 mode 8x8 mode |
* ------------------------------------------ |
* R580 4P/1Z 32x32 64x64 |
* RV570 3P/1Z 48x16 96x32 |
* RV530 1P/2Z 32x16 64x32 |
* 1P/1Z 16x16 32x32 |
*/ |
static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; |
static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; |
/* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), |
* but the blocks have very weird ordering. |
* |
* With 2 pipes and an image of size 8xY, where Y >= 1, |
* clearing 4 dwords clears blocks like this: |
* |
* 01012323 |
* |
* where numbers correspond to dword indices. The blocks are interleaved |
* in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). |
* |
* With 4 pipes and an image of size 8xY, where Y >= 4, |
* clearing 8 dwords clears blocks like this: |
* 01012323 |
* 45456767 |
* 01012323 |
* 45456767 |
* where numbers correspond to dword indices. The blocks are interleaved |
* in both directions, so the alignment must be 4x4 blocks (32x32 pixels) |
*/ |
static unsigned hiz_align_x[4] = {8, 32, 48, 32}; |
static unsigned hiz_align_y[4] = {8, 8, 8, 32}; |
if (util_format_is_depth_or_stencil(tex->b.b.format) && |
util_format_get_blocksizebits(tex->b.b.format) == 32 && |
tex->tex.microtile) { |
unsigned i, pipes; |
if (screen->caps.family == CHIP_RV530) { |
pipes = screen->info.r300_num_z_pipes; |
} else { |
pipes = screen->info.r300_num_gb_pipes; |
} |
for (i = 0; i <= tex->b.b.last_level; i++) { |
unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; |
stride = r300_stride_to_width(tex->b.b.format, |
tex->tex.stride_in_bytes[i]); |
stride = align(stride, 16); |
height = u_minify(tex->b.b.height0, i); |
/* The 8x8 compression mode needs macrotiling. */ |
zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && |
tex->tex.macrotile[i] && |
tex->b.b.nr_samples <= 1 ? 8 : 4; |
/* Get the ZMASK buffer size in dwords. */ |
zcomp_numdw = r300_pixels_to_dwords(stride, height, |
zmask_blocks_x_per_dw[pipes-1] * zcompsize, |
zmask_blocks_y_per_dw[pipes-1] * zcompsize); |
/* Check whether we have enough ZMASK memory. */ |
if (util_format_get_blocksizebits(tex->b.b.format) == 32 && |
zcomp_numdw <= screen->caps.zmask_ram * pipes) { |
tex->tex.zmask_dwords[i] = zcomp_numdw; |
tex->tex.zcomp8x8[i] = zcompsize == 8; |
tex->tex.zmask_stride_in_pixels[i] = |
util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); |
} else { |
tex->tex.zmask_dwords[i] = 0; |
tex->tex.zcomp8x8[i] = FALSE; |
tex->tex.zmask_stride_in_pixels[i] = 0; |
} |
/* Now setup HIZ. */ |
stride = util_align_npot(stride, hiz_align_x[pipes-1]); |
height = align(height, hiz_align_y[pipes-1]); |
/* Get the HIZ buffer size in dwords. */ |
hiz_numdw = (stride * height) / (8*8 * pipes); |
/* Check whether we have enough HIZ memory. */ |
if (hiz_numdw <= screen->caps.hiz_ram * pipes) { |
tex->tex.hiz_dwords[i] = hiz_numdw; |
tex->tex.hiz_stride_in_pixels[i] = stride; |
} else { |
tex->tex.hiz_dwords[i] = 0; |
tex->tex.hiz_stride_in_pixels[i] = 0; |
} |
} |
} |
} |
static void r300_setup_cmask_properties(struct r300_screen *screen, |
struct r300_resource *tex) |
{ |
static unsigned cmask_align_x[4] = {16, 32, 48, 32}; |
static unsigned cmask_align_y[4] = {16, 16, 16, 32}; |
unsigned pipes, stride, cmask_num_dw, cmask_max_size; |
/* We need an AA colorbuffer, no mipmaps. */ |
if (tex->b.b.nr_samples <= 1 || |
tex->b.b.last_level > 0 || |
util_format_is_depth_or_stencil(tex->b.b.format)) { |
return; |
} |
/* FP16 AA needs R500 and a fairly new DRM. */ |
if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT || |
tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) && |
(!screen->caps.is_r500 || screen->info.drm_minor < 29)) { |
return; |
} |
if (SCREEN_DBG_ON(screen, DBG_NO_CMASK)) { |
return; |
} |
/* CMASK is part of raster pipes. The number of Z pipes doesn't matter. */ |
pipes = screen->info.r300_num_gb_pipes; |
/* The single-pipe cards have 5120 dwords of CMASK RAM, |
* the other cards have 4096 dwords of CMASK RAM per pipe. */ |
cmask_max_size = pipes == 1 ? 5120 : pipes * 4096; |
stride = r300_stride_to_width(tex->b.b.format, |
tex->tex.stride_in_bytes[0]); |
stride = align(stride, 16); |
/* Get the CMASK size in dwords. */ |
cmask_num_dw = r300_pixels_to_dwords(stride, tex->b.b.height0, |
cmask_align_x[pipes-1], |
cmask_align_y[pipes-1]); |
/* Check the CMASK size against the CMASK memory limit. */ |
if (cmask_num_dw <= cmask_max_size) { |
tex->tex.cmask_dwords = cmask_num_dw; |
tex->tex.cmask_stride_in_pixels = |
util_align_npot(stride, cmask_align_x[pipes-1]); |
} |
} |
static void r300_setup_tiling(struct r300_screen *screen, |
struct r300_resource *tex) |
{ |
enum pipe_format format = tex->b.b.format; |
boolean rv350_mode = screen->caps.family >= CHIP_R350; |
boolean is_zb = util_format_is_depth_or_stencil(format); |
boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); |
boolean force_microtiling = |
(tex->b.b.flags & R300_RESOURCE_FORCE_MICROTILING) != 0; |
if (tex->b.b.nr_samples > 1) { |
tex->tex.microtile = RADEON_LAYOUT_TILED; |
tex->tex.macrotile[0] = RADEON_LAYOUT_TILED; |
return; |
} |
tex->tex.microtile = RADEON_LAYOUT_LINEAR; |
tex->tex.macrotile[0] = RADEON_LAYOUT_LINEAR; |
if (tex->b.b.usage == PIPE_USAGE_STAGING) { |
return; |
} |
if (!util_format_is_plain(format)) { |
return; |
} |
/* If height == 1, disable microtiling except for zbuffer. */ |
if (!force_microtiling && !is_zb && |
(tex->b.b.height0 == 1 || dbg_no_tiling)) { |
return; |
} |
/* Set microtiling. */ |
switch (util_format_get_blocksize(format)) { |
case 1: |
case 4: |
case 8: |
tex->tex.microtile = RADEON_LAYOUT_TILED; |
break; |
case 2: |
tex->tex.microtile = RADEON_LAYOUT_SQUARETILED; |
break; |
} |
if (dbg_no_tiling) { |
return; |
} |
/* Set macrotiling. */ |
if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && |
r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { |
tex->tex.macrotile[0] = RADEON_LAYOUT_TILED; |
} |
} |
static void r300_tex_print_info(struct r300_resource *tex, |
const char *func) |
{ |
fprintf(stderr, |
"r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " |
"LastLevel: %i, Size: %i, Format: %s, Samples: %i\n", |
func, |
tex->tex.macrotile[0] ? "YES" : " NO", |
tex->tex.microtile ? "YES" : " NO", |
r300_stride_to_width(tex->b.b.format, tex->tex.stride_in_bytes[0]), |
tex->b.b.width0, tex->b.b.height0, tex->b.b.depth0, |
tex->b.b.last_level, tex->tex.size_in_bytes, |
util_format_short_name(tex->b.b.format), |
tex->b.b.nr_samples); |
} |
void r300_texture_desc_init(struct r300_screen *rscreen, |
struct r300_resource *tex, |
const struct pipe_resource *base) |
{ |
tex->b.b.target = base->target; |
tex->b.b.format = base->format; |
tex->b.b.width0 = base->width0; |
tex->b.b.height0 = base->height0; |
tex->b.b.depth0 = base->depth0; |
tex->b.b.array_size = base->array_size; |
tex->b.b.last_level = base->last_level; |
tex->b.b.nr_samples = base->nr_samples; |
tex->tex.width0 = base->width0; |
tex->tex.height0 = base->height0; |
tex->tex.depth0 = base->depth0; |
/* There is a CB memory addressing hardware bug that limits the width |
* of the MSAA buffer in some cases in R520. In order to get around it, |
* the following code lowers the sample count depending on the format and |
* the width. |
* |
* The only catch is that all MSAA colorbuffers and a zbuffer which are |
* supposed to be used together should always be bound together. Only |
* then the correct minimum sample count of all bound buffers is used |
* for rendering. */ |
if (rscreen->caps.is_r500) { |
/* FP16 6x MSAA buffers are limited to a width of 1360 pixels. */ |
if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT || |
tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) && |
tex->b.b.nr_samples == 6 && tex->b.b.width0 > 1360) { |
tex->b.b.nr_samples = 4; |
} |
/* FP16 4x MSAA buffers are limited to a width of 2048 pixels. */ |
if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT || |
tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) && |
tex->b.b.nr_samples == 4 && tex->b.b.width0 > 2048) { |
tex->b.b.nr_samples = 2; |
} |
} |
/* 32-bit 6x MSAA buffers are limited to a width of 2720 pixels. |
* This applies to all R300-R500 cards. */ |
if (util_format_get_blocksizebits(tex->b.b.format) == 32 && |
!util_format_is_depth_or_stencil(tex->b.b.format) && |
tex->b.b.nr_samples == 6 && tex->b.b.width0 > 2720) { |
tex->b.b.nr_samples = 4; |
} |
r300_setup_flags(tex); |
/* Align a 3D NPOT texture to POT. */ |
if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) { |
tex->tex.width0 = util_next_power_of_two(tex->tex.width0); |
tex->tex.height0 = util_next_power_of_two(tex->tex.height0); |
tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0); |
} |
/* Setup tiling. */ |
if (tex->tex.microtile == RADEON_LAYOUT_UNKNOWN) { |
r300_setup_tiling(rscreen, tex); |
} |
r300_setup_cbzb_flags(rscreen, tex); |
/* Setup the miptree description. */ |
r300_setup_miptree(rscreen, tex, TRUE); |
/* If the required buffer size is larger than the given max size, |
* try again without the alignment for the CBZB clear. */ |
if (tex->buf && tex->tex.size_in_bytes > tex->buf->size) { |
r300_setup_miptree(rscreen, tex, FALSE); |
/* Make sure the buffer we got is large enough. */ |
if (tex->tex.size_in_bytes > tex->buf->size) { |
fprintf(stderr, |
"r300: I got a pre-allocated buffer to use it as a texture " |
"storage, but the buffer is too small. I'll use the buffer " |
"anyway, because I can't crash here, but it's dangerous. " |
"This can be a DDX bug. Got: %iB, Need: %iB, Info:\n", |
tex->buf->size, tex->tex.size_in_bytes); |
r300_tex_print_info(tex, "texture_desc_init"); |
/* Ooops, what now. Apps will break if we fail this, |
* so just pretend everything's okay. */ |
} |
} |
r300_setup_hyperz_properties(rscreen, tex); |
r300_setup_cmask_properties(rscreen, tex); |
if (SCREEN_DBG_ON(rscreen, DBG_TEX)) |
r300_tex_print_info(tex, "texture_desc_init"); |
} |
unsigned r300_texture_get_offset(struct r300_resource *tex, |
unsigned level, unsigned layer) |
{ |
unsigned offset = tex->tex.offset_in_bytes[level]; |
switch (tex->b.b.target) { |
case PIPE_TEXTURE_3D: |
case PIPE_TEXTURE_CUBE: |
return offset + layer * tex->tex.layer_size_in_bytes[level]; |
default: |
assert(layer == 0); |
return offset; |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture_desc.h |
---|
0,0 → 1,56 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_TEXTURE_DESC_H |
#define R300_TEXTURE_DESC_H |
#include "pipe/p_format.h" |
#include "r300_context.h" |
struct pipe_resource; |
struct r300_screen; |
struct r300_texture_desc; |
struct r300_resource; |
/* Selects the texture dimension a per-dimension query applies to; used as
 * the `dim` argument of r300_get_pixel_alignment(). */
enum r300_dim { |
DIM_WIDTH = 0, |
DIM_HEIGHT = 1 |
}; |
unsigned r300_get_pixel_alignment(enum pipe_format format, |
unsigned num_samples, |
enum radeon_bo_layout microtile, |
enum radeon_bo_layout macrotile, |
enum r300_dim dim, boolean is_rs690); |
void r300_texture_desc_init(struct r300_screen *rscreen, |
struct r300_resource *tex, |
const struct pipe_resource *base); |
unsigned r300_texture_get_offset(struct r300_resource *tex, |
unsigned level, unsigned layer); |
unsigned r300_stride_to_width(enum pipe_format format, |
unsigned stride_in_bytes); |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_tgsi_to_rc.c |
---|
0,0 → 1,385 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_tgsi_to_rc.h" |
#include "compiler/radeon_compiler.h" |
#include "tgsi/tgsi_info.h" |
#include "tgsi/tgsi_parse.h" |
#include "tgsi/tgsi_scan.h" |
#include "tgsi/tgsi_util.h" |
static unsigned translate_opcode(unsigned opcode) |
{ |
switch(opcode) { |
case TGSI_OPCODE_ARL: return RC_OPCODE_ARL; |
case TGSI_OPCODE_MOV: return RC_OPCODE_MOV; |
case TGSI_OPCODE_LIT: return RC_OPCODE_LIT; |
case TGSI_OPCODE_RCP: return RC_OPCODE_RCP; |
case TGSI_OPCODE_RSQ: return RC_OPCODE_RSQ; |
case TGSI_OPCODE_EXP: return RC_OPCODE_EXP; |
case TGSI_OPCODE_LOG: return RC_OPCODE_LOG; |
case TGSI_OPCODE_MUL: return RC_OPCODE_MUL; |
case TGSI_OPCODE_ADD: return RC_OPCODE_ADD; |
case TGSI_OPCODE_DP3: return RC_OPCODE_DP3; |
case TGSI_OPCODE_DP4: return RC_OPCODE_DP4; |
case TGSI_OPCODE_DST: return RC_OPCODE_DST; |
case TGSI_OPCODE_MIN: return RC_OPCODE_MIN; |
case TGSI_OPCODE_MAX: return RC_OPCODE_MAX; |
case TGSI_OPCODE_SLT: return RC_OPCODE_SLT; |
case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; |
case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; |
case TGSI_OPCODE_SUB: return RC_OPCODE_SUB; |
case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; |
case TGSI_OPCODE_CND: return RC_OPCODE_CND; |
/* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ |
/* gap */ |
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; |
case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; |
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; |
case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; |
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; |
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; |
case TGSI_OPCODE_POW: return RC_OPCODE_POW; |
case TGSI_OPCODE_XPD: return RC_OPCODE_XPD; |
/* gap */ |
case TGSI_OPCODE_ABS: return RC_OPCODE_ABS; |
/* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */ |
case TGSI_OPCODE_DPH: return RC_OPCODE_DPH; |
case TGSI_OPCODE_COS: return RC_OPCODE_COS; |
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX; |
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY; |
case TGSI_OPCODE_KILL: return RC_OPCODE_KILP; |
/* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */ |
/* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */ |
/* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */ |
/* case TGSI_OPCODE_PK4UB: return RC_OPCODE_PK4UB; */ |
/* case TGSI_OPCODE_RFL: return RC_OPCODE_RFL; */ |
case TGSI_OPCODE_SEQ: return RC_OPCODE_SEQ; |
case TGSI_OPCODE_SFL: return RC_OPCODE_SFL; |
case TGSI_OPCODE_SGT: return RC_OPCODE_SGT; |
case TGSI_OPCODE_SIN: return RC_OPCODE_SIN; |
case TGSI_OPCODE_SLE: return RC_OPCODE_SLE; |
case TGSI_OPCODE_SNE: return RC_OPCODE_SNE; |
/* case TGSI_OPCODE_STR: return RC_OPCODE_STR; */ |
case TGSI_OPCODE_TEX: return RC_OPCODE_TEX; |
case TGSI_OPCODE_TXD: return RC_OPCODE_TXD; |
case TGSI_OPCODE_TXP: return RC_OPCODE_TXP; |
/* case TGSI_OPCODE_UP2H: return RC_OPCODE_UP2H; */ |
/* case TGSI_OPCODE_UP2US: return RC_OPCODE_UP2US; */ |
/* case TGSI_OPCODE_UP4B: return RC_OPCODE_UP4B; */ |
/* case TGSI_OPCODE_UP4UB: return RC_OPCODE_UP4UB; */ |
/* case TGSI_OPCODE_X2D: return RC_OPCODE_X2D; */ |
/* case TGSI_OPCODE_ARA: return RC_OPCODE_ARA; */ |
/* case TGSI_OPCODE_ARR: return RC_OPCODE_ARR; */ |
/* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */ |
/* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */ |
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */ |
case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; |
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP; |
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS; |
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB; |
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */ |
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */ |
case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; |
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL; |
case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; |
case TGSI_OPCODE_IF: return RC_OPCODE_IF; |
case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP; |
case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; |
case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; |
case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; |
/* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ |
/* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ |
case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; |
/* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */ |
/* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ |
case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; |
/* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ |
/* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ |
/* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ |
/* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ |
/* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ |
/* case TGSI_OPCODE_XOR: return RC_OPCODE_XOR; */ |
/* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */ |
/* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */ |
/* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */ |
case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; |
/* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */ |
/* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */ |
/* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */ |
/* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */ |
/* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */ |
/* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */ |
case TGSI_OPCODE_NOP: return RC_OPCODE_NOP; |
/* gap */ |
/* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */ |
/* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */ |
/* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */ |
case TGSI_OPCODE_KILL_IF: return RC_OPCODE_KIL; |
} |
fprintf(stderr, "r300: Unknown TGSI/RC opcode: %s\n", tgsi_get_opcode_name(opcode)); |
return RC_OPCODE_ILLEGAL_OPCODE; |
} |
static unsigned translate_saturate(unsigned saturate) |
{ |
switch(saturate) { |
default: |
fprintf(stderr, "Unknown saturate mode: %i\n", saturate); |
/* fall-through */ |
case TGSI_SAT_NONE: return RC_SATURATE_NONE; |
case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE; |
} |
} |
static unsigned translate_register_file(unsigned file) |
{ |
switch(file) { |
case TGSI_FILE_CONSTANT: return RC_FILE_CONSTANT; |
case TGSI_FILE_IMMEDIATE: return RC_FILE_CONSTANT; |
case TGSI_FILE_INPUT: return RC_FILE_INPUT; |
case TGSI_FILE_OUTPUT: return RC_FILE_OUTPUT; |
default: |
fprintf(stderr, "Unhandled register file: %i\n", file); |
/* fall-through */ |
case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY; |
case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS; |
} |
} |
static int translate_register_index( |
struct tgsi_to_rc * ttr, |
unsigned file, |
int index) |
{ |
if (file == TGSI_FILE_IMMEDIATE) |
return ttr->immediate_offset + index; |
return index; |
} |
static void transform_dstreg( |
struct tgsi_to_rc * ttr, |
struct rc_dst_register * dst, |
struct tgsi_full_dst_register * src) |
{ |
dst->File = translate_register_file(src->Register.File); |
dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); |
dst->WriteMask = src->Register.WriteMask; |
if (src->Register.Indirect) { |
ttr->error = TRUE; |
fprintf(stderr, "r300: Relative addressing of destination operands " |
"is unsupported.\n"); |
} |
} |
static void transform_srcreg( |
struct tgsi_to_rc * ttr, |
struct rc_src_register * dst, |
struct tgsi_full_src_register * src) |
{ |
unsigned i, j; |
dst->File = translate_register_file(src->Register.File); |
dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); |
dst->RelAddr = src->Register.Indirect; |
dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); |
dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; |
dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; |
dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; |
dst->Abs = src->Register.Absolute; |
dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; |
if (src->Register.File == TGSI_FILE_IMMEDIATE) { |
for (i = 0; i < ttr->imms_to_swizzle_count; i++) { |
if (ttr->imms_to_swizzle[i].index == src->Register.Index) { |
dst->File = RC_FILE_TEMPORARY; |
dst->Index = 0; |
dst->Swizzle = 0; |
for (j = 0; j < 4; j++) { |
dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle, |
tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3); |
} |
break; |
} |
} |
} |
} |
static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src, |
uint32_t *shadowSamplers) |
{ |
switch(src.Texture) { |
case TGSI_TEXTURE_1D: |
dst->U.I.TexSrcTarget = RC_TEXTURE_1D; |
break; |
case TGSI_TEXTURE_2D: |
dst->U.I.TexSrcTarget = RC_TEXTURE_2D; |
break; |
case TGSI_TEXTURE_3D: |
dst->U.I.TexSrcTarget = RC_TEXTURE_3D; |
break; |
case TGSI_TEXTURE_CUBE: |
dst->U.I.TexSrcTarget = RC_TEXTURE_CUBE; |
break; |
case TGSI_TEXTURE_RECT: |
dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; |
break; |
case TGSI_TEXTURE_SHADOW1D: |
dst->U.I.TexSrcTarget = RC_TEXTURE_1D; |
dst->U.I.TexShadow = 1; |
*shadowSamplers |= 1 << dst->U.I.TexSrcUnit; |
break; |
case TGSI_TEXTURE_SHADOW2D: |
dst->U.I.TexSrcTarget = RC_TEXTURE_2D; |
dst->U.I.TexShadow = 1; |
*shadowSamplers |= 1 << dst->U.I.TexSrcUnit; |
break; |
case TGSI_TEXTURE_SHADOWRECT: |
dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; |
dst->U.I.TexShadow = 1; |
*shadowSamplers |= 1 << dst->U.I.TexSrcUnit; |
break; |
} |
dst->U.I.TexSwizzle = RC_SWIZZLE_XYZW; |
} |
static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) |
{ |
struct rc_instruction * dst; |
int i; |
dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); |
dst->U.I.Opcode = translate_opcode(src->Instruction.Opcode); |
dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); |
if (src->Instruction.NumDstRegs) |
transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]); |
for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { |
if (src->Src[i].Register.File == TGSI_FILE_SAMPLER) |
dst->U.I.TexSrcUnit = src->Src[i].Register.Index; |
else |
transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]); |
} |
/* Texturing. */ |
if (src->Instruction.Texture) |
transform_texture(dst, src->Texture, |
&ttr->compiler->Program.ShadowSamplers); |
} |
static void handle_immediate(struct tgsi_to_rc * ttr, |
struct tgsi_full_immediate * imm, |
unsigned index) |
{ |
struct rc_constant constant; |
unsigned swizzle = 0; |
boolean can_swizzle = TRUE; |
unsigned i; |
for (i = 0; i < 4; i++) { |
if (imm->u[i].Float == 0.0f) { |
swizzle |= RC_SWIZZLE_ZERO << (i * 3); |
} else if (imm->u[i].Float == 0.5f && ttr->use_half_swizzles) { |
swizzle |= RC_SWIZZLE_HALF << (i * 3); |
} else if (imm->u[i].Float == 1.0f) { |
swizzle |= RC_SWIZZLE_ONE << (i * 3); |
} else { |
can_swizzle = FALSE; |
break; |
} |
} |
if (can_swizzle) { |
ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index; |
ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle; |
ttr->imms_to_swizzle_count++; |
} else { |
constant.Type = RC_CONSTANT_IMMEDIATE; |
constant.Size = 4; |
for(i = 0; i < 4; ++i) |
constant.u.Immediate[i] = imm->u[i].Float; |
rc_constants_add(&ttr->compiler->Program.Constants, &constant); |
} |
} |
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, |
const struct tgsi_token * tokens) |
{ |
struct tgsi_full_instruction *inst; |
struct tgsi_parse_context parser; |
unsigned imm_index = 0; |
int i; |
ttr->error = FALSE; |
/* Allocate constants placeholders. |
* |
* Note: What if declared constants are not contiguous? */ |
for(i = 0; i <= ttr->info->file_max[TGSI_FILE_CONSTANT]; ++i) { |
struct rc_constant constant; |
memset(&constant, 0, sizeof(constant)); |
constant.Type = RC_CONSTANT_EXTERNAL; |
constant.Size = 4; |
constant.u.External = i; |
rc_constants_add(&ttr->compiler->Program.Constants, &constant); |
} |
ttr->immediate_offset = ttr->compiler->Program.Constants.Count; |
ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms)); |
ttr->imms_to_swizzle_count = 0; |
tgsi_parse_init(&parser, tokens); |
while (!tgsi_parse_end_of_tokens(&parser)) { |
tgsi_parse_token(&parser); |
switch (parser.FullToken.Token.Type) { |
case TGSI_TOKEN_TYPE_DECLARATION: |
break; |
case TGSI_TOKEN_TYPE_IMMEDIATE: |
handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index); |
imm_index++; |
break; |
case TGSI_TOKEN_TYPE_INSTRUCTION: |
inst = &parser.FullToken.FullInstruction; |
if (inst->Instruction.Opcode == TGSI_OPCODE_END) { |
break; |
} |
transform_instruction(ttr, inst); |
break; |
} |
} |
tgsi_parse_free(&parser); |
free(ttr->imms_to_swizzle); |
rc_calculate_inputs_outputs(ttr->compiler); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_tgsi_to_rc.h |
---|
0,0 → 1,57 |
/* |
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_TGSI_TO_RC_H |
#define R300_TGSI_TO_RC_H |
#include "pipe/p_compiler.h" |
struct radeon_compiler; |
struct tgsi_full_declaration; |
struct tgsi_shader_info; |
struct tgsi_token; |
/* One immediate that is expressed through constant swizzles instead of a
 * constant slot: `index` is the immediate's index as passed to
 * handle_immediate(), `swizzle` the packed swizzle, 3 bits per channel. */
struct swizzled_imms { |
unsigned index; |
unsigned swizzle; |
}; |
/* State of one TGSI -> radeon compiler (RC) translation performed by
 * r300_tgsi_to_rc(). */
struct tgsi_to_rc { |
/* Compiler whose program receives the translated constants and
 * instructions. */
struct radeon_compiler * compiler; |
/* Shader info; r300_tgsi_to_rc() reads file_max[] and immediate_count. */
const struct tgsi_shader_info * info; |
/* Constant count at the time immediates start being handled; added to
 * the index of immediate registers. */
int immediate_offset; |
/* Immediates replaced by swizzles; allocated and freed inside
 * r300_tgsi_to_rc(), with imms_to_swizzle_count valid entries. */
struct swizzled_imms * imms_to_swizzle; |
unsigned imms_to_swizzle_count; |
/* Vertex shaders have no half swizzles, and no way to handle them, so |
* until rc grows proper support, indicate if they're safe to use. */ |
boolean use_half_swizzles; |
/* If an error occurred. */ |
boolean error; |
}; |
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); |
#endif /* R300_TGSI_TO_RC_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_transfer.c |
---|
0,0 → 1,266 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_transfer.h" |
#include "r300_texture_desc.h" |
#include "r300_screen_buffer.h" |
#include "util/u_memory.h" |
#include "util/u_format.h" |
#include "util/u_box.h" |
/* r300-specific transfer object. The pipe_transfer base has to stay the
 * first member because r300_transfer() casts a pipe_transfer pointer
 * directly to this type. */
struct r300_transfer { |
/* Parent class */ |
struct pipe_transfer transfer; |
/* Offset from start of buffer. */ |
unsigned offset; |
/* Linear (detiled) texture used by the tiled <-> detiled copy helpers. */ |
struct r300_resource *linear_texture; |
}; |
/* Convenience cast wrapper. */ |
static INLINE struct r300_transfer* |
r300_transfer(struct pipe_transfer* transfer) |
{ |
return (struct r300_transfer*)transfer; |
} |
/* Fill the linear staging texture of a transfer with the mapped box of the |
 * source texture. Single-sampled sources use a plain region copy; sources |
 * with more than one sample are resolved with a blit. */ |
static void r300_copy_from_tiled_texture(struct pipe_context *ctx, |
                                         struct r300_transfer *r300transfer) |
{ |
    struct pipe_transfer *xfer = &r300transfer->transfer; |
    struct pipe_resource *tiled = xfer->resource; |
    struct pipe_resource *linear = &r300transfer->linear_texture->b.b; |
    struct pipe_blit_info resolve; |
    if (tiled->nr_samples <= 1) { |
        /* Copy the box to the origin of level 0 of the staging texture. */ |
        ctx->resource_copy_region(ctx, linear, 0, 0, 0, 0, |
                                  tiled, xfer->level, &xfer->box); |
        return; |
    } |
    /* Resolve the resource. */ |
    memset(&resolve, 0, sizeof(resolve)); |
    resolve.mask = PIPE_MASK_RGBA; |
    resolve.filter = PIPE_TEX_FILTER_NEAREST; |
    resolve.src.resource = tiled; |
    resolve.src.format = tiled->format; |
    resolve.src.level = xfer->level; |
    resolve.src.box = xfer->box; |
    /* Destination origin and level stay zero from the memset. */ |
    resolve.dst.resource = linear; |
    resolve.dst.format = linear->format; |
    resolve.dst.box.width = xfer->box.width; |
    resolve.dst.box.height = xfer->box.height; |
    resolve.dst.box.depth = xfer->box.depth; |
    ctx->blit(ctx, &resolve); |
} |
/* Write the linear staging texture of a transfer back into the mapped box of |
 * the destination texture, then flush the context. */ |
static void r300_copy_into_tiled_texture(struct pipe_context *ctx, |
                                         struct r300_transfer *r300transfer) |
{ |
    struct pipe_transfer *xfer = &r300transfer->transfer; |
    const struct pipe_box *region = &xfer->box; |
    struct pipe_resource *staging = &r300transfer->linear_texture->b.b; |
    struct pipe_box staging_box; |
    /* The staging texture holds just the mapped region, starting at 0,0,0. */ |
    u_box_3d(0, 0, 0, |
             region->width, region->height, region->depth, |
             &staging_box); |
    ctx->resource_copy_region(ctx, xfer->resource, xfer->level, |
                              region->x, region->y, region->z, |
                              staging, 0, &staging_box); |
    /* XXX remove this. */ |
    r300_flush(ctx, 0, NULL); |
} |
/* Map a box of one mip level of a texture for CPU access. |
 * |
 * Tiled textures are never mapped directly: a linear staging texture the size |
 * of the box is created and, for read transfers, filled by a copy/blit before |
 * it is mapped. The same staging path is used for transfers without the READ |
 * flag on a texture that is still referenced, provided the format can be |
 * blitted. Otherwise the texture's own buffer is mapped and the returned |
 * pointer is advanced to the first block of the box. |
 * |
 * On success the new transfer is stored in *transfer and the CPU pointer is |
 * returned. NULL is returned, and *transfer is left untouched, if the |
 * transfer object or the staging texture cannot be allocated or if the |
 * buffer cannot be mapped. */ |
void * |
r300_texture_transfer_map(struct pipe_context *ctx, |
                          struct pipe_resource *texture, |
                          unsigned level, |
                          unsigned usage, |
                          const struct pipe_box *box, |
                          struct pipe_transfer **transfer) |
{ |
    struct r300_context *r300 = r300_context(ctx); |
    struct r300_resource *tex = r300_resource(texture); |
    struct r300_transfer *trans; |
    boolean referenced_cs, referenced_hw; |
    enum pipe_format format = tex->b.b.format; |
    char *map; |
    /* A buffer referenced by the unflushed command stream is treated as |
     * busy; otherwise ask the winsys. */ |
    referenced_cs = |
        r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf, RADEON_USAGE_READWRITE); |
    if (referenced_cs) { |
        referenced_hw = TRUE; |
    } else { |
        referenced_hw = |
            r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE); |
    } |
    trans = CALLOC_STRUCT(r300_transfer); |
    if (!trans) { |
        /* Out of memory: bail out before trans is dereferenced below. */ |
        return NULL; |
    } |
    /* Initialize the transfer object. */ |
    trans->transfer.resource = texture; |
    trans->transfer.level = level; |
    trans->transfer.usage = usage; |
    trans->transfer.box = *box; |
    /* If the texture is tiled, we must create a temporary detiled texture |
     * for this transfer. |
     * Also make write transfers pipelined. */ |
    if (tex->tex.microtile || tex->tex.macrotile[level] || |
        (referenced_hw && !(usage & PIPE_TRANSFER_READ) && |
         r300_is_blit_supported(texture->format))) { |
        struct pipe_resource base; |
        if (r300->blitter->running) { |
            fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n"); |
            os_break(); |
        } |
        memset(&base, 0, sizeof(base)); |
        base.target = PIPE_TEXTURE_2D; |
        base.format = texture->format; |
        base.width0 = box->width; |
        base.height0 = box->height; |
        base.depth0 = 1; |
        base.array_size = 1; |
        base.usage = PIPE_USAGE_STAGING; |
        base.flags = R300_RESOURCE_FLAG_TRANSFER; |
        /* We must set the correct texture target and dimensions if needed for a 3D transfer. */ |
        if (box->depth > 1 && util_max_layer(texture, level) > 0) { |
            base.target = texture->target; |
            if (base.target == PIPE_TEXTURE_3D) { |
                base.depth0 = util_next_power_of_two(box->depth); |
            } |
        } |
        /* Create the temporary texture. */ |
        trans->linear_texture = r300_resource( |
            ctx->screen->resource_create(ctx->screen, |
                                         &base)); |
        if (!trans->linear_texture) { |
            /* Oh crap, the thing can't create the texture. |
             * Let's flush and try again. */ |
            r300_flush(ctx, 0, NULL); |
            trans->linear_texture = r300_resource( |
                ctx->screen->resource_create(ctx->screen, |
                                             &base)); |
            if (!trans->linear_texture) { |
                fprintf(stderr, |
                        "r300: Failed to create a transfer object.\n"); |
                FREE(trans); |
                return NULL; |
            } |
        } |
        assert(!trans->linear_texture->tex.microtile && |
               !trans->linear_texture->tex.macrotile[0]); |
        /* Set the stride. */ |
        trans->transfer.stride = |
            trans->linear_texture->tex.stride_in_bytes[0]; |
        trans->transfer.layer_stride = |
            trans->linear_texture->tex.layer_size_in_bytes[0]; |
        if (usage & PIPE_TRANSFER_READ) { |
            /* We cannot map a tiled texture directly because the data is |
             * in a different order, therefore we do detiling using a blit. */ |
            r300_copy_from_tiled_texture(ctx, trans); |
            /* Always referenced in the blit. */ |
            r300_flush(ctx, 0, NULL); |
        } |
    } else { |
        /* Unpipelined transfer. */ |
        trans->transfer.stride = tex->tex.stride_in_bytes[level]; |
        trans->transfer.layer_stride = tex->tex.layer_size_in_bytes[level]; |
        trans->offset = r300_texture_get_offset(tex, level, box->z); |
        if (referenced_cs && |
            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { |
            r300_flush(ctx, 0, NULL); |
        } |
    } |
    if (trans->linear_texture) { |
        /* The detiled texture is of the same size as the region being mapped |
         * (no offset needed). */ |
        map = r300->rws->buffer_map(trans->linear_texture->cs_buf, |
                                    r300->cs, usage); |
        if (!map) { |
            /* Drop the staging texture together with the transfer. */ |
            pipe_resource_reference( |
                (struct pipe_resource**)&trans->linear_texture, NULL); |
            FREE(trans); |
            return NULL; |
        } |
        *transfer = &trans->transfer; |
        return map; |
    } else { |
        /* Tiling is disabled. */ |
        map = r300->rws->buffer_map(tex->cs_buf, r300->cs, usage); |
        if (!map) { |
            FREE(trans); |
            return NULL; |
        } |
        *transfer = &trans->transfer; |
        /* Advance to the level/layer, then to the first block of the box. */ |
        return map + trans->offset + |
            box->y / util_format_get_blockheight(format) * trans->transfer.stride + |
            box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); |
    } |
} |
/* Undo r300_texture_transfer_map(): unmap the buffer, write a staging |
 * texture back to the real texture for write transfers, and free the |
 * transfer object. */ |
void r300_texture_transfer_unmap(struct pipe_context *ctx, |
                                 struct pipe_transfer *transfer) |
{ |
    struct r300_transfer *trans = r300_transfer(transfer); |
    struct radeon_winsys *rws = r300_context(ctx)->rws; |
    if (!trans->linear_texture) { |
        /* The texture's own buffer was mapped directly. */ |
        rws->buffer_unmap(r300_resource(transfer->resource)->cs_buf); |
    } else { |
        rws->buffer_unmap(trans->linear_texture->cs_buf); |
        /* Only write transfers have data to push back. */ |
        if (transfer->usage & PIPE_TRANSFER_WRITE) { |
            r300_copy_into_tiled_texture(ctx, trans); |
        } |
        /* Release the staging texture. */ |
        pipe_resource_reference( |
            (struct pipe_resource**)&trans->linear_texture, NULL); |
    } |
    FREE(transfer); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_transfer.h |
---|
0,0 → 1,44 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_TRANSFER |
#define R300_TRANSFER |
#include "pipe/p_context.h" |
struct r300_context; |
/* Map the region `box` of mip level `level` of `texture` for CPU access with |
 * the given PIPE_TRANSFER_* usage flags. On success stores the transfer |
 * object in *transfer and returns the mapped pointer; returns NULL on |
 * failure. */ |
void * |
r300_texture_transfer_map(struct pipe_context *ctx, |
struct pipe_resource *texture, |
unsigned level, |
unsigned usage, |
const struct pipe_box *box, |
struct pipe_transfer **transfer); |
/* Unmap a transfer obtained from r300_texture_transfer_map() and free it. */ |
void |
r300_texture_transfer_unmap(struct pipe_context *ctx, |
struct pipe_transfer *transfer); |
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_vs.c |
---|
0,0 → 1,289 |
/* |
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#include "r300_vs.h" |
#include "r300_context.h" |
#include "r300_screen.h" |
#include "r300_tgsi_to_rc.h" |
#include "r300_reg.h" |
#include "tgsi/tgsi_dump.h" |
#include "tgsi/tgsi_parse.h" |
#include "tgsi/tgsi_ureg.h" |
#include "compiler/radeon_compiler.h" |
/* Build the semantic -> output index table (r300_shader_semantics) from the |
 * scanned TGSI info of a vertex shader. The WPOS slot is set to the index |
 * one past the last declared output. */ |
static void r300_shader_read_vs_outputs( |
    struct r300_context *r300, |
    struct tgsi_shader_info* info, |
    struct r300_shader_semantics* vs_outputs) |
{ |
    int out; |
    unsigned sem_idx; |
    r300_shader_semantics_reset(vs_outputs); |
    for (out = 0; out < info->num_outputs; out++) { |
        sem_idx = info->output_semantic_index[out]; |
        switch (info->output_semantic_name[out]) { |
            /* Semantics that exist only once. */ |
            case TGSI_SEMANTIC_POSITION: |
                assert(sem_idx == 0); |
                vs_outputs->pos = out; |
                break; |
            case TGSI_SEMANTIC_PSIZE: |
                assert(sem_idx == 0); |
                vs_outputs->psize = out; |
                break; |
            case TGSI_SEMANTIC_FOG: |
                assert(sem_idx == 0); |
                vs_outputs->fog = out; |
                break; |
            /* Indexed semantics. */ |
            case TGSI_SEMANTIC_COLOR: |
                assert(sem_idx < ATTR_COLOR_COUNT); |
                vs_outputs->color[sem_idx] = out; |
                break; |
            case TGSI_SEMANTIC_BCOLOR: |
                assert(sem_idx < ATTR_COLOR_COUNT); |
                vs_outputs->bcolor[sem_idx] = out; |
                break; |
            case TGSI_SEMANTIC_GENERIC: |
                assert(sem_idx < ATTR_GENERIC_COUNT); |
                vs_outputs->generic[sem_idx] = out; |
                vs_outputs->num_generic++; |
                break; |
            /* Semantics that are not recorded, only reported. */ |
            case TGSI_SEMANTIC_EDGEFLAG: |
                assert(sem_idx == 0); |
                fprintf(stderr, "r300 VP: cannot handle edgeflag output.\n"); |
                break; |
            case TGSI_SEMANTIC_CLIPVERTEX: |
                assert(sem_idx == 0); |
                /* Draw does clip vertex for us. */ |
                if (r300->screen->caps.has_tcl) { |
                    fprintf(stderr, "r300 VP: cannot handle clip vertex output.\n"); |
                } |
                break; |
            default: |
                fprintf(stderr, "r300 VP: unknown vertex output semantic: %i.\n", |
                        info->output_semantic_name[out]); |
                break; |
        } |
    } |
    /* WPOS is a straight copy of POSITION and it's always emitted. */ |
    vs_outputs->wpos = out; |
} |
static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) |
{ |
struct r300_vertex_shader * vs = c->UserData; |
struct r300_shader_semantics* outputs = &vs->outputs; |
struct tgsi_shader_info* info = &vs->info; |
int i, reg = 0; |
boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED || |
outputs->bcolor[1] != ATTR_UNUSED; |
/* Fill in the input mapping */ |
for (i = 0; i < info->num_inputs; i++) |
c->code->inputs[i] = i; |
/* Position. */ |
if (outputs->pos != ATTR_UNUSED) { |
c->code->outputs[outputs->pos] = reg++; |
} else { |
assert(0); |
} |
/* Point size. */ |
if (outputs->psize != ATTR_UNUSED) { |
c->code->outputs[outputs->psize] = reg++; |
} |
/* If we're writing back facing colors we need to send |
* four colors to make front/back face colors selection work. |
* If the vertex program doesn't write all 4 colors, lets |
* pretend it does by skipping output index reg so the colors |
* get written into appropriate output vectors. |
*/ |
/* Colors. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
if (outputs->color[i] != ATTR_UNUSED) { |
c->code->outputs[outputs->color[i]] = reg++; |
} else if (any_bcolor_used || |
outputs->color[1] != ATTR_UNUSED) { |
reg++; |
} |
} |
/* Back-face colors. */ |
for (i = 0; i < ATTR_COLOR_COUNT; i++) { |
if (outputs->bcolor[i] != ATTR_UNUSED) { |
c->code->outputs[outputs->bcolor[i]] = reg++; |
} else if (any_bcolor_used) { |
reg++; |
} |
} |
/* Texture coordinates. */ |
for (i = 0; i < ATTR_GENERIC_COUNT; i++) { |
if (outputs->generic[i] != ATTR_UNUSED) { |
c->code->outputs[outputs->generic[i]] = reg++; |
} |
} |
/* Fog coordinates. */ |
if (outputs->fog != ATTR_UNUSED) { |
c->code->outputs[outputs->fog] = reg++; |
} |
/* WPOS. */ |
c->code->outputs[outputs->wpos] = reg++; |
} |
/* Scan the shader's TGSI tokens into vs->info and rebuild the output |
 * semantics table vs->outputs from the scan result. */ |
void r300_init_vs_outputs(struct r300_context *r300, |
                          struct r300_vertex_shader *vs) |
{ |
    struct tgsi_shader_info *scan = &vs->info; |
    tgsi_scan_shader(vs->state.tokens, scan); |
    r300_shader_read_vs_outputs(r300, scan, &vs->outputs); |
} |
/* Replace the shader's tokens with a minimal program that writes (0, 0, 0, 1) |
 * to POSITION, mark the shader as a dummy and translate it. */ |
static void r300_dummy_vertex_shader( |
    struct r300_context* r300, |
    struct r300_vertex_shader* shader) |
{ |
    struct ureg_program *prog; |
    struct ureg_dst pos_out; |
    struct ureg_src origin; |
    /* Marked up front so a failing compile of the dummy can be detected. */ |
    shader->dummy = TRUE; |
    /* Make a simple vertex shader which outputs (0, 0, 0, 1), |
     * effectively rendering nothing. */ |
    prog = ureg_create(TGSI_PROCESSOR_VERTEX); |
    pos_out = ureg_DECL_output(prog, TGSI_SEMANTIC_POSITION, 0); |
    origin = ureg_imm4f(prog, 0, 0, 0, 1); |
    ureg_MOV(prog, pos_out, origin); |
    ureg_END(prog); |
    /* Duplicate the tokens before the ureg program is destroyed. */ |
    shader->state.tokens = tgsi_dup_tokens(ureg_finalize(prog)); |
    ureg_destroy(prog); |
    r300_init_vs_outputs(r300, shader); |
    r300_translate_vertex_shader(r300, shader); |
} |
/* Compile the TGSI tokens of a vertex shader into hardware code in vs->code. |
 * |
 * If the TGSI translation or the compilation fails, the shader is replaced |
 * by a dummy shader via r300_dummy_vertex_shader(), which calls back into |
 * this function. If even the dummy shader fails to compile, the process is |
 * aborted. On success the external and immediate constant counts are filled |
 * in. The compiler state is destroyed on every return path. */ |
void r300_translate_vertex_shader(struct r300_context *r300, |
                                  struct r300_vertex_shader *vs) |
{ |
    struct r300_vertex_program_compiler compiler; |
    struct tgsi_to_rc ttr; |
    unsigned i; |
    /* Setup the compiler */ |
    memset(&compiler, 0, sizeof(compiler)); |
    rc_init(&compiler.Base, NULL); |
    if (DBG_ON(r300, DBG_VP)) { |
        compiler.Base.Debug |= RC_DBG_LOG; |
    } |
    if (DBG_ON(r300, DBG_P_STAT)) { |
        compiler.Base.Debug |= RC_DBG_STATS; |
    } |
    compiler.code = &vs->code; |
    compiler.UserData = vs; |
    compiler.Base.is_r500 = r300->screen->caps.is_r500; |
    compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); |
    compiler.Base.has_half_swizzles = FALSE; |
    compiler.Base.has_presub = FALSE; |
    compiler.Base.has_omod = FALSE; |
    compiler.Base.max_temp_regs = 32; |
    compiler.Base.max_constants = 256; |
    compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256; |
    if (compiler.Base.Debug & RC_DBG_LOG) { |
        DBG(r300, DBG_VP, "r300: Initial vertex program\n"); |
        tgsi_dump(vs->state.tokens, 0); |
    } |
    /* Translate TGSI to our internal representation */ |
    ttr.compiler = &compiler.Base; |
    ttr.info = &vs->info; |
    ttr.use_half_swizzles = FALSE; |
    r300_tgsi_to_rc(&ttr, vs->state.tokens); |
    if (ttr.error) { |
        fprintf(stderr, "r300 VP: Cannot translate a shader. " |
                "Using a dummy shader instead.\n"); |
        /* Release the compiler before handing over to the dummy shader, |
         * exactly as the compile-error path below does. */ |
        rc_destroy(&compiler.Base); |
        r300_dummy_vertex_shader(r300, vs); |
        return; |
    } |
    if (compiler.Base.Program.Constants.Count > 200) { |
        compiler.Base.remove_unused_constants = TRUE; |
    } |
    /* One bit per declared output plus one for WPOS. The mask is built from |
     * an unsigned constant because left-shifting a negative int is undefined |
     * behaviour. */ |
    compiler.RequiredOutputs = ~(~0u << (vs->info.num_outputs + 1)); |
    compiler.SetHwInputOutput = &set_vertex_inputs_outputs; |
    /* Insert the WPOS output. */ |
    rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); |
    /* Invoke the compiler */ |
    r3xx_compile_vertex_program(&compiler); |
    if (compiler.Base.Error) { |
        fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader" |
                " instead.\n", compiler.Base.ErrorMsg); |
        if (vs->dummy) { |
            /* The fallback itself failed; there is nothing left to try. */ |
            fprintf(stderr, "r300 VP: Cannot compile the dummy shader! " |
                    "Giving up...\n"); |
            abort(); |
        } |
        rc_destroy(&compiler.Base); |
        r300_dummy_vertex_shader(r300, vs); |
        return; |
    } |
    /* Initialize numbers of constants for each type. */ |
    /* Externals are the leading run of RC_CONSTANT_EXTERNAL entries. */ |
    vs->externals_count = 0; |
    for (i = 0; |
         i < vs->code.constants.Count && |
         vs->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) { |
        vs->externals_count = i+1; |
    } |
    /* Everything after the externals is expected to be an immediate. */ |
    for (; i < vs->code.constants.Count; i++) { |
        assert(vs->code.constants.Constants[i].Type == RC_CONSTANT_IMMEDIATE); |
    } |
    vs->immediates_count = vs->code.constants.Count - vs->externals_count; |
    /* And, finally... */ |
    rc_destroy(&compiler.Base); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_vs.h |
---|
0,0 → 1,68 |
/* |
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef R300_VS_H |
#define R300_VS_H |
#include "pipe/p_state.h" |
#include "tgsi/tgsi_scan.h" |
#include "compiler/radeon_code.h" |
#include "r300_context.h" |
#include "r300_shader_semantics.h" |
struct r300_context; |
/* Driver-side vertex shader object. It holds the TGSI tokens, the scan |
 * results derived from them, and the compiled form for either the hardware |
 * TCL path (code) or the software TCL path (draw_vs). */ |
struct r300_vertex_shader { |
/* Parent class */ |
struct pipe_shader_state state; |
/* Result of tgsi_scan_shader() on state.tokens. */ |
struct tgsi_shader_info info; |
/* Output semantic table, filled by r300_init_vs_outputs(). */ |
struct r300_shader_semantics outputs; |
/* Whether the shader was replaced by a dummy one due to a shader |
* compilation failure. */ |
boolean dummy; |
/* Numbers of constants for each type. */ |
unsigned externals_count; |
unsigned immediates_count; |
/* HWTCL-specific. */ |
/* Machine code (if translated) */ |
struct r300_vertex_program_code code; |
/* SWTCL-specific. */ |
/* Handle returned by draw_create_vertex_shader(). */ |
void *draw_vs; |
}; |
/* Scan vs->state.tokens into vs->info and rebuild vs->outputs. */ |
void r300_init_vs_outputs(struct r300_context *r300, |
struct r300_vertex_shader *vs); |
/* Compile vs->state.tokens into vs->code, falling back to a dummy shader |
 * if translation or compilation fails. */ |
void r300_translate_vertex_shader(struct r300_context *r300, |
struct r300_vertex_shader *vs); |
/* SW TCL path: rewrite the shader's tokens for the r300 rasterizer and |
 * create the draw-module shader stored in vs->draw_vs. */ |
void r300_draw_init_vertex_shader(struct r300_context *r300, |
struct r300_vertex_shader *vs); |
#endif /* R300_VS_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_vs_draw.c |
---|
0,0 → 1,377 |
/************************************************************************** |
* |
* Copyright 2009 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* This file contains the vertex shader transformations for SW TCL needed |
* to overcome the limitations of the r300 rasterizer. |
* |
* Transformations: |
* 1) If the secondary color output is present, the primary color must be |
* present too. |
* 2) If any back-face color output is present, there must be all 4 color |
* outputs and missing ones must be inserted. |
* 3) Insert a trailing texcoord output containing a copy of POS, for WPOS. |
* |
* I know this code is cumbersome, but I don't know of any nicer way |
* of transforming TGSI shaders. ~ M. |
*/ |
#include "r300_vs.h" |
#include <stdio.h> |
#include "tgsi/tgsi_transform.h" |
#include "tgsi/tgsi_dump.h" |
#include "draw/draw_context.h" |
/* State carried through one tgsi_transform_shader() pass. The base context |
 * must stay the first member: the callbacks cast the |
 * tgsi_transform_context pointer back to this type. */ |
struct vs_transform_context { |
struct tgsi_transform_context base; |
/* Which COLOR / BCOLOR outputs are declared (or were already inserted). */ |
boolean color_used[2]; |
boolean bcolor_used[2]; |
/* Index of the pos output, typically 0. */ |
unsigned pos_output; |
/* Index of the pos temp where all writes of pos are redirected to. */ |
unsigned pos_temp; |
/* The index of the last generic output, after which we insert a new |
* output for WPOS. */ |
int last_generic; |
/* Number of output declarations emitted so far, inserted ones included. */ |
unsigned num_outputs; |
/* Used to shift output decl. indices when inserting new ones. */ |
unsigned decl_shift; |
/* Used to remap writes to output decls if their indices changed. */ |
unsigned out_remap[32]; |
/* First instruction processed? */ |
boolean first_instruction; |
/* End instruction processed? */ |
boolean end_instruction; |
/* Set to TRUE for every temporary register index the shader declares. */ |
boolean temp_used[1024]; |
}; |
static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg) |
{ |
struct tgsi_full_declaration decl; |
decl = tgsi_default_full_declaration(); |
decl.Declaration.File = TGSI_FILE_TEMPORARY; |
decl.Range.First = decl.Range.Last = reg; |
ctx->emit_declaration(ctx, &decl); |
} |
/* Emit a declaration of output register OUT[reg] with the given semantic |
 * name/index and interpolation mode, and count it in num_outputs. */ |
static void emit_output(struct tgsi_transform_context *ctx, |
                        unsigned name, unsigned index, unsigned interp, |
                        unsigned reg) |
{ |
    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; |
    struct tgsi_full_declaration out_decl = tgsi_default_full_declaration(); |
    /* Register file and range. */ |
    out_decl.Declaration.File = TGSI_FILE_OUTPUT; |
    out_decl.Range.First = reg; |
    out_decl.Range.Last = reg; |
    /* Semantic. */ |
    out_decl.Declaration.Semantic = TRUE; |
    out_decl.Semantic.Name = name; |
    out_decl.Semantic.Index = index; |
    /* Interpolation. */ |
    out_decl.Declaration.Interpolate = 1; |
    out_decl.Interp.Interpolate = interp; |
    ctx->emit_declaration(ctx, &out_decl); |
    vsctx->num_outputs++; |
} |
/* Insert a new output declaration in front of `before`. Remap entries from |
 * before's first index onward are bumped by one, the new output is emitted |
 * at that index plus the current shift, and the shift is incremented. */ |
static void insert_output_before(struct tgsi_transform_context *ctx, |
                                 struct tgsi_full_declaration *before, |
                                 unsigned name, unsigned index, unsigned interp) |
{ |
    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; |
    const unsigned first = before->Range.First; |
    unsigned slot = first; |
    /* Make a place for the new output. */ |
    while (slot < Elements(vsctx->out_remap)) { |
        vsctx->out_remap[slot]++; |
        slot++; |
    } |
    /* Insert the new output. */ |
    emit_output(ctx, name, index, interp, first + vsctx->decl_shift); |
    vsctx->decl_shift++; |
} |
/* Insert a new output declaration right behind `after`. Remap entries past |
 * after's first index are bumped by one, the new output is emitted at that |
 * following index, and the shift is incremented. */ |
static void insert_output_after(struct tgsi_transform_context *ctx, |
                                struct tgsi_full_declaration *after, |
                                unsigned name, unsigned index, unsigned interp) |
{ |
    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; |
    const unsigned next = after->Range.First + 1; |
    unsigned slot = next; |
    /* Make a place for the new output. */ |
    while (slot < Elements(vsctx->out_remap)) { |
        vsctx->out_remap[slot]++; |
        slot++; |
    } |
    /* Insert the new output. */ |
    emit_output(ctx, name, index, interp, next); |
    vsctx->decl_shift++; |
} |
/* Declaration callback of the transform pass. |
 * For output declarations it records the position output, inserts missing |
 * COLOR/BCOLOR outputs in front of the current one, tracks the highest |
 * generic semantic index, and shifts the declaration's range by the number |
 * of outputs inserted so far. For temporary declarations it marks the |
 * declared registers as used. The declaration is then emitted, and BCOLOR1 |
 * is appended after a BCOLOR declaration when the shader does not declare |
 * it. */ |
static void transform_decl(struct tgsi_transform_context *ctx, |
struct tgsi_full_declaration *decl) |
{ |
struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; |
unsigned i; |
if (decl->Declaration.File == TGSI_FILE_OUTPUT) { |
switch (decl->Semantic.Name) { |
case TGSI_SEMANTIC_POSITION: |
/* Remember the original index; writes to it are redirected later. */ |
vsctx->pos_output = decl->Range.First; |
break; |
case TGSI_SEMANTIC_COLOR: |
assert(decl->Semantic.Index < 2); |
/* We must rasterize the first color if the second one is |
* used, otherwise the rasterizer doesn't do the color |
* selection correctly. Declare it, but don't write to it. */ |
if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) { |
insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0, |
TGSI_INTERPOLATE_LINEAR); |
vsctx->color_used[0] = TRUE; |
} |
break; |
case TGSI_SEMANTIC_BCOLOR: |
assert(decl->Semantic.Index < 2); |
/* We must rasterize all 4 colors if back-face colors are |
* used, otherwise the rasterizer doesn't do the color |
* selection correctly. Declare it, but don't write to it. */ |
if (!vsctx->color_used[0]) { |
insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0, |
TGSI_INTERPOLATE_LINEAR); |
vsctx->color_used[0] = TRUE; |
} |
if (!vsctx->color_used[1]) { |
insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1, |
TGSI_INTERPOLATE_LINEAR); |
vsctx->color_used[1] = TRUE; |
} |
if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) { |
insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, |
TGSI_INTERPOLATE_LINEAR); |
vsctx->bcolor_used[0] = TRUE; |
} |
break; |
case TGSI_SEMANTIC_GENERIC: |
/* Track the highest generic index; WPOS is added as the next one. */ |
vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index); |
break; |
} |
/* Since we're inserting new outputs in between, the following outputs |
* should be moved to the right so that they don't overlap with |
* the newly added ones. */ |
decl->Range.First += vsctx->decl_shift; |
decl->Range.Last += vsctx->decl_shift; |
++vsctx->num_outputs; |
} else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { |
/* Record used temps so transform_inst can pick a free one. */ |
for (i = decl->Range.First; i <= decl->Range.Last; i++) { |
vsctx->temp_used[i] = TRUE; |
} |
} |
ctx->emit_declaration(ctx, decl); |
/* Insert BCOLOR1 if needed. */ |
/* This runs after the declaration above was emitted, so the new output |
 * lands right behind it. */ |
if (decl->Declaration.File == TGSI_FILE_OUTPUT && |
decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR && |
!vsctx->bcolor_used[1]) { |
insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1, |
TGSI_INTERPOLATE_LINEAR); |
} |
} |
/* Instruction callback of the transform pass. |
 * Before the first instruction it declares the extra generic output used |
 * for WPOS and a free temporary that stands in for the position output. |
 * Writes to the position output are redirected to that temporary, writes to |
 * other outputs are remapped to their shifted indices, and two MOVs copying |
 * the temporary to the position and WPOS outputs are emitted right before |
 * END. Branch labels are moved by 2 to account for the inserted MOVs. */ |
static void transform_inst(struct tgsi_transform_context *ctx, |
struct tgsi_full_instruction *inst) |
{ |
struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx; |
struct tgsi_full_instruction new_inst; |
unsigned i; |
/* One-time setup, done when the first instruction arrives. */ |
if (!vsctx->first_instruction) { |
vsctx->first_instruction = TRUE; |
/* Insert the generic output for WPOS. */ |
emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1, |
TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs); |
/* Find a free temp for POSITION. */ |
for (i = 0; i < Elements(vsctx->temp_used); i++) { |
if (!vsctx->temp_used[i]) { |
emit_temp(ctx, i); |
vsctx->pos_temp = i; |
break; |
} |
} |
} |
if (inst->Instruction.Opcode == TGSI_OPCODE_END) { |
/* MOV OUT[pos_output], TEMP[pos_temp]; */ |
new_inst = tgsi_default_full_instruction(); |
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; |
new_inst.Instruction.NumDstRegs = 1; |
new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT; |
new_inst.Dst[0].Register.Index = vsctx->pos_output; |
new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; |
new_inst.Instruction.NumSrcRegs = 1; |
new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY; |
new_inst.Src[0].Register.Index = vsctx->pos_temp; |
ctx->emit_instruction(ctx, &new_inst); |
/* MOV OUT[n-1], TEMP[pos_temp]; */ |
/* OUT[n-1] is the WPOS generic declared at the first instruction. */ |
new_inst = tgsi_default_full_instruction(); |
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; |
new_inst.Instruction.NumDstRegs = 1; |
new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT; |
new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1; |
new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; |
new_inst.Instruction.NumSrcRegs = 1; |
new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY; |
new_inst.Src[0].Register.Index = vsctx->pos_temp; |
ctx->emit_instruction(ctx, &new_inst); |
vsctx->end_instruction = TRUE; |
} else { |
/* Not an END instruction. */ |
/* Fix writes to outputs. */ |
for (i = 0; i < inst->Instruction.NumDstRegs; i++) { |
struct tgsi_full_dst_register *dst = &inst->Dst[i]; |
if (dst->Register.File == TGSI_FILE_OUTPUT) { |
if (dst->Register.Index == vsctx->pos_output) { |
/* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */ |
dst->Register.File = TGSI_FILE_TEMPORARY; |
dst->Register.Index = vsctx->pos_temp; |
} else { |
/* Not a position, good... |
* Since we were changing the indices of output decls, |
* we must redirect writes into them too. */ |
dst->Register.Index = vsctx->out_remap[dst->Register.Index]; |
} |
} |
} |
/* Inserting 2 instructions before the END opcode moves all following |
* labels by 2. Subroutines are always after the END opcode so |
* they're always moved. */ |
if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) { |
inst->Label.Label += 2; |
} |
/* The labels of the following opcodes are moved only after |
* the END opcode. */ |
if (vsctx->end_instruction && |
(inst->Instruction.Opcode == TGSI_OPCODE_IF || |
inst->Instruction.Opcode == TGSI_OPCODE_ELSE || |
inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP || |
inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) { |
inst->Label.Label += 2; |
} |
} |
/* The possibly modified original instruction (END included) goes last. */ |
ctx->emit_instruction(ctx, inst); |
} |
/* Prepare a vertex shader for the SW TCL path. |
 * |
 * The shader's tokens are run through the transform pass of this file |
 * (missing color outputs inserted, position redirected through a temporary, |
 * a trailing generic output added for WPOS). The old tokens are freed, a |
 * draw-module shader is created from the new ones and stored in |
 * vs->draw_vs, and the output semantics table is rebuilt with the added |
 * generic moved into the wpos slot. |
 * |
 * Returns without changing vs if the new token buffer cannot be |
 * allocated. */ |
void r300_draw_init_vertex_shader(struct r300_context *r300, |
                                  struct r300_vertex_shader *vs) |
{ |
    struct draw_context *draw = r300->draw; |
    struct pipe_shader_state new_vs; |
    struct tgsi_shader_info info; |
    struct vs_transform_context transform; |
    /* Headroom for the declarations and instructions the pass inserts. */ |
    const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */; |
    unsigned i; |
    tgsi_scan_shader(vs->state.tokens, &info); |
    /* Only .tokens is assigned below; zero the rest so that |
     * draw_create_vertex_shader() never reads indeterminate members. */ |
    memset(&new_vs, 0, sizeof(new_vs)); |
    new_vs.tokens = tgsi_alloc_tokens(newLen); |
    if (new_vs.tokens == NULL) |
        return; |
    memset(&transform, 0, sizeof(transform)); |
    /* Start with an identity output remap. */ |
    for (i = 0; i < Elements(transform.out_remap); i++) { |
        transform.out_remap[i] = i; |
    } |
    transform.last_generic = -1; |
    transform.base.transform_instruction = transform_inst; |
    transform.base.transform_declaration = transform_decl; |
    /* Tell the pass which color outputs the shader already declares. */ |
    for (i = 0; i < info.num_outputs; i++) { |
        unsigned index = info.output_semantic_index[i]; |
        switch (info.output_semantic_name[i]) { |
            case TGSI_SEMANTIC_COLOR: |
                assert(index < 2); |
                transform.color_used[index] = TRUE; |
                break; |
            case TGSI_SEMANTIC_BCOLOR: |
                assert(index < 2); |
                transform.bcolor_used[index] = TRUE; |
                break; |
        } |
    } |
    tgsi_transform_shader(vs->state.tokens, |
                          (struct tgsi_token*)new_vs.tokens, |
                          newLen, &transform.base); |
#if 0 |
    printf("----------------------------------------------\norig shader:\n"); |
    tgsi_dump(vs->state.tokens, 0); |
    printf("----------------------------------------------\nnew shader:\n"); |
    tgsi_dump(new_vs.tokens, 0); |
    printf("----------------------------------------------\n"); |
#endif |
    /* Free old tokens. */ |
    FREE((void*)vs->state.tokens); |
    vs->draw_vs = draw_create_vertex_shader(draw, &new_vs); |
    /* Instead of duplicating and freeing the tokens, copy the pointer directly. */ |
    vs->state.tokens = new_vs.tokens; |
    /* Init the VS output table for the rasterizer. */ |
    r300_init_vs_outputs(r300, vs); |
    /* Make the last generic be WPOS. */ |
    vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1]; |
    vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED; |
} |