Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 4357 → Rev 4358

/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Android.mk
0,0 → 1,44
# Mesa 3-D graphics library
#
# Copyright (C) 2011 Chia-I Wu <olvaffe@gmail.com>
# Copyright (C) 2011 LunarG Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
 
# Android build description for the r300 gallium pipe driver.
LOCAL_PATH := $(call my-dir)

# Makefile.sources provides C_SOURCES, the list of driver source files.
include $(LOCAL_PATH)/Makefile.sources

include $(CLEAR_VARS)

LOCAL_MODULE := libmesa_pipe_r300
LOCAL_SRC_FILES := $(C_SOURCES)

# Header search directories, kept in their original lookup order.
LOCAL_C_INCLUDES := \
	$(MESA_TOP)/src/mapi \
	$(MESA_TOP)/src/glsl \
	$(MESA_TOP)/src/mesa \
	$(DRM_TOP) \
	$(DRM_TOP)/include/drm

# Pull in the shared gallium settings, then build the module as a static library.
include $(GALLIUM_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
 
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Makefile.am
0,0 → 1,44
# Makefile.sources defines C_SOURCES; Automake.inc carries the settings
# shared by the gallium directories.
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc

# Two convenience libraries (not installed) and one unit-test program that
# is both built and run by "make check".
noinst_LTLIBRARIES = libr300.la libr300-helper.la
testdir = compiler/tests
check_PROGRAMS = r300_compiler_tests
TESTS = r300_compiler_tests

# Include paths first, then the flag sets supplied by configure/Automake.inc.
AM_CFLAGS = \
	-I$(top_srcdir)/src/gallium/drivers \
	-I$(top_srcdir)/include \
	-I$(top_srcdir)/src/mesa \
	-I$(top_srcdir)/src/glsl \
	-I$(top_srcdir)/src/mapi \
	$(VISIBILITY_CFLAGS) \
	$(GALLIUM_CFLAGS) \
	$(LLVM_CFLAGS) \
	$(RADEON_CFLAGS)
 
# Unit-test program: links against both libraries built in this directory
# plus the gallium auxiliary library.
r300_compiler_tests_LDADD = \
	libr300.la \
	libr300-helper.la \
	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
	$(GALLIUM_DRI_LIB_DEPS)

# The tests include compiler headers directly from the compiler directory.
r300_compiler_tests_CPPFLAGS = \
	-I$(top_srcdir)/src/gallium/drivers/r300/compiler

r300_compiler_tests_SOURCES = \
	$(testdir)/r300_compiler_tests.c \
	$(testdir)/radeon_compiler_optimize_tests.c \
	$(testdir)/radeon_compiler_regalloc_tests.c \
	$(testdir)/radeon_compiler_util_tests.c \
	$(testdir)/rc_test_helpers.c \
	$(testdir)/unit_test.c

# The driver library is built from the shared source list.
libr300_la_SOURCES = $(C_SOURCES)
 
# These two files are already part of libmesagallium, which the dri targets
# link. If they were added directly to r300g, the dri-r300 target would end
# up with duplicated symbols; if they were left out, the other *-r300 targets
# would fail with undefined symbols.
#
# Solve this by building them into a separate helper library that can be
# linked in place of libmesagallium.
libr300_helper_la_SOURCES = \
	$(top_srcdir)/src/glsl/ralloc.c \
	$(top_srcdir)/src/mesa/program/register_allocate.c
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Makefile.in
0,0 → 1,1680
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Source search directory; the @srcdir@ placeholder is substituted when this
# template is turned into a Makefile.
VPATH = @srcdir@
# Shell test that succeeds when both MAKEFILE_LIST and MAKELEVEL expand to
# non-empty strings; this file uses that as its "is this GNU make" check.
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
# Shell fragment that succeeds when the single-character option stored in the
# shell variable target_option appears among the make flags.
#  - target_option must be exactly one character, otherwise the fragment
#    prints an internal error and exits 1.
#  - Under GNU make the flags are read from MFLAGS; otherwise from MAKEFLAGS
#    after backslash-escaped whitespace has been removed.
#  - Words containing '=' or starting with '--' are ignored.
#  - For the options -I, -O and -l the attached argument is trimmed off, and
#    for a bare -I/-O/-l/-d/-E/-D/-m/-J/-T the following word is skipped, so
#    option arguments are not mistaken for flags.
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
# Succeeds when make was started with the 'n' flag.
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
# Succeeds when make was started with the 'k' flag.
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
# Package-specific subdirectories of the standard installation directories.
pkgdatadir    = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir     = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@

# "cd" with CDPATH set to a value derived from ZSH_VERSION and PATH_SEPARATOR.
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd

# Install command variants built on $(install_sh) / $(INSTALL_DATA).
install_sh_DATA    = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT  = $(install_sh) -c
INSTALL_HEADER     = $(INSTALL_DATA)
transform          = $(program_transform_name)

# Install/uninstall phase markers; each one is the shell no-op ":".
NORMAL_INSTALL   = :
PRE_INSTALL      = :
POST_INSTALL     = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL    = :
POST_UNINSTALL   = :

# System triplets, substituted from @build@, @host@ and @target@.
build_triplet  = @build@
host_triplet   = @host@
target_triplet = @target@
# Build-system files belonging to this directory.
DIST_COMMON = \
	$(srcdir)/Makefile.sources \
	$(top_srcdir)/src/gallium/Automake.inc \
	$(srcdir)/Makefile.in \
	$(srcdir)/Makefile.am \
	$(top_srcdir)/bin/depcomp \
	$(top_srcdir)/bin/test-driver

# The unit-test program is both a check-time build product and a test.
check_PROGRAMS = r300_compiler_tests$(EXEEXT)
TESTS = r300_compiler_tests$(EXEEXT)

# Location of this directory relative to the top of the tree.
subdir = src/gallium/drivers/r300

# Inputs whose modification triggers regeneration of the build system.
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = \
	$(top_srcdir)/m4/ax_prog_bison.m4 \
	$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
	$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
	$(top_srcdir)/m4/ax_prog_flex.m4 \
	$(top_srcdir)/m4/ax_pthread.m4 \
	$(top_srcdir)/m4/ax_python_module.m4 \
	$(top_srcdir)/m4/libtool.m4 \
	$(top_srcdir)/m4/ltoptions.m4 \
	$(top_srcdir)/m4/ltsugar.m4 \
	$(top_srcdir)/m4/ltversion.m4 \
	$(top_srcdir)/m4/lt~obsolete.m4 \
	$(top_srcdir)/configure.ac
am__configure_deps = \
	$(am__aclocal_m4_deps) \
	$(CONFIGURE_DEPENDENCIES) \
	$(ACLOCAL_M4)

mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)

# Helper library: objects for ralloc.c and register_allocate.c.
libr300_helper_la_LIBADD =
am_libr300_helper_la_OBJECTS = ralloc.lo register_allocate.lo
libr300_helper_la_OBJECTS = $(am_libr300_helper_la_OBJECTS)

# Libtool verbosity: "--silent" at level 0, nothing at level 1.
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =

# Driver library: one libtool object per entry of C_SOURCES.
libr300_la_LIBADD =
am__objects_1 = \
	r300_blit.lo r300_chipset.lo r300_context.lo \
	r300_debug.lo r300_emit.lo r300_flush.lo r300_fs.lo \
	r300_hyperz.lo r300_query.lo r300_render.lo \
	r300_render_stencilref.lo r300_render_translate.lo \
	r300_resource.lo r300_screen.lo r300_screen_buffer.lo \
	r300_state.lo r300_state_derived.lo r300_vs.lo r300_vs_draw.lo \
	r300_texture.lo r300_texture_desc.lo r300_tgsi_to_rc.lo \
	r300_transfer.lo radeon_code.lo radeon_compiler.lo \
	radeon_compiler_util.lo radeon_emulate_branches.lo \
	radeon_emulate_loops.lo radeon_inline_literals.lo \
	radeon_program.lo radeon_program_print.lo radeon_opcodes.lo \
	radeon_program_alu.lo radeon_program_pair.lo \
	radeon_program_tex.lo radeon_pair_translate.lo \
	radeon_pair_schedule.lo radeon_pair_regalloc.lo \
	radeon_pair_dead_sources.lo radeon_dataflow.lo \
	radeon_dataflow_deadcode.lo radeon_dataflow_swizzles.lo \
	radeon_list.lo radeon_optimize.lo radeon_remove_constants.lo \
	radeon_rename_regs.lo radeon_vert_fc.lo radeon_variable.lo \
	r3xx_fragprog.lo r300_fragprog.lo r300_fragprog_swizzle.lo \
	r300_fragprog_emit.lo r500_fragprog.lo r500_fragprog_emit.lo \
	r3xx_vertprog.lo r3xx_vertprog_dump.lo memory_pool.lo
am_libr300_la_OBJECTS = $(am__objects_1)
libr300_la_OBJECTS = $(am_libr300_la_OBJECTS)

# Unit-test program: objects carry the program name as a prefix.
am_r300_compiler_tests_OBJECTS = \
	r300_compiler_tests-r300_compiler_tests.$(OBJEXT) \
	r300_compiler_tests-radeon_compiler_optimize_tests.$(OBJEXT) \
	r300_compiler_tests-radeon_compiler_regalloc_tests.$(OBJEXT) \
	r300_compiler_tests-radeon_compiler_util_tests.$(OBJEXT) \
	r300_compiler_tests-rc_test_helpers.$(OBJEXT) \
	r300_compiler_tests-unit_test.$(OBJEXT)
r300_compiler_tests_OBJECTS = $(am_r300_compiler_tests_OBJECTS)
am__DEPENDENCIES_1 =
r300_compiler_tests_DEPENDENCIES = \
	libr300.la \
	libr300-helper.la \
	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
	$(am__DEPENDENCIES_1)
# Verbosity switches. Each AM_V_x picks the _0 or _1 variant according to
# @AM_V@, falling back to @AM_DEFAULT_V@ when the selector expands empty.
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :

AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =

AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =

# Dependency tracking helpers.
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f

# C compile command lines: plain and through libtool.
COMPILE = \
	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = \
	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
	$(AM_CFLAGS) $(CFLAGS)

AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =

# Link command line; the output file is the rule's target.
CCLD = $(CC)
LINK = \
	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
	$(AM_LDFLAGS) $(LDFLAGS) -o $@

AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =

# Every source file compiled in this directory.
SOURCES = \
	$(libr300_helper_la_SOURCES) $(libr300_la_SOURCES) \
	$(r300_compiler_tests_SOURCES)
DIST_SOURCES = \
	$(libr300_helper_la_SOURCES) $(libr300_la_SOURCES) \
	$(r300_compiler_tests_SOURCES)
# Shell test: fails when AM_UPDATE_INFO_DIR is n, no or NO; otherwise
# succeeds only if "install-info --version" runs successfully.
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
# Files handed to the tags helpers below.
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
# Sets the shell variable "unique"; a name that is not an existing file in
# the current directory is prefixed with $(srcdir)/.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
# Tag generator programs.
ETAGS = etags
CTAGS = ctags
# Shell fragment that clears all colour variables and disables coloured
# test output.
am__tty_colors_dummy = \
mgn= red= grn= lgn= blu= brg= std=; \
am__color_tests=no
# Shell fragment that decides whether test output is coloured:
#  - AM_COLOR_TESTS=no     -> never
#  - AM_COLOR_TESTS=always -> always
#  - otherwise             -> only when TERM is not "dumb" and stdout is a tty
# NOTE(review): every colour variable assigned in the "yes" branch is an empty
# string here, so enabling colour has no visible effect. Terminal escape
# sequences were presumably intended -- confirm against a pristine copy.
am__tty_colors = { \
$(am__tty_colors_dummy); \
if test "X$(AM_COLOR_TESTS)" = Xno; then \
am__color_tests=no; \
elif test "X$(AM_COLOR_TESTS)" = Xalways; then \
am__color_tests=yes; \
elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \
am__color_tests=yes; \
fi; \
if test $$am__color_tests = yes; then \
red=''; \
grn=''; \
lgn=''; \
blu=''; \
mgn=''; \
brg=''; \
std=''; \
fi; \
}
# Sets the shell variable srcdirstrip. The sed expression replaces every
# character of $(srcdir) with ".", so the result is a pattern matching any
# string of the same length as $(srcdir).
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
# Sets f from p: a leading "$(srcdir)/" component is removed (using the
# srcdirstrip pattern); any other value is copied unchanged.
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
# Sets f to p with everything up to the last "/" removed.
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
# Maximum number of files emitted per line by am__nobase_list.
am__install_max = 40
# Sets srcdirstrip to $(srcdir) with regular-expression metacharacters
# backslash-escaped.
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
# Prints each word of $list on its own line with "srcdirstrip/" removed.
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
# Groups the words of $list by directory and prints "dir file..." lines,
# flushing a directory's line whenever it reaches am__install_max files.
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
# Joins input lines in two passes: up to 8 lines per line, then up to 5 of
# those per line.
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
# Removes $files from $dir, echoing the command first. Does nothing when
# $files is empty or when $dir is neither a directory, a file, nor readable.
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
# Regular expressions matching the ":recheck:", ":global-test-result:" and
# ":copy-in-global-log:" field names read from .trs files by the awk
# programs below.
am__recheck_rx = ^[ ]*:recheck:[ ]*
am__global_test_result_rx = ^[ ]*:global-test-result:[ ]*
am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]*
# A command that, given a newline-separated list of test names on the
# standard input, print the name of the tests that are to be re-run
# upon "make recheck".
# A test is printed unless its .trs file contains a ":recheck: no" line
# before any ":recheck: yes" line, or neither its .trs nor its .log file
# can be read.
am__list_recheck_tests = $(AWK) '{ \
recheck = 1; \
while ((rc = (getline line < ($$0 ".trs"))) != 0) \
{ \
if (rc < 0) \
{ \
if ((getline line2 < ($$0 ".log")) < 0) \
recheck = 0; \
break; \
} \
else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \
{ \
recheck = 0; \
break; \
} \
else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \
{ \
break; \
} \
}; \
if (recheck) \
print $$0; \
close ($$0 ".trs"); \
close ($$0 ".log"); \
}'
# A command that, given a newline-separated list of test names on the
# standard input, create the global log from their .trs and .log files.
# For each test the .trs file supplies the section title (the value of
# ":global-test-result:", default "RUN") and may suppress the copy with
# ":copy-in-global-log: no". An unreadable .trs or .log file is fatal.
am__create_global_log = $(AWK) ' \
function fatal(msg) \
{ \
print "fatal: making $@: " msg | "cat >&2"; \
exit 1; \
} \
function rst_section(header) \
{ \
print header; \
len = length(header); \
for (i = 1; i <= len; i = i + 1) \
printf "="; \
printf "\n\n"; \
} \
{ \
copy_in_global_log = 1; \
global_test_result = "RUN"; \
while ((rc = (getline line < ($$0 ".trs"))) != 0) \
{ \
if (rc < 0) \
fatal("failed to read from " $$0 ".trs"); \
if (line ~ /$(am__global_test_result_rx)/) \
{ \
sub("$(am__global_test_result_rx)", "", line); \
sub("[ ]*$$", "", line); \
global_test_result = line; \
} \
else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \
copy_in_global_log = 0; \
}; \
if (copy_in_global_log) \
{ \
rst_section(global_test_result ": " $$0); \
while ((rc = (getline line < ($$0 ".log"))) != 0) \
{ \
if (rc < 0) \
fatal("failed to read from " $$0 ".log"); \
print line; \
}; \
printf "\n"; \
}; \
close ($$0 ".trs"); \
close ($$0 ".log"); \
}'
# Restructured Text title: prints a line of "=" characters, the title read
# from standard input, another line of "=" characters, and a blank line.
am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; }
# Solaris 10 'make', and several other traditional 'make' implementations,
# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it
# by disabling -e (using the XSI extension "set +e") if it's set.
am__sh_e_setup = case $$- in *e*) set +e;; esac
# Default flags passed to test drivers.
am__common_driver_flags = \
--color-tests "$$am__color_tests" \
--enable-hard-errors "$$am__enable_hard_errors" \
--expect-failure "$$am__expect_failure"
# To be inserted before the command running the test. Creates the
# directory for the log if needed. Stores in $dir the directory
# containing $f, in $tst the test, in $log the log. Executes the
# developer-defined test setup AM_TESTS_ENVIRONMENT (if any), and
# passes TESTS_ENVIRONMENT. Sets up options for the wrapper that
# will run the test scripts (or their associated LOG_COMPILER, if
# they have one).
am__check_pre = \
$(am__sh_e_setup); \
$(am__vpath_adj_setup) $(am__vpath_adj) \
$(am__tty_colors); \
srcdir=$(srcdir); export srcdir; \
case "$@" in \
*/*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \
*) am__odir=.;; \
esac; \
test "x$$am__odir" = x"." || test -d "$$am__odir" \
|| $(MKDIR_P) "$$am__odir" || exit $$?; \
if test -f "./$$f"; then dir=./; \
elif test -f "$$f"; then dir=; \
else dir="$(srcdir)/"; fi; \
tst=$$dir$$f; log='$@'; \
if test -n '$(DISABLE_HARD_ERRORS)'; then \
am__enable_hard_errors=no; \
else \
am__enable_hard_errors=yes; \
fi; \
case " $(XFAIL_TESTS) " in \
*[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \
am__expect_failure=yes;; \
*) \
am__expect_failure=no;; \
esac; \
$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT)
# A shell command to get the names of the tests scripts with any registered
# extension removed (i.e., equivalently, the names of the test logs, with
# the '.log' extension removed). The result is saved in the shell variable
# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly,
# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)",
# since that might cause problem with VPATH rewrites for suffix-less tests.
# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'.
am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
# Test-harness configuration: logs to re-check, the summary log file name,
# recognised test extensions, and the driver/compile command for plain tests.
RECHECK_LOGS = $(TEST_LOGS)
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
TEST_EXTENSIONS = @EXEEXT@ .test
LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS)
# Sets the shell variable b from the automatic variables: the stem '$*' is
# used, except when the target contains a "/" but the stem does not, in
# which case b is the target name with its ".log" suffix removed.
am__set_b = \
case '$@' in \
*/*) \
case '$*' in \
*/*) b='$*';; \
*) b=`echo '$@' | sed 's/\.log$$//'`; \
esac;; \
*) \
b='$*';; \
esac
# Derive the log file names from TESTS: append ".log", then collapse the
# "@EXEEXT@.log" and ".test.log" suffixes to plain ".log".
am__test_logs1 = $(TESTS:=.log)
am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log)
TEST_LOGS = $(am__test_logs2:.test.log=.log)
# Driver and compile command for tests carrying the ".test" extension.
TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \
$(TEST_LOG_FLAGS)
# Union of the common build files, the sources, Texinfo sources and
# EXTRA_DIST for this directory.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
# Substitution table. Every @NAME@ token on the right-hand side is a
# placeholder that is replaced with a concrete value when this template is
# instantiated; the list is kept in the generator's sorted order.
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
# Driver sources: the r300 gallium state tracker files first, then the
# shader compiler files under compiler/.
C_SOURCES = \
	r300_blit.c \
	r300_chipset.c \
	r300_context.c \
	r300_debug.c \
	r300_emit.c \
	r300_flush.c \
	r300_fs.c \
	r300_hyperz.c \
	r300_query.c \
	r300_render.c \
	r300_render_stencilref.c \
	r300_render_translate.c \
	r300_resource.c \
	r300_screen.c \
	r300_screen_buffer.c \
	r300_state.c \
	r300_state_derived.c \
	r300_vs.c \
	r300_vs_draw.c \
	r300_texture.c \
	r300_texture_desc.c \
	r300_tgsi_to_rc.c \
	r300_transfer.c \
	\
	compiler/radeon_code.c \
	compiler/radeon_compiler.c \
	compiler/radeon_compiler_util.c \
	compiler/radeon_emulate_branches.c \
	compiler/radeon_emulate_loops.c \
	compiler/radeon_inline_literals.c \
	compiler/radeon_program.c \
	compiler/radeon_program_print.c \
	compiler/radeon_opcodes.c \
	compiler/radeon_program_alu.c \
	compiler/radeon_program_pair.c \
	compiler/radeon_program_tex.c \
	compiler/radeon_pair_translate.c \
	compiler/radeon_pair_schedule.c \
	compiler/radeon_pair_regalloc.c \
	compiler/radeon_pair_dead_sources.c \
	compiler/radeon_dataflow.c \
	compiler/radeon_dataflow_deadcode.c \
	compiler/radeon_dataflow_swizzles.c \
	compiler/radeon_list.c \
	compiler/radeon_optimize.c \
	compiler/radeon_remove_constants.c \
	compiler/radeon_rename_regs.c \
	compiler/radeon_vert_fc.c \
	compiler/radeon_variable.c \
	compiler/r3xx_fragprog.c \
	compiler/r300_fragprog.c \
	compiler/r300_fragprog_swizzle.c \
	compiler/r300_fragprog_emit.c \
	compiler/r500_fragprog.c \
	compiler/r500_fragprog_emit.c \
	compiler/r3xx_vertprog.c \
	compiler/r3xx_vertprog_dump.c \
	compiler/memory_pool.c
 
# Include paths and defines shared by the gallium directories.
GALLIUM_CFLAGS = \
	-I$(top_srcdir)/include \
	-I$(top_srcdir)/src/gallium/include \
	-I$(top_srcdir)/src/gallium/auxiliary \
	$(DEFINES)

# Two convenience libraries that are built but not installed.
noinst_LTLIBRARIES = libr300.la libr300-helper.la
testdir = compiler/tests

# Compiler flags for everything in this directory: include paths first,
# then the configured flag sets.
AM_CFLAGS = \
	-I$(top_srcdir)/src/gallium/drivers \
	-I$(top_srcdir)/include \
	-I$(top_srcdir)/src/mesa \
	-I$(top_srcdir)/src/glsl \
	-I$(top_srcdir)/src/mapi \
	$(VISIBILITY_CFLAGS) \
	$(GALLIUM_CFLAGS) \
	$(LLVM_CFLAGS) \
	$(RADEON_CFLAGS)

# Unit-test program: link inputs, extra include path and sources.
r300_compiler_tests_LDADD = \
	libr300.la \
	libr300-helper.la \
	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
	$(GALLIUM_DRI_LIB_DEPS)

r300_compiler_tests_CPPFLAGS = \
	-I$(top_srcdir)/src/gallium/drivers/r300/compiler

r300_compiler_tests_SOURCES = \
	$(testdir)/r300_compiler_tests.c \
	$(testdir)/radeon_compiler_optimize_tests.c \
	$(testdir)/radeon_compiler_regalloc_tests.c \
	$(testdir)/radeon_compiler_util_tests.c \
	$(testdir)/rc_test_helpers.c \
	$(testdir)/unit_test.c

# The driver library is built from the shared source list.
libr300_la_SOURCES = $(C_SOURCES)
 
# These two files are already part of libmesagallium, which the dri targets
# link. If they were added directly to r300g, the dri-r300 target would end
# up with duplicated symbols; if they were left out, the other *-r300 targets
# would fail with undefined symbols.
#
# Solve this by building them into a separate helper library that can be
# linked in place of libmesagallium.
libr300_helper_la_SOURCES = \
	$(top_srcdir)/src/glsl/ralloc.c \
	$(top_srcdir)/src/mesa/program/register_allocate.c
 
# Default goal.
all: all-am

# Drop the built-in suffix list, then register only the suffixes used here.
.SUFFIXES:
.SUFFIXES: .c .lo .log .o .obj .test .test$(EXEEXT) .trs

# Regenerate Makefile.in when Makefile.am, an included fragment or a configure
# input changes. If a changed prerequisite is one of $(am__configure_deps),
# the top-level "am--refresh" target is invoked instead; otherwise automake is
# re-run for this directory only.
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
	@for dep in $?; do \
	  case '$(am__configure_deps)' in \
	    *$$dep*) \
	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
	        && { if test -f $@; then exit 0; else break; fi; }; \
	      exit 1;; \
	  esac; \
	done; \
	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/r300/Makefile'; \
	$(am__cd) $(top_srcdir) && \
	  $(AUTOMAKE) --foreign src/gallium/drivers/r300/Makefile
# Rebuild this directory's Makefile from Makefile.in. When config.status is
# among the newer prerequisites the top-level "am--refresh" target is used;
# otherwise config.status is asked to regenerate just this file.
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
	@case '$?' in \
	  *config.status*) \
	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
	  *) \
	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
	esac;
# Empty rule: declares the included fragments as targets with no recipe.
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc:
 
# config.status, configure and aclocal.m4 are all refreshed by delegating to
# the "am--refresh" target in $(top_builddir).
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh

$(top_srcdir)/configure: $(am__configure_deps)
	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
# Empty rule: declares the m4 inputs as targets with no recipe.
$(am__aclocal_m4_deps):
# Remove the non-installed libtool libraries, plus the "so_locations" file in
# each directory that holds one of them.
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
clean-noinstLTLIBRARIES:
	-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
	@list='$(noinst_LTLIBRARIES)'; \
	locs=`for p in $$list; do echo $$p; done | \
	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
	      sort -u`; \
	test -z "$$locs" || { \
	  echo rm -f $${locs}; \
	  rm -f $${locs}; \
	}
 
# Link the two convenience libraries from their object lists.
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
libr300-helper.la: $(libr300_helper_la_OBJECTS) $(libr300_helper_la_DEPENDENCIES) $(EXTRA_libr300_helper_la_DEPENDENCIES)
	$(AM_V_CCLD)$(LINK) $(libr300_helper_la_OBJECTS) $(libr300_helper_la_LIBADD) $(LIBS)

libr300.la: $(libr300_la_OBJECTS) $(libr300_la_DEPENDENCIES) $(EXTRA_libr300_la_DEPENDENCIES)
	$(AM_V_CCLD)$(LINK) $(libr300_la_OBJECTS) $(libr300_la_LIBADD) $(LIBS)
 
# Remove the check programs. When EXEEXT is non-empty, the names with that
# suffix stripped are removed as well.
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
clean-checkPROGRAMS:
	@list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
	echo " rm -f" $$list; \
	rm -f $$list || exit $$?; \
	test -n "$(EXEEXT)" || exit 0; \
	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
	echo " rm -f" $$list; \
	rm -f $$list

# Link the unit-test program; any previous binary is deleted first.
r300_compiler_tests$(EXEEXT): $(r300_compiler_tests_OBJECTS) $(r300_compiler_tests_DEPENDENCIES) $(EXTRA_r300_compiler_tests_DEPENDENCIES)
	@rm -f r300_compiler_tests$(EXEEXT)
	$(AM_V_CCLD)$(LINK) $(r300_compiler_tests_OBJECTS) $(r300_compiler_tests_LDADD) $(LIBS)
 
# Remove object files from the build directory; the leading "-" makes a
# failing rm non-fatal.
# Recipe lines must begin with a TAB; without it make stops with
# "missing separator".
mostlyclean-compile:
	-rm -f *.$(OBJEXT)

# Remove generated *.tab.c files.
distclean-compile:
	-rm -f *.tab.c
# Per-object dependency fragments stored under ./$(DEPDIR): .Plo files for the
# libtool objects and .Po files for the r300_compiler_tests objects.
# NOTE(review): the leading markers are configure-time substitutions that are
# expected to turn each line into a make include directive, or to comment it
# out when dependency tracking is disabled -- confirm against the Automake manual.
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory_pool.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_chipset.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-rc_test_helpers.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_compiler_tests-unit_test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_debug.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_flush.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fragprog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fragprog_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fragprog_swizzle.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_fs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_hyperz.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_render_stencilref.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_render_translate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_resource.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_screen_buffer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_state_derived.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_texture_desc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_tgsi_to_rc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_transfer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_vs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r300_vs_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r3xx_fragprog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r3xx_vertprog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r3xx_vertprog_dump.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r500_fragprog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r500_fragprog_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_code.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_compiler.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_compiler_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dataflow.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dataflow_deadcode.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dataflow_swizzles.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_emulate_branches.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_emulate_loops.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_inline_literals.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_list.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_opcodes.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_optimize.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_dead_sources.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_regalloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_schedule.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pair_translate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_alu.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_pair.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_print.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_program_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_remove_constants.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_rename_regs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_variable.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vert_fc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ralloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/register_allocate.Plo@am__quote@
 
# Suffix rule: compile a C source into a .o object with $(COMPILE).
# Three recipe variants follow, each guarded by configure-time markers:
#   1. fast dependency mode: the compiler writes dependencies to
#      $(DEPDIR)/$*.Tpo (-MT/-MD/-MP/-MF), which is then renamed to $*.Po;
#   2. depcomp mode: source/object/libtool/DEPDIR settings are handed to
#      $(depcomp); the AMDEPBACKSLASH substitution is expected to act as a
#      line continuation joining these lines to the compile command below;
#   3. the plain compile command.
# Each recipe line needs a TAB right after its marker prefix so that it is
# still a valid recipe line once the marker has been substituted away.
.c.o:
@am__fastdepCC_TRUE@	$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
# Suffix rule: compile a C source into a .obj object with $(COMPILE).
# Same three marker-guarded variants as the other compile rules (fast
# dependency mode producing $*.Po, depcomp mode, plain compile); here the
# source path is passed through $(CYGPATH_W) before reaching the compiler.
# Each recipe line needs a TAB right after its marker prefix so that it is
# still a valid recipe line once the marker has been substituted away.
.c.obj:
@am__fastdepCC_TRUE@	$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
# Suffix rule: compile a C source into a libtool object (.lo) with
# $(LTCOMPILE). Same three marker-guarded variants as the other compile rules;
# in fast dependency mode the .Tpo file is renamed to $*.Plo, and depcomp is
# told libtool=yes.
# Each recipe line needs a TAB right after its marker prefix so that it is
# still a valid recipe line once the marker has been substituted away.
.c.lo:
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
# Build the libtool object ralloc.lo in this directory from the shared source
# $(top_srcdir)/src/glsl/ralloc.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found at the path as written.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
ralloc.lo: $(top_srcdir)/src/glsl/ralloc.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ralloc.lo -MD -MP -MF $(DEPDIR)/ralloc.Tpo -c -o ralloc.lo `test -f '$(top_srcdir)/src/glsl/ralloc.c' || echo '$(srcdir)/'`$(top_srcdir)/src/glsl/ralloc.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/ralloc.Tpo $(DEPDIR)/ralloc.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$(top_srcdir)/src/glsl/ralloc.c' object='ralloc.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ralloc.lo `test -f '$(top_srcdir)/src/glsl/ralloc.c' || echo '$(srcdir)/'`$(top_srcdir)/src/glsl/ralloc.c
 
# Build the libtool object register_allocate.lo in this directory from the
# shared source $(top_srcdir)/src/mesa/program/register_allocate.c. The
# backquoted "test -f" emits $(srcdir)/ as a path prefix when the source is
# not found at the path as written.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
register_allocate.lo: $(top_srcdir)/src/mesa/program/register_allocate.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT register_allocate.lo -MD -MP -MF $(DEPDIR)/register_allocate.Tpo -c -o register_allocate.lo `test -f '$(top_srcdir)/src/mesa/program/register_allocate.c' || echo '$(srcdir)/'`$(top_srcdir)/src/mesa/program/register_allocate.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/register_allocate.Tpo $(DEPDIR)/register_allocate.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$(top_srcdir)/src/mesa/program/register_allocate.c' object='register_allocate.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o register_allocate.lo `test -f '$(top_srcdir)/src/mesa/program/register_allocate.c' || echo '$(srcdir)/'`$(top_srcdir)/src/mesa/program/register_allocate.c
 
# Build the libtool object radeon_code.lo from compiler/radeon_code.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_code.lo: compiler/radeon_code.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_code.lo -MD -MP -MF $(DEPDIR)/radeon_code.Tpo -c -o radeon_code.lo `test -f 'compiler/radeon_code.c' || echo '$(srcdir)/'`compiler/radeon_code.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_code.Tpo $(DEPDIR)/radeon_code.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_code.c' object='radeon_code.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_code.lo `test -f 'compiler/radeon_code.c' || echo '$(srcdir)/'`compiler/radeon_code.c
 
# Build the libtool object radeon_compiler.lo from compiler/radeon_compiler.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_compiler.lo: compiler/radeon_compiler.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_compiler.lo -MD -MP -MF $(DEPDIR)/radeon_compiler.Tpo -c -o radeon_compiler.lo `test -f 'compiler/radeon_compiler.c' || echo '$(srcdir)/'`compiler/radeon_compiler.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_compiler.Tpo $(DEPDIR)/radeon_compiler.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_compiler.c' object='radeon_compiler.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_compiler.lo `test -f 'compiler/radeon_compiler.c' || echo '$(srcdir)/'`compiler/radeon_compiler.c
 
# Build the libtool object radeon_compiler_util.lo from
# compiler/radeon_compiler_util.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_compiler_util.lo: compiler/radeon_compiler_util.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_compiler_util.lo -MD -MP -MF $(DEPDIR)/radeon_compiler_util.Tpo -c -o radeon_compiler_util.lo `test -f 'compiler/radeon_compiler_util.c' || echo '$(srcdir)/'`compiler/radeon_compiler_util.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_compiler_util.Tpo $(DEPDIR)/radeon_compiler_util.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_compiler_util.c' object='radeon_compiler_util.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_compiler_util.lo `test -f 'compiler/radeon_compiler_util.c' || echo '$(srcdir)/'`compiler/radeon_compiler_util.c
 
# Build the libtool object radeon_emulate_branches.lo from
# compiler/radeon_emulate_branches.c. The backquoted "test -f" emits
# $(srcdir)/ as a path prefix when the source is not found relative to the
# current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_emulate_branches.lo: compiler/radeon_emulate_branches.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_emulate_branches.lo -MD -MP -MF $(DEPDIR)/radeon_emulate_branches.Tpo -c -o radeon_emulate_branches.lo `test -f 'compiler/radeon_emulate_branches.c' || echo '$(srcdir)/'`compiler/radeon_emulate_branches.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_emulate_branches.Tpo $(DEPDIR)/radeon_emulate_branches.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_emulate_branches.c' object='radeon_emulate_branches.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_emulate_branches.lo `test -f 'compiler/radeon_emulate_branches.c' || echo '$(srcdir)/'`compiler/radeon_emulate_branches.c
 
# Build the libtool object radeon_emulate_loops.lo from
# compiler/radeon_emulate_loops.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_emulate_loops.lo: compiler/radeon_emulate_loops.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_emulate_loops.lo -MD -MP -MF $(DEPDIR)/radeon_emulate_loops.Tpo -c -o radeon_emulate_loops.lo `test -f 'compiler/radeon_emulate_loops.c' || echo '$(srcdir)/'`compiler/radeon_emulate_loops.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_emulate_loops.Tpo $(DEPDIR)/radeon_emulate_loops.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_emulate_loops.c' object='radeon_emulate_loops.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_emulate_loops.lo `test -f 'compiler/radeon_emulate_loops.c' || echo '$(srcdir)/'`compiler/radeon_emulate_loops.c
 
# Build the libtool object radeon_inline_literals.lo from
# compiler/radeon_inline_literals.c. The backquoted "test -f" emits
# $(srcdir)/ as a path prefix when the source is not found relative to the
# current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_inline_literals.lo: compiler/radeon_inline_literals.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_inline_literals.lo -MD -MP -MF $(DEPDIR)/radeon_inline_literals.Tpo -c -o radeon_inline_literals.lo `test -f 'compiler/radeon_inline_literals.c' || echo '$(srcdir)/'`compiler/radeon_inline_literals.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_inline_literals.Tpo $(DEPDIR)/radeon_inline_literals.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_inline_literals.c' object='radeon_inline_literals.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_inline_literals.lo `test -f 'compiler/radeon_inline_literals.c' || echo '$(srcdir)/'`compiler/radeon_inline_literals.c
 
# Build the libtool object radeon_program.lo from compiler/radeon_program.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_program.lo: compiler/radeon_program.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program.lo -MD -MP -MF $(DEPDIR)/radeon_program.Tpo -c -o radeon_program.lo `test -f 'compiler/radeon_program.c' || echo '$(srcdir)/'`compiler/radeon_program.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program.Tpo $(DEPDIR)/radeon_program.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_program.c' object='radeon_program.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program.lo `test -f 'compiler/radeon_program.c' || echo '$(srcdir)/'`compiler/radeon_program.c
 
# Build the libtool object radeon_program_print.lo from
# compiler/radeon_program_print.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_program_print.lo: compiler/radeon_program_print.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_print.lo -MD -MP -MF $(DEPDIR)/radeon_program_print.Tpo -c -o radeon_program_print.lo `test -f 'compiler/radeon_program_print.c' || echo '$(srcdir)/'`compiler/radeon_program_print.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_print.Tpo $(DEPDIR)/radeon_program_print.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_program_print.c' object='radeon_program_print.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_print.lo `test -f 'compiler/radeon_program_print.c' || echo '$(srcdir)/'`compiler/radeon_program_print.c
 
# Build the libtool object radeon_opcodes.lo from compiler/radeon_opcodes.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_opcodes.lo: compiler/radeon_opcodes.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_opcodes.lo -MD -MP -MF $(DEPDIR)/radeon_opcodes.Tpo -c -o radeon_opcodes.lo `test -f 'compiler/radeon_opcodes.c' || echo '$(srcdir)/'`compiler/radeon_opcodes.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_opcodes.Tpo $(DEPDIR)/radeon_opcodes.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_opcodes.c' object='radeon_opcodes.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_opcodes.lo `test -f 'compiler/radeon_opcodes.c' || echo '$(srcdir)/'`compiler/radeon_opcodes.c
 
# Build the libtool object radeon_program_alu.lo from
# compiler/radeon_program_alu.c. The backquoted "test -f" emits $(srcdir)/ as
# a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_program_alu.lo: compiler/radeon_program_alu.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_alu.lo -MD -MP -MF $(DEPDIR)/radeon_program_alu.Tpo -c -o radeon_program_alu.lo `test -f 'compiler/radeon_program_alu.c' || echo '$(srcdir)/'`compiler/radeon_program_alu.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_alu.Tpo $(DEPDIR)/radeon_program_alu.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_program_alu.c' object='radeon_program_alu.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_alu.lo `test -f 'compiler/radeon_program_alu.c' || echo '$(srcdir)/'`compiler/radeon_program_alu.c
 
# Build the libtool object radeon_program_pair.lo from
# compiler/radeon_program_pair.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_program_pair.lo: compiler/radeon_program_pair.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_pair.lo -MD -MP -MF $(DEPDIR)/radeon_program_pair.Tpo -c -o radeon_program_pair.lo `test -f 'compiler/radeon_program_pair.c' || echo '$(srcdir)/'`compiler/radeon_program_pair.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_pair.Tpo $(DEPDIR)/radeon_program_pair.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_program_pair.c' object='radeon_program_pair.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_pair.lo `test -f 'compiler/radeon_program_pair.c' || echo '$(srcdir)/'`compiler/radeon_program_pair.c
 
# Build the libtool object radeon_program_tex.lo from
# compiler/radeon_program_tex.c. The backquoted "test -f" emits $(srcdir)/ as
# a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_program_tex.lo: compiler/radeon_program_tex.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_program_tex.lo -MD -MP -MF $(DEPDIR)/radeon_program_tex.Tpo -c -o radeon_program_tex.lo `test -f 'compiler/radeon_program_tex.c' || echo '$(srcdir)/'`compiler/radeon_program_tex.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_program_tex.Tpo $(DEPDIR)/radeon_program_tex.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_program_tex.c' object='radeon_program_tex.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_program_tex.lo `test -f 'compiler/radeon_program_tex.c' || echo '$(srcdir)/'`compiler/radeon_program_tex.c
 
# Build the libtool object radeon_pair_translate.lo from
# compiler/radeon_pair_translate.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_pair_translate.lo: compiler/radeon_pair_translate.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_translate.lo -MD -MP -MF $(DEPDIR)/radeon_pair_translate.Tpo -c -o radeon_pair_translate.lo `test -f 'compiler/radeon_pair_translate.c' || echo '$(srcdir)/'`compiler/radeon_pair_translate.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_translate.Tpo $(DEPDIR)/radeon_pair_translate.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_pair_translate.c' object='radeon_pair_translate.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_translate.lo `test -f 'compiler/radeon_pair_translate.c' || echo '$(srcdir)/'`compiler/radeon_pair_translate.c
 
# Build the libtool object radeon_pair_schedule.lo from
# compiler/radeon_pair_schedule.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_pair_schedule.lo: compiler/radeon_pair_schedule.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_schedule.lo -MD -MP -MF $(DEPDIR)/radeon_pair_schedule.Tpo -c -o radeon_pair_schedule.lo `test -f 'compiler/radeon_pair_schedule.c' || echo '$(srcdir)/'`compiler/radeon_pair_schedule.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_schedule.Tpo $(DEPDIR)/radeon_pair_schedule.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_pair_schedule.c' object='radeon_pair_schedule.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_schedule.lo `test -f 'compiler/radeon_pair_schedule.c' || echo '$(srcdir)/'`compiler/radeon_pair_schedule.c
 
# Build the libtool object radeon_pair_regalloc.lo from
# compiler/radeon_pair_regalloc.c. The backquoted "test -f" emits $(srcdir)/
# as a path prefix when the source is not found relative to the current
# directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_pair_regalloc.lo: compiler/radeon_pair_regalloc.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_regalloc.lo -MD -MP -MF $(DEPDIR)/radeon_pair_regalloc.Tpo -c -o radeon_pair_regalloc.lo `test -f 'compiler/radeon_pair_regalloc.c' || echo '$(srcdir)/'`compiler/radeon_pair_regalloc.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_regalloc.Tpo $(DEPDIR)/radeon_pair_regalloc.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_pair_regalloc.c' object='radeon_pair_regalloc.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_regalloc.lo `test -f 'compiler/radeon_pair_regalloc.c' || echo '$(srcdir)/'`compiler/radeon_pair_regalloc.c
 
# Build the libtool object radeon_pair_dead_sources.lo from
# compiler/radeon_pair_dead_sources.c. The backquoted "test -f" emits
# $(srcdir)/ as a path prefix when the source is not found relative to the
# current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_pair_dead_sources.lo: compiler/radeon_pair_dead_sources.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_pair_dead_sources.lo -MD -MP -MF $(DEPDIR)/radeon_pair_dead_sources.Tpo -c -o radeon_pair_dead_sources.lo `test -f 'compiler/radeon_pair_dead_sources.c' || echo '$(srcdir)/'`compiler/radeon_pair_dead_sources.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_pair_dead_sources.Tpo $(DEPDIR)/radeon_pair_dead_sources.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_pair_dead_sources.c' object='radeon_pair_dead_sources.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_pair_dead_sources.lo `test -f 'compiler/radeon_pair_dead_sources.c' || echo '$(srcdir)/'`compiler/radeon_pair_dead_sources.c
 
# Build the libtool object radeon_dataflow.lo from compiler/radeon_dataflow.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_dataflow.lo: compiler/radeon_dataflow.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_dataflow.lo -MD -MP -MF $(DEPDIR)/radeon_dataflow.Tpo -c -o radeon_dataflow.lo `test -f 'compiler/radeon_dataflow.c' || echo '$(srcdir)/'`compiler/radeon_dataflow.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_dataflow.Tpo $(DEPDIR)/radeon_dataflow.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_dataflow.c' object='radeon_dataflow.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_dataflow.lo `test -f 'compiler/radeon_dataflow.c' || echo '$(srcdir)/'`compiler/radeon_dataflow.c
 
# Build the libtool object radeon_dataflow_deadcode.lo from
# compiler/radeon_dataflow_deadcode.c. The backquoted "test -f" emits
# $(srcdir)/ as a path prefix when the source is not found relative to the
# current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_dataflow_deadcode.lo: compiler/radeon_dataflow_deadcode.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_dataflow_deadcode.lo -MD -MP -MF $(DEPDIR)/radeon_dataflow_deadcode.Tpo -c -o radeon_dataflow_deadcode.lo `test -f 'compiler/radeon_dataflow_deadcode.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_deadcode.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_dataflow_deadcode.Tpo $(DEPDIR)/radeon_dataflow_deadcode.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_dataflow_deadcode.c' object='radeon_dataflow_deadcode.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_dataflow_deadcode.lo `test -f 'compiler/radeon_dataflow_deadcode.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_deadcode.c
 
# Build the libtool object radeon_dataflow_swizzles.lo from
# compiler/radeon_dataflow_swizzles.c. The backquoted "test -f" emits
# $(srcdir)/ as a path prefix when the source is not found relative to the
# current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_dataflow_swizzles.lo: compiler/radeon_dataflow_swizzles.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_dataflow_swizzles.lo -MD -MP -MF $(DEPDIR)/radeon_dataflow_swizzles.Tpo -c -o radeon_dataflow_swizzles.lo `test -f 'compiler/radeon_dataflow_swizzles.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_swizzles.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_dataflow_swizzles.Tpo $(DEPDIR)/radeon_dataflow_swizzles.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_dataflow_swizzles.c' object='radeon_dataflow_swizzles.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_dataflow_swizzles.lo `test -f 'compiler/radeon_dataflow_swizzles.c' || echo '$(srcdir)/'`compiler/radeon_dataflow_swizzles.c
 
# Build the libtool object radeon_list.lo from compiler/radeon_list.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_list.lo: compiler/radeon_list.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_list.lo -MD -MP -MF $(DEPDIR)/radeon_list.Tpo -c -o radeon_list.lo `test -f 'compiler/radeon_list.c' || echo '$(srcdir)/'`compiler/radeon_list.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_list.Tpo $(DEPDIR)/radeon_list.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_list.c' object='radeon_list.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_list.lo `test -f 'compiler/radeon_list.c' || echo '$(srcdir)/'`compiler/radeon_list.c
 
# Build the libtool object radeon_optimize.lo from compiler/radeon_optimize.c.
# The backquoted "test -f" emits $(srcdir)/ as a path prefix when the source
# is not found relative to the current directory.
# Variants (chosen by configure-time markers): compiler-generated dependencies
# (.Tpo renamed to .Plo), depcomp-driven dependencies, or a plain compile.
radeon_optimize.lo: compiler/radeon_optimize.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_optimize.lo -MD -MP -MF $(DEPDIR)/radeon_optimize.Tpo -c -o radeon_optimize.lo `test -f 'compiler/radeon_optimize.c' || echo '$(srcdir)/'`compiler/radeon_optimize.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/radeon_optimize.Tpo $(DEPDIR)/radeon_optimize.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compiler/radeon_optimize.c' object='radeon_optimize.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_optimize.lo `test -f 'compiler/radeon_optimize.c' || echo '$(srcdir)/'`compiler/radeon_optimize.c
 
# Build libtool object radeon_remove_constants.lo from
# compiler/radeon_remove_constants.c (taken from $(srcdir)/ when not present in
# the current directory).  Fast-dependency variant writes a .Tpo in $(DEPDIR)
# and moves it to .Plo; the fallback variant compiles without -M flags, via
# $(depcomp) when the AMDEP_TRUE lines are active.
radeon_remove_constants.lo: compiler/radeon_remove_constants.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_remove_constants.lo -MD -MP -MF $(DEPDIR)/radeon_remove_constants.Tpo -c -o radeon_remove_constants.lo `test -f 'compiler/radeon_remove_constants.c' || echo '$(srcdir)/'`compiler/radeon_remove_constants.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_remove_constants.Tpo $(DEPDIR)/radeon_remove_constants.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_remove_constants.c' object='radeon_remove_constants.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_remove_constants.lo `test -f 'compiler/radeon_remove_constants.c' || echo '$(srcdir)/'`compiler/radeon_remove_constants.c
 
# Build libtool object radeon_rename_regs.lo from compiler/radeon_rename_regs.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
radeon_rename_regs.lo: compiler/radeon_rename_regs.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_rename_regs.lo -MD -MP -MF $(DEPDIR)/radeon_rename_regs.Tpo -c -o radeon_rename_regs.lo `test -f 'compiler/radeon_rename_regs.c' || echo '$(srcdir)/'`compiler/radeon_rename_regs.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_rename_regs.Tpo $(DEPDIR)/radeon_rename_regs.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_rename_regs.c' object='radeon_rename_regs.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_rename_regs.lo `test -f 'compiler/radeon_rename_regs.c' || echo '$(srcdir)/'`compiler/radeon_rename_regs.c
 
# Build libtool object radeon_vert_fc.lo from compiler/radeon_vert_fc.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
radeon_vert_fc.lo: compiler/radeon_vert_fc.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_vert_fc.lo -MD -MP -MF $(DEPDIR)/radeon_vert_fc.Tpo -c -o radeon_vert_fc.lo `test -f 'compiler/radeon_vert_fc.c' || echo '$(srcdir)/'`compiler/radeon_vert_fc.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_vert_fc.Tpo $(DEPDIR)/radeon_vert_fc.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_vert_fc.c' object='radeon_vert_fc.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_vert_fc.lo `test -f 'compiler/radeon_vert_fc.c' || echo '$(srcdir)/'`compiler/radeon_vert_fc.c
 
# Build libtool object radeon_variable.lo from compiler/radeon_variable.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
radeon_variable.lo: compiler/radeon_variable.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT radeon_variable.lo -MD -MP -MF $(DEPDIR)/radeon_variable.Tpo -c -o radeon_variable.lo `test -f 'compiler/radeon_variable.c' || echo '$(srcdir)/'`compiler/radeon_variable.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/radeon_variable.Tpo $(DEPDIR)/radeon_variable.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/radeon_variable.c' object='radeon_variable.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o radeon_variable.lo `test -f 'compiler/radeon_variable.c' || echo '$(srcdir)/'`compiler/radeon_variable.c
 
# Build libtool object r3xx_fragprog.lo from compiler/r3xx_fragprog.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r3xx_fragprog.lo: compiler/r3xx_fragprog.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r3xx_fragprog.lo -MD -MP -MF $(DEPDIR)/r3xx_fragprog.Tpo -c -o r3xx_fragprog.lo `test -f 'compiler/r3xx_fragprog.c' || echo '$(srcdir)/'`compiler/r3xx_fragprog.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r3xx_fragprog.Tpo $(DEPDIR)/r3xx_fragprog.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r3xx_fragprog.c' object='r3xx_fragprog.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r3xx_fragprog.lo `test -f 'compiler/r3xx_fragprog.c' || echo '$(srcdir)/'`compiler/r3xx_fragprog.c
 
# Build libtool object r300_fragprog.lo from compiler/r300_fragprog.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r300_fragprog.lo: compiler/r300_fragprog.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_fragprog.lo -MD -MP -MF $(DEPDIR)/r300_fragprog.Tpo -c -o r300_fragprog.lo `test -f 'compiler/r300_fragprog.c' || echo '$(srcdir)/'`compiler/r300_fragprog.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_fragprog.Tpo $(DEPDIR)/r300_fragprog.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r300_fragprog.c' object='r300_fragprog.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_fragprog.lo `test -f 'compiler/r300_fragprog.c' || echo '$(srcdir)/'`compiler/r300_fragprog.c
 
# Build libtool object r300_fragprog_swizzle.lo from
# compiler/r300_fragprog_swizzle.c (taken from $(srcdir)/ when not present in
# the current directory).  Fast-dependency variant writes a .Tpo in $(DEPDIR)
# and moves it to .Plo; the fallback variant compiles without -M flags, via
# $(depcomp) when the AMDEP_TRUE lines are active.
r300_fragprog_swizzle.lo: compiler/r300_fragprog_swizzle.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_fragprog_swizzle.lo -MD -MP -MF $(DEPDIR)/r300_fragprog_swizzle.Tpo -c -o r300_fragprog_swizzle.lo `test -f 'compiler/r300_fragprog_swizzle.c' || echo '$(srcdir)/'`compiler/r300_fragprog_swizzle.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_fragprog_swizzle.Tpo $(DEPDIR)/r300_fragprog_swizzle.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r300_fragprog_swizzle.c' object='r300_fragprog_swizzle.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_fragprog_swizzle.lo `test -f 'compiler/r300_fragprog_swizzle.c' || echo '$(srcdir)/'`compiler/r300_fragprog_swizzle.c
 
# Build libtool object r300_fragprog_emit.lo from compiler/r300_fragprog_emit.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r300_fragprog_emit.lo: compiler/r300_fragprog_emit.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_fragprog_emit.lo -MD -MP -MF $(DEPDIR)/r300_fragprog_emit.Tpo -c -o r300_fragprog_emit.lo `test -f 'compiler/r300_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r300_fragprog_emit.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_fragprog_emit.Tpo $(DEPDIR)/r300_fragprog_emit.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r300_fragprog_emit.c' object='r300_fragprog_emit.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_fragprog_emit.lo `test -f 'compiler/r300_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r300_fragprog_emit.c
 
# Build libtool object r500_fragprog.lo from compiler/r500_fragprog.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r500_fragprog.lo: compiler/r500_fragprog.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r500_fragprog.lo -MD -MP -MF $(DEPDIR)/r500_fragprog.Tpo -c -o r500_fragprog.lo `test -f 'compiler/r500_fragprog.c' || echo '$(srcdir)/'`compiler/r500_fragprog.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r500_fragprog.Tpo $(DEPDIR)/r500_fragprog.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r500_fragprog.c' object='r500_fragprog.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r500_fragprog.lo `test -f 'compiler/r500_fragprog.c' || echo '$(srcdir)/'`compiler/r500_fragprog.c
 
# Build libtool object r500_fragprog_emit.lo from compiler/r500_fragprog_emit.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r500_fragprog_emit.lo: compiler/r500_fragprog_emit.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r500_fragprog_emit.lo -MD -MP -MF $(DEPDIR)/r500_fragprog_emit.Tpo -c -o r500_fragprog_emit.lo `test -f 'compiler/r500_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r500_fragprog_emit.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r500_fragprog_emit.Tpo $(DEPDIR)/r500_fragprog_emit.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r500_fragprog_emit.c' object='r500_fragprog_emit.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r500_fragprog_emit.lo `test -f 'compiler/r500_fragprog_emit.c' || echo '$(srcdir)/'`compiler/r500_fragprog_emit.c
 
# Build libtool object r3xx_vertprog.lo from compiler/r3xx_vertprog.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r3xx_vertprog.lo: compiler/r3xx_vertprog.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r3xx_vertprog.lo -MD -MP -MF $(DEPDIR)/r3xx_vertprog.Tpo -c -o r3xx_vertprog.lo `test -f 'compiler/r3xx_vertprog.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r3xx_vertprog.Tpo $(DEPDIR)/r3xx_vertprog.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r3xx_vertprog.c' object='r3xx_vertprog.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r3xx_vertprog.lo `test -f 'compiler/r3xx_vertprog.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog.c
 
# Build libtool object r3xx_vertprog_dump.lo from compiler/r3xx_vertprog_dump.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
r3xx_vertprog_dump.lo: compiler/r3xx_vertprog_dump.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r3xx_vertprog_dump.lo -MD -MP -MF $(DEPDIR)/r3xx_vertprog_dump.Tpo -c -o r3xx_vertprog_dump.lo `test -f 'compiler/r3xx_vertprog_dump.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog_dump.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r3xx_vertprog_dump.Tpo $(DEPDIR)/r3xx_vertprog_dump.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/r3xx_vertprog_dump.c' object='r3xx_vertprog_dump.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r3xx_vertprog_dump.lo `test -f 'compiler/r3xx_vertprog_dump.c' || echo '$(srcdir)/'`compiler/r3xx_vertprog_dump.c
 
# Build libtool object memory_pool.lo from compiler/memory_pool.c
# (taken from $(srcdir)/ when not present in the current directory).
# Fast-dependency variant writes a .Tpo in $(DEPDIR) and moves it to .Plo; the
# fallback variant compiles without -M flags, via $(depcomp) when the
# AMDEP_TRUE lines are active.
memory_pool.lo: compiler/memory_pool.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT memory_pool.lo -MD -MP -MF $(DEPDIR)/memory_pool.Tpo -c -o memory_pool.lo `test -f 'compiler/memory_pool.c' || echo '$(srcdir)/'`compiler/memory_pool.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/memory_pool.Tpo $(DEPDIR)/memory_pool.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compiler/memory_pool.c' object='memory_pool.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o memory_pool.lo `test -f 'compiler/memory_pool.c' || echo '$(srcdir)/'`compiler/memory_pool.c
 
# Compile $(testdir)/r300_compiler_tests.c into the per-program object
# r300_compiler_tests-r300_compiler_tests.o, using
# $(r300_compiler_tests_CPPFLAGS) in place of $(AM_CPPFLAGS) and calling $(CC)
# directly (libtool=no).  The source is taken from $(srcdir)/ when it is not in
# the current directory; dependency output goes to a .Tpo that is moved to .Po.
r300_compiler_tests-r300_compiler_tests.o: $(testdir)/r300_compiler_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-r300_compiler_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo -c -o r300_compiler_tests-r300_compiler_tests.o `test -f '$(testdir)/r300_compiler_tests.c' || echo '$(srcdir)/'`$(testdir)/r300_compiler_tests.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/r300_compiler_tests.c' object='r300_compiler_tests-r300_compiler_tests.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-r300_compiler_tests.o `test -f '$(testdir)/r300_compiler_tests.c' || echo '$(srcdir)/'`$(testdir)/r300_compiler_tests.c
 
# .obj counterpart of the r300_compiler_tests.c compile rule: same flags, but
# the source path (current directory first, else under $(srcdir)) is passed
# through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-r300_compiler_tests.obj: $(testdir)/r300_compiler_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-r300_compiler_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo -c -o r300_compiler_tests-r300_compiler_tests.obj `if test -f '$(testdir)/r300_compiler_tests.c'; then $(CYGPATH_W) '$(testdir)/r300_compiler_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/r300_compiler_tests.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Tpo $(DEPDIR)/r300_compiler_tests-r300_compiler_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/r300_compiler_tests.c' object='r300_compiler_tests-r300_compiler_tests.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-r300_compiler_tests.obj `if test -f '$(testdir)/r300_compiler_tests.c'; then $(CYGPATH_W) '$(testdir)/r300_compiler_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/r300_compiler_tests.c'; fi`
 
# Compile $(testdir)/radeon_compiler_optimize_tests.c into the per-program
# object r300_compiler_tests-radeon_compiler_optimize_tests.o with
# $(r300_compiler_tests_CPPFLAGS), calling $(CC) directly (libtool=no).  The
# source is taken from $(srcdir)/ when it is not in the current directory;
# dependency output goes to a .Tpo that is moved to .Po.
r300_compiler_tests-radeon_compiler_optimize_tests.o: $(testdir)/radeon_compiler_optimize_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_optimize_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_optimize_tests.o `test -f '$(testdir)/radeon_compiler_optimize_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_optimize_tests.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_optimize_tests.c' object='r300_compiler_tests-radeon_compiler_optimize_tests.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_optimize_tests.o `test -f '$(testdir)/radeon_compiler_optimize_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_optimize_tests.c
 
# .obj counterpart of the radeon_compiler_optimize_tests.c compile rule: same
# flags, but the source path (current directory first, else under $(srcdir))
# is passed through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-radeon_compiler_optimize_tests.obj: $(testdir)/radeon_compiler_optimize_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_optimize_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_optimize_tests.obj `if test -f '$(testdir)/radeon_compiler_optimize_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_optimize_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_optimize_tests.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_optimize_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_optimize_tests.c' object='r300_compiler_tests-radeon_compiler_optimize_tests.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_optimize_tests.obj `if test -f '$(testdir)/radeon_compiler_optimize_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_optimize_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_optimize_tests.c'; fi`
 
# Compile $(testdir)/radeon_compiler_regalloc_tests.c into the per-program
# object r300_compiler_tests-radeon_compiler_regalloc_tests.o with
# $(r300_compiler_tests_CPPFLAGS), calling $(CC) directly (libtool=no).  The
# source is taken from $(srcdir)/ when it is not in the current directory;
# dependency output goes to a .Tpo that is moved to .Po.
r300_compiler_tests-radeon_compiler_regalloc_tests.o: $(testdir)/radeon_compiler_regalloc_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_regalloc_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.o `test -f '$(testdir)/radeon_compiler_regalloc_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_regalloc_tests.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_regalloc_tests.c' object='r300_compiler_tests-radeon_compiler_regalloc_tests.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.o `test -f '$(testdir)/radeon_compiler_regalloc_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_regalloc_tests.c
 
# .obj counterpart of the radeon_compiler_regalloc_tests.c compile rule: same
# flags, but the source path (current directory first, else under $(srcdir))
# is passed through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-radeon_compiler_regalloc_tests.obj: $(testdir)/radeon_compiler_regalloc_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_regalloc_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.obj `if test -f '$(testdir)/radeon_compiler_regalloc_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_regalloc_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_regalloc_tests.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_regalloc_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_regalloc_tests.c' object='r300_compiler_tests-radeon_compiler_regalloc_tests.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_regalloc_tests.obj `if test -f '$(testdir)/radeon_compiler_regalloc_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_regalloc_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_regalloc_tests.c'; fi`
 
# Compile $(testdir)/radeon_compiler_util_tests.c into the per-program object
# r300_compiler_tests-radeon_compiler_util_tests.o with
# $(r300_compiler_tests_CPPFLAGS), calling $(CC) directly (libtool=no).  The
# source is taken from $(srcdir)/ when it is not in the current directory;
# dependency output goes to a .Tpo that is moved to .Po.
r300_compiler_tests-radeon_compiler_util_tests.o: $(testdir)/radeon_compiler_util_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_util_tests.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_util_tests.o `test -f '$(testdir)/radeon_compiler_util_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_util_tests.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_util_tests.c' object='r300_compiler_tests-radeon_compiler_util_tests.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_util_tests.o `test -f '$(testdir)/radeon_compiler_util_tests.c' || echo '$(srcdir)/'`$(testdir)/radeon_compiler_util_tests.c
 
# .obj counterpart of the radeon_compiler_util_tests.c compile rule: same
# flags, but the source path (current directory first, else under $(srcdir))
# is passed through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-radeon_compiler_util_tests.obj: $(testdir)/radeon_compiler_util_tests.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-radeon_compiler_util_tests.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo -c -o r300_compiler_tests-radeon_compiler_util_tests.obj `if test -f '$(testdir)/radeon_compiler_util_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_util_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_util_tests.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Tpo $(DEPDIR)/r300_compiler_tests-radeon_compiler_util_tests.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/radeon_compiler_util_tests.c' object='r300_compiler_tests-radeon_compiler_util_tests.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-radeon_compiler_util_tests.obj `if test -f '$(testdir)/radeon_compiler_util_tests.c'; then $(CYGPATH_W) '$(testdir)/radeon_compiler_util_tests.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/radeon_compiler_util_tests.c'; fi`
 
# Compile $(testdir)/rc_test_helpers.c into the per-program object
# r300_compiler_tests-rc_test_helpers.o with $(r300_compiler_tests_CPPFLAGS),
# calling $(CC) directly (libtool=no).  The source is taken from $(srcdir)/
# when it is not in the current directory; dependency output goes to a .Tpo
# that is moved to .Po.
r300_compiler_tests-rc_test_helpers.o: $(testdir)/rc_test_helpers.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-rc_test_helpers.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo -c -o r300_compiler_tests-rc_test_helpers.o `test -f '$(testdir)/rc_test_helpers.c' || echo '$(srcdir)/'`$(testdir)/rc_test_helpers.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/rc_test_helpers.c' object='r300_compiler_tests-rc_test_helpers.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-rc_test_helpers.o `test -f '$(testdir)/rc_test_helpers.c' || echo '$(srcdir)/'`$(testdir)/rc_test_helpers.c
 
# .obj counterpart of the rc_test_helpers.c compile rule: same flags, but the
# source path (current directory first, else under $(srcdir)) is passed
# through $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-rc_test_helpers.obj: $(testdir)/rc_test_helpers.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-rc_test_helpers.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo -c -o r300_compiler_tests-rc_test_helpers.obj `if test -f '$(testdir)/rc_test_helpers.c'; then $(CYGPATH_W) '$(testdir)/rc_test_helpers.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/rc_test_helpers.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Tpo $(DEPDIR)/r300_compiler_tests-rc_test_helpers.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/rc_test_helpers.c' object='r300_compiler_tests-rc_test_helpers.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-rc_test_helpers.obj `if test -f '$(testdir)/rc_test_helpers.c'; then $(CYGPATH_W) '$(testdir)/rc_test_helpers.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/rc_test_helpers.c'; fi`
 
# Compile $(testdir)/unit_test.c into the per-program object
# r300_compiler_tests-unit_test.o with $(r300_compiler_tests_CPPFLAGS),
# calling $(CC) directly (libtool=no).  The source is taken from $(srcdir)/
# when it is not in the current directory; dependency output goes to a .Tpo
# that is moved to .Po.
r300_compiler_tests-unit_test.o: $(testdir)/unit_test.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-unit_test.o -MD -MP -MF $(DEPDIR)/r300_compiler_tests-unit_test.Tpo -c -o r300_compiler_tests-unit_test.o `test -f '$(testdir)/unit_test.c' || echo '$(srcdir)/'`$(testdir)/unit_test.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-unit_test.Tpo $(DEPDIR)/r300_compiler_tests-unit_test.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/unit_test.c' object='r300_compiler_tests-unit_test.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-unit_test.o `test -f '$(testdir)/unit_test.c' || echo '$(srcdir)/'`$(testdir)/unit_test.c
 
# .obj counterpart of the unit_test.c compile rule: same flags, but the source
# path (current directory first, else under $(srcdir)) is passed through
# $(CYGPATH_W) before being handed to $(CC).
r300_compiler_tests-unit_test.obj: $(testdir)/unit_test.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT r300_compiler_tests-unit_test.obj -MD -MP -MF $(DEPDIR)/r300_compiler_tests-unit_test.Tpo -c -o r300_compiler_tests-unit_test.obj `if test -f '$(testdir)/unit_test.c'; then $(CYGPATH_W) '$(testdir)/unit_test.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/unit_test.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/r300_compiler_tests-unit_test.Tpo $(DEPDIR)/r300_compiler_tests-unit_test.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(testdir)/unit_test.c' object='r300_compiler_tests-unit_test.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(r300_compiler_tests_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o r300_compiler_tests-unit_test.obj `if test -f '$(testdir)/unit_test.c'; then $(CYGPATH_W) '$(testdir)/unit_test.c'; else $(CYGPATH_W) '$(srcdir)/$(testdir)/unit_test.c'; fi`
 
# Remove libtool object files (*.lo) from the current directory.  The leading
# '-' tells make to ignore a failing rm.
mostlyclean-libtool:
-rm -f *.lo
 
# Remove the .libs and _libs directories from the current directory.  The
# leading '-' tells make to ignore a failing rm.
clean-libtool:
-rm -rf .libs _libs
 
# Generate the 'ID' database by running mkid over $$unique.
# NOTE(review): $$unique is presumably set by $(am__define_uniq_tagged_files),
# which is defined outside this section -- confirm there.
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
# 'tags' is an alias for tags-am; 'TAGS' is an alias for 'tags'.
tags: tags-am
TAGS: tags
 
# Run $(ETAGS) over the tagged source files.
# 'set x; ...; shift' resets the shell's positional parameters; whatever is in
# them after the shift is forwarded to etags as "$@".  Nothing is run when
# $(ETAGS_ARGS), the positional parameters and $$unique are all empty.
# NOTE(review): $$unique and $$empty_fix are not assigned in this recipe;
# presumably $(am__define_uniq_tagged_files) provides $$unique -- confirm.
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
# 'ctags' is an alias for ctags-am.
ctags: ctags-am
 
# 'CTAGS' is an alias for 'ctags'.  ctags-am runs $(CTAGS) over $$unique,
# and does nothing when both $(CTAGS_ARGS) and $$unique are empty.
# NOTE(review): $$unique is presumably set by $(am__define_uniq_tagged_files),
# which is defined outside this section -- confirm there.
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
# Run gtags from $(top_srcdir), passing it the path that `pwd` reports for
# $(top_builddir) as the final argument.  The three steps are chained with
# '&&', so a failing cd stops the recipe.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
# 'cscopelist' is an alias for cscopelist-am.
cscopelist: cscopelist-am
 
# Append one path per tagged file to $(top_builddir)/cscope.files.
# A file that exists in the current directory is listed as $(subdir)/<file>;
# otherwise it is listed under $$sdir, which is $(srcdir) itself when that
# starts with a slash, backslash or drive prefix, and $(subdir)/$(srcdir)
# in every other case.
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
# Remove the tag/index files TAGS, ID, GTAGS, GRTAGS, GSYMS, GPATH and tags
# from the current directory.  The leading '-' tells make to ignore a failing rm.
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
# Recover from a deleted '.trs' file: this ensures that
# "rm -f foo.log; make foo.trs" re-runs 'foo.test' and re-creates
# both 'foo.log' and 'foo.trs'.  The recipe is split into two lines (two
# subshells) to avoid problems with "make -n".
# Step 1 deletes both the .log ($<) and the .trs ($@); step 2 re-invokes make
# on the .log target.
.log.trs:
rm -f $< $@
$(MAKE) $(AM_MAKEFLAGS) $<
 
# The leading 'am--fnord' is there to ensure the list of targets does not
# expand to empty, as could happen e.g. with "make check TESTS=''".
# Every test log and its matching .trs file depends on $(am__force_recheck).
# am--force-recheck itself has a no-op recipe ('@:').
# NOTE(review): $(am__force_recheck) presumably expands to am--force-recheck
# when a recheck is requested; its definition is outside this section.
am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck)
am--force-recheck:
@:
 
# Build the aggregated test-suite log. The rule has two recipes:
#   1. Find tests whose .log or .trs file is missing or unreadable, delete the
#      stale halves, and re-run make for those logs. The am__remaking_logs
#      environment variable guards against infinite recursion. Afterwards,
#      verify that both files now exist for every such test.
#   2. Count the ':test-result:' lines in all .trs files per outcome, write the
#      report to $(TEST_SUITE_LOG) via a temporary file, print a (possibly
#      coloured) summary, and exit non-zero if any FAIL, XPASS or ERROR
#      result was counted.
# Note: "$@" inside result_count's error message is expanded by make (to the
# target name), not by the shell.
$(TEST_SUITE_LOG): $(TEST_LOGS)
@$(am__set_TESTS_bases); \
am__f_ok () { test -f "$$1" && test -r "$$1"; }; \
redo_bases=`for i in $$bases; do \
am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \
done`; \
if test -n "$$redo_bases"; then \
redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \
redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \
if $(am__make_dryrun); then :; else \
rm -f $$redo_logs && rm -f $$redo_results || exit 1; \
fi; \
fi; \
if test -n "$$am__remaking_logs"; then \
echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \
"recursion detected" >&2; \
else \
am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \
fi; \
if $(am__make_dryrun); then :; else \
st=0; \
errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \
for i in $$redo_bases; do \
test -f $$i.trs && test -r $$i.trs \
|| { echo "$$errmsg $$i.trs" >&2; st=1; }; \
test -f $$i.log && test -r $$i.log \
|| { echo "$$errmsg $$i.log" >&2; st=1; }; \
done; \
test $$st -eq 0 || exit 1; \
fi
@$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \
ws='[ ]'; \
results=`for b in $$bases; do echo $$b.trs; done`; \
test -n "$$results" || results=/dev/null; \
all=` grep "^$$ws*:test-result:" $$results | wc -l`; \
pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \
fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \
skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \
xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \
xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \
error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \
if test `expr $$fail + $$xpass + $$error` -eq 0; then \
success=true; \
else \
success=false; \
fi; \
br='==================='; br=$$br$$br$$br$$br; \
result_count () \
{ \
if test x"$$1" = x"--maybe-color"; then \
maybe_colorize=yes; \
elif test x"$$1" = x"--no-color"; then \
maybe_colorize=no; \
else \
echo "$@: invalid 'result_count' usage" >&2; exit 4; \
fi; \
shift; \
desc=$$1 count=$$2; \
if test $$maybe_colorize = yes && test $$count -gt 0; then \
color_start=$$3 color_end=$$std; \
else \
color_start= color_end=; \
fi; \
echo "$${color_start}# $$desc $$count$${color_end}"; \
}; \
create_testsuite_report () \
{ \
result_count $$1 "TOTAL:" $$all "$$brg"; \
result_count $$1 "PASS: " $$pass "$$grn"; \
result_count $$1 "SKIP: " $$skip "$$blu"; \
result_count $$1 "XFAIL:" $$xfail "$$lgn"; \
result_count $$1 "FAIL: " $$fail "$$red"; \
result_count $$1 "XPASS:" $$xpass "$$red"; \
result_count $$1 "ERROR:" $$error "$$mgn"; \
}; \
{ \
echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \
$(am__rst_title); \
create_testsuite_report --no-color; \
echo; \
echo ".. contents:: :depth: 2"; \
echo; \
for b in $$bases; do echo $$b; done \
| $(am__create_global_log); \
} >$(TEST_SUITE_LOG).tmp || exit 1; \
mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \
if $$success; then \
col="$$grn"; \
else \
col="$$red"; \
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
if $$success; then :; else \
echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \
if test -n "$(PACKAGE_BUGREPORT)"; then \
echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \
fi; \
echo "$$col$$br$$std"; \
fi; \
$$success || exit 1
 
# Run the test suite: delete the logs listed in $(RECHECK_LOGS), their .trs
# counterparts and the old suite log, then re-invoke make on
# $(TEST_SUITE_LOG) with TEST_LOGS set to one .log per test base name.
# The recipe exits with the sub-make's status.
check-TESTS:
@list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list
@list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
@set +e; $(am__set_TESTS_bases); \
log_list=`for i in $$bases; do echo $$i.log; done`; \
trs_list=`for i in $$bases; do echo $$i.trs; done`; \
log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \
$(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \
exit $$?;
# Like check-TESTS, but the base-name list is first filtered through
# $(am__list_recheck_tests), and am__force_recheck=am--force-recheck is passed
# so the selected logs are rebuilt even if they look up to date.
recheck: all $(check_PROGRAMS)
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
@set +e; $(am__set_TESTS_bases); \
bases=`for i in $$bases; do echo $$i; done \
| $(am__list_recheck_tests)` || exit 1; \
log_list=`for i in $$bases; do echo $$i.log; done`; \
log_list=`echo $$log_list`; \
$(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \
am__force_recheck=am--force-recheck \
TEST_LOGS="$$log_list"; \
exit $$?
# Run the r300_compiler_tests program through $(LOG_DRIVER), producing
# r300_compiler_tests.log and r300_compiler_tests.trs. The shell variables
# $$f and $$tst are not set in this recipe itself; presumably
# $(am__check_pre) derives them from $$p -- confirm against its definition.
r300_compiler_tests.log: r300_compiler_tests$(EXEEXT)
@p='r300_compiler_tests$(EXEEXT)'; \
b='r300_compiler_tests'; \
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
--log-file $$b.log --trs-file $$b.trs \
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
"$$tst" $(AM_TESTS_FD_REDIRECT)
# Suffix rule: build foo.log (and foo.trs) from foo.test via
# $(TEST_LOG_DRIVER); $(am__set_b) sets the base name $$b.
.test.log:
@p='$<'; \
$(am__set_b); \
$(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \
--log-file $$b.log --trs-file $$b.trs \
$(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
"$$tst" $(AM_TESTS_FD_REDIRECT)
# Same rule for foo.test$(EXEEXT); the @am__EXEEXT_TRUE@ prefix is a
# configure-time substitution that enables or disables these lines.
@am__EXEEXT_TRUE@.test$(EXEEXT).log:
@am__EXEEXT_TRUE@ @p='$<'; \
@am__EXEEXT_TRUE@ $(am__set_b); \
@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \
@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \
@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT)
 
# Copy every entry of $(DISTFILES) into $(distdir).
#  - The two sed expressions strip a leading $(srcdir)/ and rewrite a leading
#    $(top_srcdir)/ to $(top_builddir)/ in each file name.
#  - Needed sub-directories of $(distdir) are created up front with
#    $(MKDIR_P).
#  - Directories are copied recursively (cp -fpR), first from $(srcdir) and
#    then from the build directory when the two differ; directories lacking
#    owner rwx permission are chmod'ed so later copies can write into them.
#  - Plain files are copied only if not already present in $(distdir).
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
# 'check' first builds the check programs, then runs the test suite, each in
# its own sub-make.
check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
$(MAKE) $(AM_MAKEFLAGS) check-TESTS
check: check-am
# The default build of this directory produces the libtool libraries.
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
# Re-run 'install' with the stripping install program. When $(STRIP) is set,
# it is additionally passed to the install program through STRIPPROG.
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
# Remove test logs, test result (.trs) files and the suite log. Each line is
# guarded by 'test -z' so rm is never run with an empty argument list.
mostlyclean-generic:
-test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS)
-test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs)
-test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
 
clean-generic:
 
# Remove configure-generated files. The VPATH variants are removed only when
# building outside the source directory (srcdir is not '.').
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
# Only prints a warning; this rule itself deletes nothing.
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
# Standard automake entry points. Each public target (clean, dvi, html, ...)
# forwards to its '-am' counterpart. The '-am' targets with no prerequisites
# and no recipe are intentional no-ops: this directory has nothing to do for
# them.
clean: clean-am
 
clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
clean-noinstLTLIBRARIES mostlyclean-am
 
# distclean additionally removes the dependency directory and the generated
# Makefile itself.
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
# maintainer-clean removes the same extra files as distclean.
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am:
 
# Targets whose recipes invoke $(MAKE) recursively.
.MAKE: check-am install-am install-strip
 
# Targets that name actions rather than files; declaring them phony keeps a
# stray file of the same name from suppressing the recipe.
.PHONY: CTAGS GTAGS TAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool \
clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am recheck tags tags-am uninstall \
uninstall-am
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/Makefile.sources
0,0 → 1,59
# C source files of the r300 driver, relative to this directory. The list is
# shared by the build files that include Makefile.sources (Android.mk and
# Makefile.am). Driver sources come first, then the shader compiler sources
# under compiler/.
C_SOURCES = \
r300_blit.c \
r300_chipset.c \
r300_context.c \
r300_debug.c \
r300_emit.c \
r300_flush.c \
r300_fs.c \
r300_hyperz.c \
r300_query.c \
r300_render.c \
r300_render_stencilref.c \
r300_render_translate.c \
r300_resource.c \
r300_screen.c \
r300_screen_buffer.c \
r300_state.c \
r300_state_derived.c \
r300_vs.c \
r300_vs_draw.c \
r300_texture.c \
r300_texture_desc.c \
r300_tgsi_to_rc.c \
r300_transfer.c \
\
compiler/radeon_code.c \
compiler/radeon_compiler.c \
compiler/radeon_compiler_util.c \
compiler/radeon_emulate_branches.c \
compiler/radeon_emulate_loops.c \
compiler/radeon_inline_literals.c \
compiler/radeon_program.c \
compiler/radeon_program_print.c \
compiler/radeon_opcodes.c \
compiler/radeon_program_alu.c \
compiler/radeon_program_pair.c \
compiler/radeon_program_tex.c \
compiler/radeon_pair_translate.c \
compiler/radeon_pair_schedule.c \
compiler/radeon_pair_regalloc.c \
compiler/radeon_pair_dead_sources.c \
compiler/radeon_dataflow.c \
compiler/radeon_dataflow_deadcode.c \
compiler/radeon_dataflow_swizzles.c \
compiler/radeon_list.c \
compiler/radeon_optimize.c \
compiler/radeon_remove_constants.c \
compiler/radeon_rename_regs.c \
compiler/radeon_vert_fc.c \
compiler/radeon_variable.c \
compiler/r3xx_fragprog.c \
compiler/r300_fragprog.c \
compiler/r300_fragprog_swizzle.c \
compiler/r300_fragprog_emit.c \
compiler/r500_fragprog.c \
compiler/r500_fragprog_emit.c \
compiler/r3xx_vertprog.c \
compiler/r3xx_vertprog_dump.c \
compiler/memory_pool.c
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/memory_pool.c
0,0 → 1,97
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "memory_pool.h"
 
#include <assert.h>
#include <stdlib.h>
#include <string.h>
 
 
/* Requests of at least this many bytes bypass the shared pool block and get
 * their own malloc'ed block; the first pool block is twice this size. */
#define POOL_LARGE_ALLOC 4096
/* Byte alignment applied to the pool's allocation cursor after each small
 * allocation. */
#define POOL_ALIGN 8
 
 
/* Header placed at the start of every malloc'ed block owned by a pool; the
 * usable memory follows immediately after it. */
struct memory_block {
struct memory_block * next; /* next block in the pool's singly linked list */
};
 
/**
 * Put a pool into its empty state by zeroing every field.
 * No memory is allocated here.
 */
void memory_pool_init(struct memory_pool * pool)
{
	memset(pool, 0, sizeof(*pool));
}
 
 
/**
 * Free every block owned by the pool.
 *
 * The list head is advanced before each block is freed, so pool->blocks
 * never points at released memory and ends up NULL.
 */
void memory_pool_destroy(struct memory_pool * pool)
{
	struct memory_block * current;
	struct memory_block * following;

	for (current = pool->blocks; current; current = following) {
		following = current->next;
		pool->blocks = following;
		free(current);
	}
}
 
/**
 * Attach a fresh block to the pool and point the allocation cursor at it.
 *
 * The new block is as large as everything allocated so far (so the pool
 * doubles each time), or 2 * POOL_LARGE_ALLOC for the first block.
 *
 * \return 1 on success, 0 if malloc() failed; on failure the pool is left
 * untouched.
 */
static int refill_pool(struct memory_pool * pool)
{
	unsigned int blocksize = pool->total_allocated;
	struct memory_block * newblock;

	if (!blocksize)
		blocksize = 2*POOL_LARGE_ALLOC;

	newblock = malloc(blocksize);
	if (!newblock)
		return 0;

	newblock->next = pool->blocks;
	pool->blocks = newblock;

	/* Usable memory starts right after the block header. */
	pool->head = (unsigned char*)(newblock + 1);
	pool->end = ((unsigned char*)newblock) + blocksize;
	pool->total_allocated += blocksize;
	return 1;
}


/**
 * Allocate \p bytes from the pool.
 *
 * Requests smaller than POOL_LARGE_ALLOC are carved out of the current pool
 * block (refilling it when there is not enough room left); larger requests
 * get a dedicated malloc'ed block that is linked into the pool so that
 * memory_pool_destroy() releases it as well.
 *
 * \return pointer to the allocation, or NULL if the underlying malloc()
 * failed. The memory is not zeroed.
 */
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
{
	if (bytes < POOL_LARGE_ALLOC) {
		void * ptr;

		if (pool->head + bytes > pool->end) {
			if (!refill_pool(pool))
				return NULL;
		}

		assert(pool->head + bytes <= pool->end);

		ptr = pool->head;

		pool->head += bytes;
		/* Round the cursor up to POOL_ALIGN. size_t is used instead of
		 * unsigned long so the pointer is not truncated on platforms
		 * where long is narrower than a pointer. */
		pool->head = (unsigned char*)(((size_t)pool->head + POOL_ALIGN - 1) & ~(size_t)(POOL_ALIGN - 1));

		return ptr;
	} else {
		struct memory_block * block = malloc(bytes + sizeof(struct memory_block));

		if (!block)
			return NULL;

		block->next = pool->blocks;
		pool->blocks = block;

		/* Hand out the memory that follows the block header. */
		return (block + 1);
	}
}
 
 
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/memory_pool.h
0,0 → 1,80
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef MEMORY_POOL_H
#define MEMORY_POOL_H
 
struct memory_block;
 
/**
* Provides a pool of memory that can quickly be allocated from, at the
* cost of being unable to explicitly free one of the allocated blocks.
* Instead, the entire pool can be freed at once.
*
* The idea is to allow one to quickly allocate a flexible amount of
* memory during operations like shader compilation while avoiding
* reference counting headaches.
*/
struct memory_pool {
unsigned char * head; /* next free byte in the current pool block */
unsigned char * end; /* one past the last byte of the current pool block */
unsigned int total_allocated; /* sum of the sizes of all pool blocks so far */
struct memory_block * blocks; /* linked list of every block to free on destroy */
};
 
 
/* Reset a pool to the empty state; must be called before first use. */
void memory_pool_init(struct memory_pool * pool);
/* Free every block that was allocated through the pool. */
void memory_pool_destroy(struct memory_pool * pool);
/* Allocate bytes from the pool; individual allocations cannot be freed. */
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
 
 
/**
 * Generic helper for growing an array that has separate size/count
 * and reserved counters to accommodate up to num new elements.
 *
 * type * Array;
 * unsigned int Size;
 * unsigned int Reserved;
 *
 * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
 * assert(Size + k <= Reserved);
 *
 * When the array has to grow, the capacity is doubled; if that is still
 * not enough for Size + num elements, it becomes Size + 4 * num instead.
 * The existing Size elements are copied into the new storage. The old
 * storage is not released, because pool allocations cannot be freed
 * individually.
 *
 * \note Size is not changed by this macro.
 *
 * \warning Array, Size, Reserved have to be lvalues and may be evaluated
 * several times.
 */
#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
	unsigned int _num = (num); \
	unsigned int _needed = (size) + _num; \
	if (_needed > (reserved)) { \
		unsigned int newreserve = (reserved) * 2; \
		type * newarray; \
		if (newreserve < _needed) \
			newreserve = (size) + 4 * _num; /* arbitrary heuristic */ \
		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
		if ((size) > 0) /* array may still be NULL while empty */ \
			memcpy(newarray, (array), (size) * sizeof(type)); \
		(array) = newarray; \
		(reserved) = newreserve; \
	} \
} while(0)
 
#endif /* MEMORY_POOL_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog.c
0,0 → 1,338
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "r300_fragprog.h"
 
#include <stdio.h>
 
#include "../r300_reg.h"
 
/**
 * Write a short mnemonic for the presubtract operation encoded in bits
 * 0x600000 of \p inst into \p out.
 *
 * \p out is left untouched if the field matches none of the known
 * R300_ALU_SRCP_* encodings.
 */
static void presub_string(char out[10], unsigned int inst)
{
	const char *mnemonic = 0;
	unsigned int presub_field = inst & 0x600000;

	switch(presub_field){
	case R300_ALU_SRCP_1_MINUS_2_SRC0:
		mnemonic = "bias";
		break;
	case R300_ALU_SRCP_SRC1_MINUS_SRC0:
		mnemonic = "sub";
		break;
	case R300_ALU_SRCP_SRC1_PLUS_SRC0:
		mnemonic = "add";
		break;
	case R300_ALU_SRCP_1_MINUS_SRC0:
		mnemonic = "inv ";
		break;
	}

	if (mnemonic)
		sprintf(out, "%s", mnemonic);
}
 
/**
 * Return 32 (1 << 5) when \p bit is set in \p r400_ext_addr, 0 otherwise.
 * The result is meant to be OR'ed onto a 5-bit register index.
 */
static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
{
	if ((r400_ext_addr & bit) != 0)
		return 1 << 5;

	return 0;
}
 
/**
* Debug helper: print the compiled R300 fragment program to stderr.
*
* For every node (0 .. code->config & 3) the node's TEX instructions and
* then its ALU instructions are decoded into a human-readable listing,
* with the raw instruction words shown in hex alongside.
*
* \param c the compiler, which must really be a
* struct r300_fragment_program_compiler (it is cast below)
* \param user unused
*
* The static counter 'pc' numbers the dumps across calls.
*/
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
int n, i, j;
static int pc = 0;
 
fprintf(stderr, "pc=%d*************************************\n", pc++);
 
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
if (c->is_r400) {
fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
}
 
for (n = 0; n <= (code->config & 3); n++) {
/* The used entries of code_addr[] are the last (config & 3) + 1 ones. */
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
/* ALU start/size: low bits from code_addr, plus three extra bits taken
* from r400_code_offset_ext and placed above them (<< 6). */
unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
 
fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
"alu_end: %u, tex_end: %d (code_addr: %08x)\n", n,
alu_offset, tex_offset, alu_end, tex_end, code_addr);
 
/* TEX instructions are dumped for every node after the first, and for
* the first node only when the config flag says it has them. */
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
fprintf(stderr, " TEX:\n");
/* tex_end is used as an inclusive count: tex_end + 1 instructions. */
for (i = tex_offset;
i <= tex_offset + tex_end;
++i) {
const char *instr;
 
switch ((code->tex.
inst[i] >> R300_TEX_INST_SHIFT) &
15) {
case R300_TEX_OP_LD:
instr = "TEX";
break;
case R300_TEX_OP_KIL:
instr = "KIL";
break;
case R300_TEX_OP_TXP:
instr = "TXP";
break;
case R300_TEX_OP_TXB:
instr = "TXB";
break;
default:
instr = "UNKNOWN";
}
 
fprintf(stderr,
" %s t%i, %c%i, texture[%i] (%08x)\n",
instr,
(code->tex.
inst[i] >> R300_DST_ADDR_SHIFT) & 31,
't',
(code->tex.
inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
(code->tex.
inst[i] & R300_TEX_ID_MASK) >>
R300_TEX_ID_SHIFT,
code->tex.inst[i]);
}
}
 
/* alu_end is likewise used as an inclusive count. */
for (i = alu_offset;
i <= alu_offset + alu_end; ++i) {
/* srcc/srca: names of the three RGB/alpha sources plus, in slot 3, the
* presubtract mnemonic. dstc/dsta: destination text. argc/arga: the
* three decoded operand strings. */
char srcc[4][10], dstc[20];
char srca[4][10], dsta[20];
char argc[3][20];
char arga[3][20];
char flags[5], tmp[10];
 
/* Decode the three source addresses: bit 5 selects 'c' over 't', the low
* five bits are the index, and the R400 extension word supplies a sixth
* index bit. */
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_addr >> (j * 6);
int rega = code->alu.inst[i].alpha_addr >> (j * 6);
int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
code->alu.inst[i].r400_ext_addr);
int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
code->alu.inst[i].r400_ext_addr);
 
sprintf(srcc[j], "%c%i",
(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
sprintf(srca[j], "%c%i",
(rega & 32) ? 'c' : 't', (rega & 31) | msba);
}
 
/* RGB destination: first the register write mask, then the output write
* mask; both parts are appended to dstc when non-empty. */
dstc[0] = 0;
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
unsigned int msb = get_msb(
R400_ADDRD_EXT_RGB_MSB_BIT,
code->alu.inst[i].r400_ext_addr);
 
sprintf(dstc, "t%i.%s ",
((code->alu.inst[i].
rgb_addr >> R300_ALU_DSTC_SHIFT)
& 31) | msb,
flags);
}
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(tmp, "o%i.%s",
(code->alu.inst[i].
rgb_addr >> 29) & 3,
flags);
strcat(dstc, tmp);
}
/* Presub */
presub_string(srcc[3], code->alu.inst[i].rgb_inst);
presub_string(srca[3], code->alu.inst[i].alpha_inst);
 
/* Alpha destination: register, output and depth ("Z") targets are
* appended in that order. */
dsta[0] = 0;
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
unsigned int msb = get_msb(
R400_ADDRD_EXT_A_MSB_BIT,
code->alu.inst[i].r400_ext_addr);
sprintf(dsta, "t%i.w ",
((code->alu.inst[i].
alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
| msb);
}
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",
(code->alu.inst[i].
alpha_addr >> 25) & 3);
strcat(dsta, tmp);
}
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
strcat(dsta, "Z");
}
 
fprintf(stderr,
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
" w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], srcc[3], dstc,
code->alu.inst[i].rgb_addr, srca[0], srca[1],
srca[2], srca[3], dsta,
code->alu.inst[i].alpha_addr);
 
/* Decode the three operands of the RGB and alpha opcodes. Each operand
* occupies 7 bits: a 5-bit selector (d), bit 5 = negate ("-"),
* bit 6 = absolute value ("|...|"). */
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
int rega = code->alu.inst[i].alpha_inst >> (j * 7);
int d;
char buf[20];
 
/* RGB selector: 0-11 = source d/4 with swizzle d%4, 12-14 = alpha
* source replicated, 15-19 = presubtract result, 20-22 = constants,
* 23-31 = rotated swizzles of source d%3. */
d = regc & 31;
if (d < 12) {
switch (d % 4) {
case R300_ALU_ARGC_SRC0C_XYZ:
sprintf(buf, "%s.xyz",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_XXX:
sprintf(buf, "%s.xxx",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_YYY:
sprintf(buf, "%s.yyy",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_ZZZ:
sprintf(buf, "%s.zzz",
srcc[d / 4]);
break;
}
} else if (d < 15) {
sprintf(buf, "%s.www", srca[d - 12]);
} else if (d < 20 ) {
switch(d) {
case R300_ALU_ARGC_SRCP_XYZ:
sprintf(buf, "srcp.xyz");
break;
case R300_ALU_ARGC_SRCP_XXX:
sprintf(buf, "srcp.xxx");
break;
case R300_ALU_ARGC_SRCP_YYY:
sprintf(buf, "srcp.yyy");
break;
case R300_ALU_ARGC_SRCP_ZZZ:
sprintf(buf, "srcp.zzz");
break;
case R300_ALU_ARGC_SRCP_WWW:
sprintf(buf, "srcp.www");
break;
}
} else if (d == 20) {
sprintf(buf, "0.0");
} else if (d == 21) {
sprintf(buf, "1.0");
} else if (d == 22) {
sprintf(buf, "0.5");
} else if (d >= 23 && d < 32) {
d -= 23;
switch (d / 3) {
case 0:
sprintf(buf, "%s.yzx",
srcc[d % 3]);
break;
case 1:
sprintf(buf, "%s.zxy",
srcc[d % 3]);
break;
case 2:
sprintf(buf, "%s.Wzy",
srcc[d % 3]);
break;
}
} else {
/* Not reachable: d was masked with 31 above. */
sprintf(buf, "%i", d);
}
 
sprintf(argc[j], "%s%s%s%s",
(regc & 32) ? "-" : "",
(regc & 64) ? "|" : "",
buf, (regc & 64) ? "|" : "");
 
/* Alpha selector: 0-8 = component d%3 of RGB source d/3, 9-11 = w of an
* alpha source, 12-15 = presubtract result, 16-18 = constants; any
* other value is printed as a number. */
d = rega & 31;
if (d < 9) {
sprintf(buf, "%s.%c", srcc[d / 3],
'x' + (char)(d % 3));
} else if (d < 12) {
sprintf(buf, "%s.w", srca[d - 9]);
} else if (d < 16) {
switch(d) {
case R300_ALU_ARGA_SRCP_X:
sprintf(buf, "srcp.x");
break;
case R300_ALU_ARGA_SRCP_Y:
sprintf(buf, "srcp.y");
break;
case R300_ALU_ARGA_SRCP_Z:
sprintf(buf, "srcp.z");
break;
case R300_ALU_ARGA_SRCP_W:
sprintf(buf, "srcp.w");
break;
}
} else if (d == 16) {
sprintf(buf, "0.0");
} else if (d == 17) {
sprintf(buf, "1.0");
} else if (d == 18) {
sprintf(buf, "0.5");
} else {
sprintf(buf, "%i", d);
}
 
sprintf(arga[j], "%s%s%s%s",
(rega & 32) ? "-" : "",
(rega & 64) ? "|" : "",
buf, (rega & 64) ? "|" : "");
}
 
fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n"
" w: %8s %8s %8s op: %08x\n",
argc[0], argc[1], argc[2],
code->alu.inst[i].rgb_inst,
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
"NOP" : "",
arga[0], arga[1],arga[2],
code->alu.inst[i].alpha_inst);
}
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog.h
0,0 → 1,44
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
#ifndef __R300_FRAGPROG_H_
#define __R300_FRAGPROG_H_
 
#include "radeon_compiler.h"
#include "radeon_program.h"
 
 
/* Emit the hardware code for a fragment program; the (compiler, user)
* signature matches r300FragmentProgramDump below. */
extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
 
/* Print the emitted R300 fragment program to stderr, for debugging. */
extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
0,0 → 1,552
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file
*
* Emit the r300_fragment_program_code that can be understood by the hardware.
* Input is a pre-transformed radeon_program.
*
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
*/
 
#include "r300_fragprog.h"
 
#include "../r300_reg.h"
 
#include "radeon_program_pair.h"
#include "r300_fragprog_swizzle.h"
 
 
/* Bookkeeping carried through the emission of one fragment program. */
struct r300_emit_state {
struct r300_fragment_program_compiler * compiler; /* compiler whose code is being emitted */
 
/* NOTE(review): the fields below look like per-node state (index of the
* node being built and the first TEX/ALU instruction belonging to it);
* their uses lie outside this part of the file -- confirm. */
unsigned current_node : 2;
unsigned node_first_tex : 8;
unsigned node_first_alu : 8;
uint32_t node_flags;
};
 
/* Declares the locals 'c' (the compiler) and 'code' (its R300 program code)
* from a variable named 'emit' that must be in scope. Being a declaration,
* it has to appear among the declarations at the top of a block. */
#define PROG_CODE \
struct r300_fragment_program_compiler *c = emit->compiler; \
struct r300_fragment_program_code *code = &c->code->code.r300
 
/* Report a compile error through rc_error(), prefixed with the current
* file and function name. Requires a variable 'c' in scope. Uses the GNU
* named-variadic-macro extension (args...). */
#define error(fmt, args...) do { \
rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
 
/**
 * Return bits 6..8 of \p bits, i.e. the three bits that sit directly above
 * the low six.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int upper = bits >> 6;

	return upper & 0x7;
}
 
/**
 * Return the bits of \p bits that lie above its \p lsbs least significant
 * bits, limited to four bits.
 *
 * The mask used to be 0x15 (binary 10101), which is not contiguous: it
 * dropped bits 1 and 3 of the result and let bit 4 through. That looks
 * like decimal 15 written as hex.
 * NOTE(review): the 4-bit width is assumed from that reading -- confirm
 * against the width of the R400 TEX start/size MSB fields in r300_reg.h.
 *
 * @param bits The full value to split
 * @param lsbs The number of least significant bits
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
 
/* Shift x right by lsbs bits and keep the bits selected by mask. Every
 * argument is parenthesised so that expressions such as `a | b` can be
 * passed safely. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
 
/**
 * Record that temporary register \p index is in use by raising
 * code->pixsize to it when it is larger than the current value.
 */
static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
{
	if (!(index > code->pixsize))
		return;

	code->pixsize = index;
}
 
/**
 * Translate a pair-instruction source into its 6-bit hardware address.
 *
 * Constants yield the index with bit 5 set. Temporaries and inputs are
 * recorded through use_temporary() and yield the low five index bits.
 * Unused sources and any other register file yield 0.
 */
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
{
	unsigned int hw_addr = 0;

	if (src.Used) {
		if (src.File == RC_FILE_CONSTANT) {
			hw_addr = src.Index | (1 << 5);
		} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
			use_temporary(code, src.Index);
			hw_addr = src.Index & 0x1f;
		}
	}

	return hw_addr;
}
 
 
/**
 * Map a compiler opcode to the R300 RGB ALU opcode bits.
 *
 * NOP is emitted as MAD. An opcode without an RGB equivalent is reported
 * through error() and also emitted as MAD.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	/* Unknown opcode: report it, then fall back to MAD. */
	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
 
/**
 * Map a compiler opcode to the R300 alpha ALU opcode bits.
 *
 * Both DP3 and DP4 map to R300_ALU_OUTA_DP4. NOP is emitted as MAD. An
 * opcode without an alpha equivalent is reported through error() and also
 * emitted as MAD.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		/* The message used to name translate_rgb_opcode, a copy-paste
		 * slip that pointed at the wrong function. */
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		/* fall through */
	case RC_OPCODE_NOP:
		/* fall through */
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
 
/**
* Emit one paired ALU instruction.
*
* Encodes \p inst into the next free slot of code->alu.inst[]: the RGB and
* alpha opcodes, source addresses and argument swizzles, presubtract
* selection, saturate flags, destination/output write masks and the output
* modifier.
*
* \return 1 on success, 0 when code->alu.length has already reached
* c->Base.max_alu_insts (error() is invoked in that case).
*/
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
{
int ip;
int j;
/* NOTE(review): PROG_CODE is defined outside this chunk; it presumably
* declares the locals `c` and `code` used below - confirm. */
PROG_CODE;

if (code->alu.length >= c->Base.max_alu_insts) {
error("Too many ALU instructions");
return 0;
}

/* Claim the next instruction slot. */
ip = code->alu.length++;

code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);

/* Each of the three sources gets a 6-bit-strided slot in the address words
* and each of the three arguments a 7-bit-strided slot in the instruction
* words. */
for(j = 0; j < 3; ++j) {
/* Set the RGB address */
unsigned int src = use_source(code, inst->RGB.Src[j]);
unsigned int arg;
/* Indices at or above R300_PFS_NUM_TEMP_REGS need the r400 extension bit. */
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);

code->alu.inst[ip].rgb_addr |= src << (6*j);

/* Set the Alpha address */
src = use_source(code, inst->Alpha.Src[j]);
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);

code->alu.inst[ip].alpha_addr |= src << (6*j);

/* RGB argument: swizzle code in the low bits, Negate at bit 5, Abs at bit 6. */
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].rgb_inst |= arg << (7*j);

/* Alpha argument, same layout as the RGB argument. */
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
arg |= inst->Alpha.Arg[j].Abs << 6;
arg |= inst->Alpha.Arg[j].Negate << 5;
code->alu.inst[ip].alpha_inst |= arg << (7*j);
}

/* Presubtract */
/* The Index of the presubtract source selects the presubtract operation;
* unknown values leave the instruction word untouched. */
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_1_MINUS_2_SRC0;
break;
case RC_PRESUB_ADD:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_SRC1_PLUS_SRC0;
break;
case RC_PRESUB_SUB:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_SRC1_MINUS_SRC0;
break;
case RC_PRESUB_INV:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_1_MINUS_SRC0;
break;
default:
break;
}
}

/* Same selection for the alpha half, written to alpha_inst. */
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_1_MINUS_2_SRC0;
break;
case RC_PRESUB_ADD:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_SRC1_PLUS_SRC0;
break;
case RC_PRESUB_SUB:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_SRC1_MINUS_SRC0;
break;
case RC_PRESUB_INV:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_1_MINUS_SRC0;
break;
default:
break;
}
}

if (inst->RGB.Saturate)
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;

/* RGB destination: temporary register write and/or output write. */
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
code->alu.inst[ip].rgb_addr |=
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
code->alu.inst[ip].rgb_addr |=
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
R300_RGB_TARGET(inst->RGB.Target);
emit->node_flags |= R300_RGBA_OUT;
}

/* Alpha destination: temporary, color output and/or depth output. */
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
code->alu.inst[ip].alpha_addr |=
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
R300_ALPHA_TARGET(inst->Alpha.Target);
emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.DepthWriteMask) {
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
emit->node_flags |= R300_W_OUT;
c->code->writes_depth = 1;
}
if (inst->Nop)
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;

/* Handle Output Modifier
* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
if (inst->RGB.Omod) {
if (inst->RGB.Omod == RC_OMOD_DISABLE) {
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
}
code->alu.inst[ip].rgb_inst |=
(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
}
/* NOTE(review): the alpha path reuses the OUTC shift constant; confirm the
* alpha modifier field sits at the same bit offset. */
if (inst->Alpha.Omod) {
if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
}
code->alu.inst[ip].alpha_inst |=
(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
}
return 1;
}
 
 
/**
* Finish the current node without advancing to the next one.
*/
static int finish_node(struct r300_emit_state * emit)
{
struct r300_fragment_program_compiler * c = emit->compiler;
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
unsigned alu_offset;
unsigned alu_end;
unsigned tex_offset;
unsigned tex_end;
 
unsigned int alu_offset_msbs, alu_end_msbs;
 
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct rc_pair_instruction inst;
memset(&inst, 0, sizeof(inst));
if (!emit_alu(emit, &inst))
return 0;
}
 
alu_offset = emit->node_first_alu;
alu_end = code->alu.length - alu_offset - 1;
tex_offset = emit->node_first_tex;
tex_end = code->tex.length - tex_offset - 1;
 
if (code->tex.length == emit->node_first_tex) {
if (emit->current_node > 0) {
error("Node %i has no TEX instructions", emit->current_node);
return 0;
}
 
tex_end = 0;
} else {
if (emit->current_node == 0)
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
}
 
/* Write the config register.
* Note: The order in which the words for each node are written
* is not correct here and needs to be fixed up once we're entirely
* done
*
* Also note that the register specification from AMD is slightly
* incorrect in its description of this register. */
code->code_addr[emit->current_node] =
((alu_offset << R300_ALU_START_SHIFT)
& R300_ALU_START_MASK)
| ((alu_end << R300_ALU_SIZE_SHIFT)
& R300_ALU_SIZE_MASK)
| ((tex_offset << R300_TEX_START_SHIFT)
& R300_TEX_START_MASK)
| ((tex_end << R300_TEX_SIZE_SHIFT)
& R300_TEX_SIZE_MASK)
| emit->node_flags
| (get_msbs_tex(tex_offset, 5)
<< R400_TEX_START_MSB_SHIFT)
| (get_msbs_tex(tex_end, 5)
<< R400_TEX_SIZE_MSB_SHIFT)
;
 
/* Write r400 extended instruction fields. These will be ignored on
* r300 cards. */
alu_offset_msbs = get_msbs_alu(alu_offset);
alu_end_msbs = get_msbs_alu(alu_end);
switch(emit->current_node) {
case 0:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
break;
case 1:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
break;
case 2:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
break;
case 3:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
break;
}
return 1;
}
 
 
/**
 * Begin a block of texture instructions.
 * Create the necessary indirection.
 *
 * Nothing happens while the current node is still empty. Otherwise the
 * current node is finished and a new one is started at the current ALU/TEX
 * positions with cleared node flags.
 *
 * \return 1 on success, 0 if the current node is already node 3 or if
 * finishing the current node failed.
 */
static int begin_tex(struct r300_emit_state * emit)
{
	PROG_CODE;
	const int node_is_empty =
		code->alu.length == emit->node_first_alu &&
		code->tex.length == emit->node_first_tex;

	if (node_is_empty)
		return 1;

	if (emit->current_node == 3) {
		error("Too many texture indirections");
		return 0;
	}

	if (!finish_node(emit))
		return 0;

	/* Open the next node at the current end of both instruction streams. */
	emit->current_node++;
	emit->node_flags = 0;
	emit->node_first_alu = code->alu.length;
	emit->node_first_tex = code->tex.length;
	return 1;
}
 
 
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
{
unsigned int unit;
unsigned int dest;
unsigned int opcode;
PROG_CODE;
 
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
error("Too many TEX instructions");
return 0;
}
 
unit = inst->U.I.TexSrcUnit;
dest = inst->U.I.DstReg.Index;
 
switch(inst->U.I.Opcode) {
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
return 0;
}
 
if (inst->U.I.Opcode == RC_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
 
use_temporary(code, inst->U.I.SrcReg[0].Index);
 
code->tex.inst[code->tex.length++] =
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
& R300_SRC_ADDR_MASK)
| ((dest << R300_DST_ADDR_SHIFT)
& R300_DST_ADDR_MASK)
| (unit << R300_TEX_ID_SHIFT)
| (opcode << R300_TEX_INST_SHIFT)
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
R400_SRC_ADDR_EXT_BIT : 0)
| (dest >= R300_PFS_NUM_TEMP_REGS ?
R400_DST_ADDR_EXT_BIT : 0)
;
return 1;
}
 
 
/**
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
*
* \param c compiler instance; cast to struct r300_fragment_program_compiler.
* \param user not used by this function.
*
* The r300 code block is zeroed, every instruction is emitted, and the
* program-wide config/offset registers are filled in. Nothing past the
* emission loop is written if the compiler is in an error state.
*/
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r300_emit_state emit;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
unsigned int tex_end;

memset(&emit, 0, sizeof(emit));
emit.compiler = compiler;

memset(code, 0, sizeof(struct r300_fragment_program_code));

/* Walk the instruction list; the loop stops early once an error is flagged,
* which is why the emit helpers' return values are not checked here. */
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
inst = inst->Next) {
if (inst->Type == RC_INSTRUCTION_NORMAL) {
/* BEGIN_TEX only opens a new node; it emits no instruction itself. */
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
begin_tex(&emit);
continue;
}

emit_tex(&emit, inst);
} else {
emit_alu(&emit, &inst->U.P);
}
}

if (code->pixsize >= compiler->Base.max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");

if (compiler->Base.Error)
return;

/* Finish the program */
finish_node(&emit);

code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */

/* Set r400 extended instruction fields. These values will be ignored
* on r300 cards. */
code->r400_code_offset_ext |=
(get_msbs_alu(0)
<< R400_ALU_OFFSET_MSB_SHIFT)
| (get_msbs_alu(code->alu.length - 1)
<< R400_ALU_SIZE_MSB_SHIFT);

/* Avoid unsigned wrap-around when the program has no TEX instructions. */
tex_end = code->tex.length ? code->tex.length - 1 : 0;
/* NOTE(review): the TEX end MSBs use get_msbs_tex(tex_end, 6) here while
* finish_node() passes 5 - confirm the differing second argument is intended. */
code->code_offset =
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
& R300_PFS_CNTL_ALU_OFFSET_MASK)
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
& R300_PFS_CNTL_ALU_END_MASK)
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
& R300_PFS_CNTL_TEX_OFFSET_MASK)
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
& R300_PFS_CNTL_TEX_END_MASK)
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
;

/* finish_node() stored node words at indices 0..current_node. Move them to
* the end of the 4-entry code_addr array (last node at index 3) and zero
* the now unused leading entries. Copying from the highest index down keeps
* the overlapping move safe. */
if (emit.current_node < 3) {
int shift = 3 - emit.current_node;
int i;
for(i = emit.current_node; i >= 0; --i)
code->code_addr[shift + i] = code->code_addr[i];
for(i = 0; i < shift; ++i)
code->code_addr[i] = 0;
}

/* Programs exceeding the R300 register or instruction limits are flagged
* via r390_mode. */
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
|| code->alu.length > R300_PFS_MAX_ALU_INST
|| code->tex.length > R300_PFS_MAX_TEX_INST) {

code->r390_mode = 1;
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
0,0 → 1,243
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* @file
* Utilities to deal with the somewhat odd restriction on R300 fragment
* program swizzles.
*/
 
#include "r300_fragprog_swizzle.h"
 
#include <stdio.h>
 
#include "../r300_reg.h"
#include "radeon_compiler.h"
 
/* Build a swizzle from three RC_SWIZZLE_* component suffixes (x, y, z);
* the fourth component is always RC_SWIZZLE_ZERO. */
#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
 
/**
* Describes one RGB swizzle and the values needed to encode it.
* A srcp_stride of 0 marks a swizzle that is rejected for the presubtract
* source (see r300FPTranslateRGBSwizzle).
*/
struct swizzle_data {
unsigned int hash; /**< swizzle value this matches */
unsigned int base; /**< base value for hw swizzle */
unsigned int stride; /**< difference in base between arg0/1/2 */
unsigned int srcp_stride; /**< difference in base between arg0/srcp */
};
 
/* Table of the RGB swizzles that can be encoded directly.
* Columns: hash (swizzle pattern), base, stride, srcp_stride.
* The lookup and split routines scan this table in order, so the first
* matching entry wins. */
static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
};

/* Number of entries in native_swizzles[]. */
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
 
/**
 * Search native_swizzles[] for the first entry whose X, Y and Z components
 * agree with the given swizzle. Components that are RC_SWIZZLE_UNUSED in the
 * given swizzle match anything.
 *
 * Returns the matching entry, or 0 if there is none.
 */
static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
{
	int idx;

	for (idx = 0; idx < num_native_swizzles; ++idx) {
		const struct swizzle_data *candidate = &native_swizzles[idx];
		int chan = 0;

		/* Advance over every channel that is unused or agrees. */
		while (chan < 3) {
			unsigned int wanted = GET_SWZ(swizzle, chan);
			if (wanted != RC_SWIZZLE_UNUSED &&
			    wanted != GET_SWZ(candidate->hash, chan))
				break;
			++chan;
		}

		if (chan == 3)
			return candidate;
	}

	return 0;
}
 
/**
 * Determines if the given swizzle is valid for r300/r400. In most situations
 * it is better to use r300_swizzle_is_native(), which can be accessed via
 * struct radeon_compiler *c; c->SwizzleCaps->IsNative().
 *
 * Returns 1 if a native swizzle matches, 0 otherwise.
 */
int r300_swizzle_is_native_basic(unsigned int swizzle)
{
	return lookup_native_swizzle(swizzle) ? 1 : 0;
}
 
/**
 * Check whether the given instruction supports the swizzle and negate
 * combinations in the given source register.
 *
 * KIL/TEX/TXB/TXP accept neither Abs nor Negate and only the identity
 * swizzle on used components. For all other opcodes the negate bits of the
 * used XYZ components must be all set or all clear, and the swizzle must be
 * in the native table (with a non-zero srcp_stride for presubtract sources).
 *
 * Returns 1 if the combination is native, 0 otherwise.
 */
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	const struct swizzle_data *native;
	unsigned int used_mask = 0;
	unsigned int negated;
	int chan;
	const int is_tex_op =
		opcode == RC_OPCODE_KIL ||
		opcode == RC_OPCODE_TEX ||
		opcode == RC_OPCODE_TXB ||
		opcode == RC_OPCODE_TXP;

	if (is_tex_op) {
		if (reg.Abs || reg.Negate)
			return 0;

		for (chan = 0; chan < 4; ++chan) {
			unsigned int swz = GET_SWZ(reg.Swizzle, chan);
			if (swz != RC_SWIZZLE_UNUSED && swz != (unsigned int)chan)
				return 0;
		}

		return 1;
	}

	/* Collect the XYZ components that are actually read. */
	for (chan = 0; chan < 3; ++chan) {
		if (GET_SWZ(reg.Swizzle, chan) != RC_SWIZZLE_UNUSED)
			used_mask |= 1 << chan;
	}

	/* Negation must cover all used components or none of them. */
	negated = reg.Negate & used_mask;
	if (negated && negated != used_mask)
		return 0;

	native = lookup_native_swizzle(reg.Swizzle);
	if (!native)
		return 0;
	if (reg.File == RC_FILE_PRESUB && native->srcp_stride == 0)
		return 0;

	return 1;
}
 
 
/**
 * Split the masked components of a source register into phases such that
 * every phase can be expressed with a single native swizzle.
 *
 * Each pass greedily picks the native swizzle that covers the most remaining
 * XYZ components (all sharing the same negate state) and records that set as
 * one phase. The W component, if requested, is added to the first phase.
 *
 * \param src   source register whose Swizzle and Negate are examined
 * \param mask  components that need to be covered
 * \param split receives the number of phases and the component mask of each
 */
static void r300_swizzle_split(
		struct rc_src_register src, unsigned int mask,
		struct rc_swizzle_split * split)
{
	split->NumPhases = 0;

	while(mask) {
		unsigned int best_matchcount = 0;
		unsigned int best_matchmask = 0;
		int i, comp;

		for(i = 0; i < num_native_swizzles; ++i) {
			const struct swizzle_data *sd = &native_swizzles[i];
			unsigned int matchcount = 0;
			unsigned int matchmask = 0;
			for(comp = 0; comp < 3; ++comp) {
				unsigned int swz;
				if (!GET_BIT(mask, comp))
					continue;
				swz = GET_SWZ(src.Swizzle, comp);
				if (swz == RC_SWIZZLE_UNUSED)
					continue;
				if (swz == GET_SWZ(sd->hash, comp)) {
					/* check if the negate bit of current component
					 * is the same for already matched components */
					if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
						continue;

					matchcount++;
					matchmask |= 1 << comp;
				}
			}
			if (matchcount > best_matchcount) {
				best_matchcount = matchcount;
				best_matchmask = matchmask;
				/* Everything still pending in XYZ is covered. */
				if (matchmask == (mask & RC_MASK_XYZ))
					break;
			}
		}

		if (mask & RC_MASK_W)
			best_matchmask |= RC_MASK_W;

		/* Nothing left that can be matched (e.g. a masked component whose
		 * swizzle is RC_SWIZZLE_UNUSED, or mask bits above W). Without this
		 * check the mask never shrinks, so the loop would spin forever
		 * while writing past the end of split->Phase. */
		if (!best_matchmask)
			break;

		split->Phase[split->NumPhases++] = best_matchmask;
		mask &= ~best_matchmask;
	}
}
 
/* Swizzle capability hooks for r300/r400 fragment programs: the native-swizzle
* check and the splitter defined above. */
struct rc_swizzle_caps r300_swizzle_caps = {
.IsNative = r300_swizzle_is_native,
.Split = r300_swizzle_split
};
 
 
/**
 * Translate an RGB (XYZ) swizzle into the hardware code for the given
 * instruction source.
 *
 * The code is the table entry's base plus srcp_stride for the presubtract
 * source, or base plus src * stride for a regular source. If the swizzle is
 * not native (or not usable with the presubtract source), a message is
 * printed to stderr and 0 is returned.
 */
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
	const struct swizzle_data *native = lookup_native_swizzle(swizzle);
	const int is_presub = (src == RC_PAIR_PRESUB_SRC);

	if (native == 0 || (is_presub && native->srcp_stride == 0)) {
		fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
		return 0;
	}

	if (is_presub)
		return native->base + native->srcp_stride;

	return native->base + src*native->stride;
}
 
 
/**
 * Translate an Alpha (W) swizzle into the hardware code for the given
 * instruction source. Only the first component of the swizzle is examined.
 */
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
	const unsigned int chan = GET_SWZ(swizzle, 0);

	/* The presubtract source has its own run of codes. */
	if (src == RC_PAIR_PRESUB_SRC)
		return R300_ALU_ARGA_SRCP_X + chan;

	/* Swizzle values below 3 are encoded as three codes per source. */
	if (chan < 3)
		return chan + 3*src;

	if (chan == RC_SWIZZLE_W)
		return R300_ALU_ARGA_SRC0A + src;
	if (chan == RC_SWIZZLE_ZERO)
		return R300_ALU_ARGA_ZERO;
	if (chan == RC_SWIZZLE_HALF)
		return R300_ALU_ARGA_HALF;

	/* RC_SWIZZLE_ONE and every remaining value map to ONE. */
	return R300_ALU_ARGA_ONE;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
0,0 → 1,39
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __R300_FRAGPROG_SWIZZLE_H_
#define __R300_FRAGPROG_SWIZZLE_H_

#include "radeon_swizzle.h"

/* Swizzle capability hooks (IsNative / Split) for r300/r400 fragment programs. */
extern struct rc_swizzle_caps r300_swizzle_caps;

/* Hardware code for an RGB swizzle on the given source; prints a message to
* stderr and returns 0 if the swizzle is not native. */
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
/* Hardware code for an alpha swizzle on the given source. */
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
/* Returns 1 if the swizzle matches an entry of the native table, else 0. */
int r300_swizzle_is_native_basic(unsigned int swizzle);

#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
0,0 → 1,156
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "radeon_compiler.h"
 
#include <stdio.h>
 
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "radeon_rename_regs.h"
#include "radeon_remove_constants.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
 
 
/**
 * Report the outputs of a fragment program to a dataflow callback: all four
 * components of each of the four color outputs, then the W component of the
 * depth output.
 *
 * \param userdata the struct r300_fragment_program_compiler
 * \param data     opaque pointer handed through to \p callback
 * \param callback invoked as callback(data, output index, component mask)
 */
static void dataflow_outputs_mark_use(void * userdata, void * data,
		void (*callback)(void *, unsigned int, unsigned int))
{
	struct r300_fragment_program_compiler * c = userdata;
	unsigned int target;

	for (target = 0; target < 4; ++target)
		callback(data, c->OutputColor[target], RC_MASK_XYZW);

	callback(data, c->OutputDepth, RC_MASK_W);
}
 
/**
 * Compiler pass: move writes to the depth output from the Z component to the
 * W component.
 *
 * Instructions writing the depth output without Z get an empty write mask.
 * For componentwise opcodes every source is additionally re-swizzled with
 * RC_SWIZZLE_ZZZZ so the value formerly computed for Z is what gets written.
 */
static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
{
	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
	struct rc_instruction *cur;

	for (cur = c->Base.Program.Instructions.Next;
	     cur != &c->Base.Program.Instructions;
	     cur = cur->Next) {
		struct rc_sub_instruction * sub = &cur->U.I;
		const struct rc_opcode_info *opinfo = rc_get_opcode_info(sub->Opcode);
		unsigned src;

		/* Only writes to the depth output are of interest. */
		if (sub->DstReg.File != RC_FILE_OUTPUT || sub->DstReg.Index != c->OutputDepth)
			continue;

		if (!(sub->DstReg.WriteMask & RC_MASK_Z)) {
			sub->DstReg.WriteMask = 0;
			continue;
		}
		sub->DstReg.WriteMask = RC_MASK_W;

		if (opinfo->IsComponentwise) {
			for (src = 0; src < opinfo->NumSrcRegs; src++)
				sub->SrcReg[src] = lmul_swizzle(RC_SWIZZLE_ZZZZ, sub->SrcReg[src]);
		}
	}
}
 
/**
* Compile a fragment program for r300/r400 or r500 hardware.
*
* Builds the ordered list of compiler passes (each enabled by its predicate
* column), selects the swizzle capabilities matching the chip family, runs
* the passes via rc_run_compiler() and finally copies the program constants
* into c->code->constants.
*/
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
/* Predicates used to switch individual passes on or off. */
int is_r500 = c->Base.is_r500;
int opt = !c->Base.disable_optimizations;
int alpha2one = c->state.alpha_to_one;

/* Lists of instruction transformations. */
/* Each list ends with a { 0, 0 } entry. */
struct radeon_program_transformation force_alpha_to_one[] = {
{ &rc_force_output_alpha_to_one, c },
{ 0, 0 }
};

struct radeon_program_transformation rewrite_tex[] = {
{ &radeonTransformTEX, c },
{ 0, 0 }
};

struct radeon_program_transformation rewrite_if[] = {
{ &r500_transform_IF, 0 },
{0, 0}
};

struct radeon_program_transformation native_rewrite_r500[] = {
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 },
{ 0, 0 }
};

struct radeon_program_transformation native_rewrite_r300[] = {
{ &radeonTransformALU, 0 },
{ &r300_transform_trig_simple, 0 },
{ 0, 0 }
};

/* List of compiler passes. */
/* Passes that come in r500 / non-r500 pairs use complementary predicates,
* so only one of each pair is enabled. The list ends with a NULL-named entry. */
struct radeon_compiler_pass fs_list[] = {
/* NAME DUMP PREDICATE FUNCTION PARAM */
{"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL},
/* This transformation needs to be done before any of the IF
* instructions are modified. */
{"transform KILP", 1, 1, rc_transform_KILL, NULL},
{"unroll loops", 1, is_r500, rc_unroll_loops, NULL},
{"transform loops", 1, !is_r500, rc_transform_loops, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one},
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
{"transform IF", 1, is_r500, rc_local_transform, rewrite_if},
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use},
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, &opt},
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},
{"register allocation", 1, 1, rc_pair_regalloc, &opt},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
{"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
{"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
{NULL, 0, 0, NULL, NULL}
};

c->Base.type = RC_FRAGMENT_PROGRAM;
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;

rc_run_compiler(&c->Base, fs_list);

rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
0,0 → 1,931
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "radeon_compiler.h"
 
#include <stdio.h>
 
#include "../r300_reg.h"
 
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
#include "radeon_remove_constants.h"
 
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
*
* x is the index into vpi->SrcReg[] of the source to reuse and y is the
* swizzle value replicated to all four components. The macro expects the
* names `vp` and `vpi` to be in scope at the point of use. The operand is
* built with RC_MASK_NONE as its negate mask and keeps the source's RelAddr bit.
*/
#define __CONST(x, y) \
(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
t_swizzle(y), \
t_swizzle(y), \
t_swizzle(y), \
t_swizzle(y), \
t_src_class(vpi->SrcReg[x].File), \
RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
 
 
/**
 * Translate a destination write mask into its hardware form by keeping only
 * the XYZW bits.
 */
static unsigned long t_dst_mask(unsigned int mask)
{
	/* RC_MASK_* is equivalent to VSF_FLAG_* */
	const unsigned long hw_mask = mask & RC_MASK_XYZW;

	return hw_mask;
}
 
/**
 * Translate a destination register file into the PVS_DST_REG_* class.
 *
 * Files other than TEMPORARY, OUTPUT and ADDRESS produce a message on stderr
 * and are treated as temporaries.
 */
static unsigned long t_dst_class(rc_register_file file)
{
	switch (file) {
	case RC_FILE_TEMPORARY:
		return PVS_DST_REG_TEMPORARY;
	case RC_FILE_OUTPUT:
		return PVS_DST_REG_OUT;
	case RC_FILE_ADDRESS:
		return PVS_DST_REG_A0;
	default:
		break;
	}

	/* Unexpected file: complain, then fall back to the temporary class. */
	fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
	return PVS_DST_REG_TEMPORARY;
}
 
/**
 * Return the hardware index of a destination register: output registers are
 * remapped through vp->outputs[], every other file uses the index as is.
 */
static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
		struct rc_dst_register *dst)
{
	unsigned long hw_index;

	if (dst->File != RC_FILE_OUTPUT)
		hw_index = dst->Index;
	else
		hw_index = vp->outputs[dst->Index];

	return hw_index;
}
 
/**
 * Translate a source register file into the PVS_SRC_REG_* class.
 *
 * RC_FILE_NONE counts as a temporary. Files without a mapping produce a
 * message on stderr and are treated as temporaries as well.
 */
static unsigned long t_src_class(rc_register_file file)
{
	switch (file) {
	case RC_FILE_NONE:
	case RC_FILE_TEMPORARY:
		return PVS_SRC_REG_TEMPORARY;
	case RC_FILE_INPUT:
		return PVS_SRC_REG_INPUT;
	case RC_FILE_CONSTANT:
		return PVS_SRC_REG_CONSTANT;
	default:
		break;
	}

	/* Unexpected file: complain, then fall back to the temporary class. */
	fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
	return PVS_SRC_REG_TEMPORARY;
}
 
/**
 * Tell whether two source registers conflict: both belong to the same
 * non-temporary hardware class and either use relative addressing or refer
 * to different indices.
 *
 * Returns 1 on conflict, 0 otherwise.
 */
static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
{
	const unsigned long class_a = t_src_class(a.File);
	const unsigned long class_b = t_src_class(b.File);

	/* Different classes and temporaries never conflict. */
	if (class_a != class_b || class_a == PVS_SRC_REG_TEMPORARY)
		return 0;

	return (a.RelAddr || b.RelAddr || a.Index != b.Index) ? 1 : 0;
}
 
/**
 * Translate a swizzle component into its hardware value.
 *
 * This is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to
 * VSF_IN_COMPONENT_*.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	const unsigned long hw_swizzle = swizzle;

	return hw_swizzle;
}
 
/**
 * Return the hardware index of a source register.
 *
 * Input registers are remapped through vp->inputs[] (asserted to be mapped).
 * For every other file the index is used directly; a negative index is
 * reported on stderr and replaced by 0.
 */
static unsigned long t_src_index(struct r300_vertex_program_code *vp,
		struct rc_src_register *src)
{
	if (src->File != RC_FILE_INPUT) {
		if (src->Index >= 0)
			return src->Index;

		fprintf(stderr,
			"negative offsets for indirect addressing do not work.\n");
		return 0;
	}

	assert(vp->inputs[src->Index] != -1);
	return vp->inputs[src->Index];
}
 
/* these two functions should probably be merged... */
 
/**
* Build the hardware source operand word for a vector source: register
* index, the four swizzle components, register class and per-component
* negate mask, plus the RelAddr bit (bit 4) and the Abs bit (bit 3).
*/
static unsigned long t_src(struct r300_vertex_program_code *vp,
struct rc_src_register *src)
{
/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 1)),
t_swizzle(GET_SWZ(src->Swizzle, 2)),
t_swizzle(GET_SWZ(src->Swizzle, 3)),
t_src_class(src->File),
src->Negate) |
(src->RelAddr << 4) | (src->Abs << 3);
}
 
/**
* Build the hardware source operand word for a scalar source: the single
* swizzle value returned by rc_get_scalar_src_swz() is replicated to all
* four components, plus the RelAddr bit (bit 4) and the Abs bit (bit 3).
*/
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
struct rc_src_register *src)
{
/* Unlike t_src(), negation is collapsed here: any set bit in src->Negate
* negates all four components (RC_MASK_XYZW), otherwise none.
*/
unsigned int swz = rc_get_scalar_src_swz(src->Swizzle);

return PVS_SRC_OPERAND(t_src_index(vp, src),
t_swizzle(swz),
t_swizzle(swz),
t_swizzle(swz),
t_swizzle(swz),
t_src_class(src->File),
src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(src->RelAddr << 4) | (src->Abs << 3);
}
 
/**
 * Check whether a destination register can be written.
 *
 * Returns 0 for an output register that has no mapping in vp->outputs[]
 * (entry is -1) and 1 otherwise. Address registers are asserted to use
 * index 0.
 */
static int valid_dst(struct r300_vertex_program_code *vp,
		struct rc_dst_register *dst)
{
	switch (dst->File) {
	case RC_FILE_OUTPUT:
		if (vp->outputs[dst->Index] == -1)
			return 0;
		break;
	case RC_FILE_ADDRESS:
		assert(dst->Index == 0);
		break;
	default:
		break;
	}

	return 1;
}
 
/**
* Emit a vector instruction with one source into the four words at inst[].
*
* inst[0] holds the opcode and destination, inst[1] the source operand;
* inst[2] and inst[3] are filled with a ZERO-swizzled operand derived from
* source 0.
*/
static void ei_vector1(struct r300_vertex_program_code *vp,
unsigned int hw_opcode,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
0,
0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
 
/**
* Emit a vector instruction with two sources into the four words at inst[].
*
* inst[0] holds the opcode and destination, inst[1] and inst[2] the two
* source operands; inst[3] is filled with a ZERO-swizzled operand derived
* from source 1.
*/
static void ei_vector2(struct r300_vertex_program_code *vp,
unsigned int hw_opcode,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
0,
0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
}
 
/**
* Emit an instruction with one scalar source into the four words at inst[].
*
* Differs from ei_vector1() in that the second PVS_OP_DST_OPERAND argument
* is 1 (presumably the math-instruction flag - confirm against the macro
* definition) and the source is encoded with t_src_scalar().
*/
static void ei_math1(struct r300_vertex_program_code *vp,
unsigned int hw_opcode,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
1,
0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
 
/**
* Emit a LIT instruction (ME_LIGHT_COEFF_DX) into the four words at inst[].
*
* All three operand words read source 0 with its components rearranged; the
* trailing comment on each argument line names the component it selects
* (Z is always PVS_SRC_SELECT_FORCE_0). Any set bit in the source's Negate
* negates all four components. The Abs bit is not encoded here.
*/
static void ei_lit(struct r300_vertex_program_code *vp,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}

inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
1,
0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
/* NOTE: Users swizzling might not work. */
/* Operand 1: components ordered X, W, 0, Y. */
inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_src_class(vpi->SrcReg[0].File),
vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
/* Operand 2: components ordered Y, W, 0, X. */
inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
t_src_class(vpi->SrcReg[0].File),
vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
/* Operand 3: components ordered Y, X, 0, W. */
inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
t_src_class(vpi->SrcReg[0].File),
vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
}
 
/**
 * Encode MAD into the four dwords at inst.
 *
 * Uses PVS_MACRO_OP_2CLK_MADD when all three sources are temporaries with
 * pairwise different indices, and the plain VE_MULTIPLY_ADD otherwise.
 * In the non-macro case the Index of every source whose File is
 * RC_FILE_NONE is overwritten in vpi (the instruction is modified in
 * place) before the sources are translated with t_src().
 */
static void ei_mad(struct r300_vertex_program_code *vp,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
unsigned int i;
/* Remarks about hardware limitations of MAD
* (please preserve this comment, as this information is _NOT_
* in the documentation provided by AMD).
*
* As described in the documentation, MAD with three unique temporary
* source registers requires the use of the macro version.
*
* However (and this is not mentioned in the documentation), apparently
* the macro version is _NOT_ a full superset of the normal version.
* In particular, the macro version does not always work when relative
* addressing is used in the source operands.
*
* This limitation caused incorrect rendering in Sauerbraten's OpenGL
* assembly shader path when using medium quality animations
* (i.e. animations with matrix blending instead of quaternion blending).
*
* Unfortunately, I (nha) have been unable to extract a Piglit regression
* test for this issue - for some reason, it is possible to have vertex
* programs whose prefix is *exactly* the same as the prefix of the
* offending program in Sauerbraten up to the offending instruction
* without causing any trouble.
*
* Bottom line: Use the macro version only when really necessary;
* according to AMD docs, this should improve performance by one clock
* as a nice side bonus.
*/
if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
0,
1,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
} else {
inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
0,
0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);

/* Arguments with constant swizzles still count as a unique
* temporary, so we should make sure these arguments share a
* register index with one of the other arguments. */
/* NOTE(review): the inner loop always picks the first j != i
* (source 1 for i == 0, source 0 otherwise) without checking that
* source j is itself a real register -- confirm this is sufficient
* when two sources have File == RC_FILE_NONE. */
for (i = 0; i < 3; i++) {
unsigned int j;
if (vpi->SrcReg[i].File != RC_FILE_NONE)
continue;

for (j = 0; j < 3; j++) {
if (i != j) {
vpi->SrcReg[i].Index =
vpi->SrcReg[j].Index;
break;
}
}
}
}
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
inst[3] = t_src(vp, &vpi->SrcReg[2]);
}
 
/**
 * Encode POW as ME_POWER_FUNC_FF into the four dwords at inst.
 *
 * vpi->SrcReg[0] goes into the first source slot and vpi->SrcReg[1] into
 * the third one (both via t_src_scalar()); the middle slot is filled with
 * a zero constant.
 */
static void ei_pow(struct r300_vertex_program_code *vp,
struct rc_sub_instruction *vpi,
unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
1,
0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File),
vpi->SaturateMode == RC_SATURATE_ZERO_ONE);
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
/* Second operand slot is a zero filler; the second source uses slot 3. */
inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
 
/**
 * Compiler pass: translate the instruction list into PVS machine code.
 *
 * Walks compiler->Base.Program.Instructions and, for every instruction
 * with a valid destination, writes four dwords into compiler->code->body.d
 * and advances compiler->code->length by 4. Along the way it
 *  - records one flow-control entry per ENDLOOP in fc_op_addrs /
 *    fc_loop_index / fc_ops,
 *  - ORs the predicate enable/sense bits into inst[0] when the
 *    destination is predicated,
 *  - tracks the highest temporary index used in num_temporaries.
 *
 * Problems are reported through rc_error(); the pass stops at the first
 * error that makes further translation meaningless.
 *
 * \param c     the compiler, actually a struct r300_vertex_program_compiler
 * \param user  unused pass parameter
 */
static void translate_vertex_program(struct radeon_compiler *c, void *user)
{
	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
	struct rc_instruction *rci;

	/* Return address (in instructions) of every currently open loop,
	 * innermost last. */
	unsigned loops[R500_PVS_MAX_LOOP_DEPTH];
	unsigned loop_depth = 0;

	compiler->code->pos_end = 0;	/* Not supported yet */
	compiler->code->length = 0;
	compiler->code->num_temporaries = 0;

	compiler->SetHwInputOutput(compiler);

	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
		struct rc_sub_instruction *vpi = &rci->U.I;
		unsigned int *inst = compiler->code->body.d + compiler->code->length;
		const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);

		/* Skip instructions writing to non-existing destination */
		if (!valid_dst(compiler->code, &vpi->DstReg))
			continue;

		if (info->HasDstReg) {
			/* The Saturate modifier is only accepted on R500. */
			if (vpi->SaturateMode != RC_SATURATE_NONE && !c->is_r500) {
				rc_error(&compiler->Base, "Vertex program does not support the Saturate "
					"modifier (yet).\n");
			}
		}

		if (compiler->code->length >= c->max_alu_insts * 4) {
			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
			return;
		}

		/* SEQ and SNE must already have been lowered on pre-R500. */
		assert(compiler->Base.is_r500 ||
			(vpi->Opcode != RC_OPCODE_SEQ &&
			vpi->Opcode != RC_OPCODE_SNE));

		switch (vpi->Opcode) {
		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
		case RC_OPCODE_BGNLOOP:
		{
			if ((!compiler->Base.is_r500
				&& loop_depth >= R300_VS_MAX_LOOP_DEPTH)
				|| loop_depth >= R500_PVS_MAX_LOOP_DEPTH) {
				rc_error(&compiler->Base,
						"Loops are nested too deep.");
				return;
			}
			/* Remember the instruction following this BGNLOOP as
			 * the loop's return address. Note that no inst[] words
			 * are written here, yet the slot is still counted by
			 * the length += 4 at the bottom of the loop. */
			loops[loop_depth++] = ((compiler->code->length)/ 4) + 1;
			break;
		}
		case RC_OPCODE_ENDLOOP:
		{
			unsigned int act_addr;
			unsigned int last_addr;
			unsigned int ret_addr;

			/* An ENDLOOP without an open loop would underflow
			 * loop_depth and read outside loops[]. */
			if (loop_depth == 0) {
				rc_error(&compiler->Base,
					"ENDLOOP without matching BGNLOOP.");
				return;
			}

			/* The flow-control tables written below are indexed by
			 * num_fc_ops, so that is the counter that has to be
			 * bounded (the nesting depth is already limited at
			 * BGNLOOP). */
			if (compiler->code->num_fc_ops >= R300_VS_MAX_FC_OPS) {
				rc_error(&compiler->Base,
					"Too many flow control instructions.");
				return;
			}

			ret_addr = loops[--loop_depth];
			act_addr = ret_addr - 1;
			last_addr = (compiler->code->length / 4) - 1;

			if (compiler->Base.is_r500) {
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].lw =
					R500_PVS_FC_ACT_ADRS(act_addr)
					| R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff)
					;
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].uw =
					R500_PVS_FC_LAST_INST(last_addr)
					| R500_PVS_FC_RTN_INST(ret_addr)
					;
			} else {
				compiler->code->fc_op_addrs.r300
					[compiler->code->num_fc_ops] =
					R300_PVS_FC_ACT_ADRS(act_addr)
					| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
					| R300_PVS_FC_LAST_INST(last_addr)
					| R300_PVS_FC_RTN_INST(ret_addr)
					;
			}
			compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
				R300_PVS_FC_LOOP_INIT_VAL(0x0)
				| R300_PVS_FC_LOOP_STEP_VAL(0x1)
				;
			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
						compiler->code->num_fc_ops);
			compiler->code->num_fc_ops++;

			break;
		}

		case RC_ME_PRED_SET_CLR:
			ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst);
			break;

		case RC_ME_PRED_SET_INV:
			ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst);
			break;

		case RC_ME_PRED_SET_POP:
			ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst);
			break;

		case RC_ME_PRED_SET_RESTORE:
			ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst);
			break;

		case RC_ME_PRED_SEQ:
			ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst);
			break;

		case RC_ME_PRED_SNEQ:
			ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst);
			break;

		case RC_VE_PRED_SNEQ_PUSH:
			ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH,
								vpi, inst);
			break;

		default:
			rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
			return;
		}

		/* Predicated write: enable predication and, for RC_PRED_SET,
		 * also set the sense bit. */
		if (vpi->DstReg.Pred != RC_PRED_DISABLED) {
			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
						<< PVS_DST_PRED_ENABLE_SHIFT);
			if (vpi->DstReg.Pred == RC_PRED_SET) {
				inst[0] |= (PVS_DST_PRED_SENSE_MASK
						<< PVS_DST_PRED_SENSE_SHIFT);
			}
		}

		/* Update the number of temporaries. */
		if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
		    vpi->DstReg.Index >= compiler->code->num_temporaries)
			compiler->code->num_temporaries = vpi->DstReg.Index + 1;

		for (unsigned i = 0; i < info->NumSrcRegs; i++)
			if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
			    vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
				compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;

		if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
			rc_error(&compiler->Base, "Too many temporaries.\n");
			return;
		}

		compiler->code->length += 4;

		/* Stop after non-fatal errors raised above (e.g. Saturate). */
		if (compiler->Base.Error)
			return;
	}
}
 
/* Per-original-temporary bookkeeping used by allocate_temporary_registers(). */
struct temporary_allocation {
unsigned int Allocated:1; /* set once a hardware temporary has been assigned */
unsigned int HwTemp:15; /* index of the hardware temporary assigned to it */
struct rc_instruction * LastRead; /* last instruction reading it, or the ENDLOOP of the enclosing outermost loop */
};
 
/**
 * Compiler pass: remap the program's temporaries onto hardware temporaries.
 *
 * Three sweeps over the instruction list:
 *  1. find the number of distinct original temporary indices,
 *  2. record for each original temporary the instruction that reads it
 *     last (reads inside a loop are attributed to the ENDLOOP of the
 *     outermost enclosing loop),
 *  3. rewrite source and destination indices, handing out the lowest free
 *     hardware temporary on first write and releasing it at the last read.
 *
 * \param c     the compiler, actually a struct r300_vertex_program_compiler
 * \param user  unused pass parameter
 */
static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
{
	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
	struct rc_instruction *const head = &compiler->Base.Program.Instructions;
	struct rc_instruction *cur;
	struct rc_instruction *loop_end = NULL;
	struct temporary_allocation *alloc;
	unsigned int temp_count = 0;
	char in_use[RC_REGISTER_MAX_INDEX];
	unsigned int s, hw;

	memset(in_use, 0, sizeof(in_use));

	rc_recompute_ips(c);

	/* Sweep 1: highest original temporary index + 1. */
	for (cur = head->Next; cur != head; cur = cur->Next) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);

		for (s = 0; s < info->NumSrcRegs; ++s) {
			if (cur->U.I.SrcReg[s].File != RC_FILE_TEMPORARY)
				continue;
			if (cur->U.I.SrcReg[s].Index >= temp_count)
				temp_count = cur->U.I.SrcReg[s].Index + 1;
		}

		if (info->HasDstReg &&
		    cur->U.I.DstReg.File == RC_FILE_TEMPORARY &&
		    cur->U.I.DstReg.Index >= temp_count)
			temp_count = cur->U.I.DstReg.Index + 1;
	}

	alloc = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
			sizeof(struct temporary_allocation) * temp_count);
	memset(alloc, 0, sizeof(struct temporary_allocation) * temp_count);

	/* Sweep 2: lifetimes of the original temporaries. */
	for (cur = head->Next; cur != head; cur = cur->Next) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);

		/* On entering an outermost loop, locate its matching ENDLOOP;
		 * every read inside the loop is charged to that instruction. */
		if (loop_end == NULL && cur->U.I.Opcode == RC_OPCODE_BGNLOOP) {
			int depth = 1;
			struct rc_instruction *scan;

			for (scan = cur->Next; scan != head; scan = scan->Next) {
				if (scan->U.I.Opcode == RC_OPCODE_BGNLOOP) {
					depth++;
					continue;
				}
				if (scan->U.I.Opcode != RC_OPCODE_ENDLOOP)
					continue;
				depth--;
				if (depth <= 0) {
					loop_end = scan;
					break;
				}
			}
		}

		if (cur == loop_end) {
			loop_end = NULL;
			continue;
		}

		for (s = 0; s < info->NumSrcRegs; ++s) {
			if (cur->U.I.SrcReg[s].File != RC_FILE_TEMPORARY)
				continue;
			alloc[cur->U.I.SrcReg[s].Index].LastRead = loop_end ? loop_end : cur;
		}
	}

	/* Sweep 3: the actual renaming. */
	for (cur = head->Next; cur != head; cur = cur->Next) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);
		unsigned int old;

		for (s = 0; s < info->NumSrcRegs; ++s) {
			if (cur->U.I.SrcReg[s].File != RC_FILE_TEMPORARY)
				continue;

			old = cur->U.I.SrcReg[s].Index;
			cur->U.I.SrcReg[s].Index = alloc[old].HwTemp;

			/* Last read: the hardware temporary becomes free again. */
			if (alloc[old].Allocated && alloc[old].LastRead == cur)
				in_use[alloc[old].HwTemp] = 0;
		}

		if (!info->HasDstReg || cur->U.I.DstReg.File != RC_FILE_TEMPORARY)
			continue;

		old = cur->U.I.DstReg.Index;

		if (!alloc[old].Allocated) {
			/* First write: take the lowest free hardware temporary. */
			hw = 0;
			while (hw < c->max_temp_regs && in_use[hw])
				++hw;

			alloc[old].Allocated = 1;
			alloc[old].HwTemp = hw;
			in_use[alloc[old].HwTemp] = 1;
		}

		cur->U.I.DstReg.Index = alloc[old].HwTemp;
	}
}
 
/**
* R3xx-R4xx vertex engine does not support the Absolute source operand modifier
* and the Saturate opcode modifier. Only Absolute is currently transformed.
*/
static int transform_nonnative_modifiers(
struct radeon_compiler *c,
struct rc_instruction *inst,
void* unused)
{
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
 
/* Transform ABS(a) to MAX(a, -a). */
for (i = 0; i < opcode->NumSrcRegs; i++) {
if (inst->U.I.SrcReg[i].Abs) {
struct rc_instruction *new_inst;
unsigned temp;
 
inst->U.I.SrcReg[i].Abs = 0;
 
temp = rc_find_free_temporary(c);
 
new_inst = rc_insert_new_instruction(c, inst->Prev);
new_inst->U.I.Opcode = RC_OPCODE_MAX;
new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
new_inst->U.I.DstReg.Index = temp;
new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
 
memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[i].Index = temp;
inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
}
}
return 1;
}
 
/**
 * Replace source operand src_idx of inst by a fresh temporary that a MOV
 * inserted directly before inst fills with the original operand.
 */
static void vs_spill_src_to_temporary(
	struct radeon_compiler *c,
	struct rc_instruction *inst,
	unsigned int src_idx)
{
	int tmpreg = rc_find_free_temporary(c);
	struct rc_instruction *mov = rc_insert_new_instruction(c, inst->Prev);

	mov->U.I.Opcode = RC_OPCODE_MOV;
	mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
	mov->U.I.DstReg.Index = tmpreg;
	mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src_idx];

	reset_srcreg(&inst->U.I.SrcReg[src_idx]);
	inst->U.I.SrcReg[src_idx].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[src_idx].Index = tmpreg;
}

/**
 * Vertex engine cannot read two inputs or two constants at the same time.
 * Introduce intermediate MOVs to temporary registers to account for this.
 *
 * The third source is resolved against the other two first, then the
 * second source against the first. Always returns 1.
 */
static int transform_source_conflicts(
	struct radeon_compiler *c,
	struct rc_instruction* inst,
	void* unused)
{
	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);

	if (info->NumSrcRegs == 3 &&
	    (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) ||
	     t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])))
		vs_spill_src_to_temporary(c, inst, 2);

	if (info->NumSrcRegs >= 2 &&
	    t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]))
		vs_spill_src_to_temporary(c, inst, 1);

	return 1;
}
 
/**
 * Compiler pass: make sure every required output is written.
 *
 * For each of the 32 output slots that is set in RequiredOutputs but not in
 * Program.OutputsWritten, a "MOV output[i].xyzw, constant[0].xyzw" is
 * inserted after the current last instruction and the slot is marked as
 * written.
 *
 * \param c     the compiler, actually a struct r300_vertex_program_compiler
 * \param user  unused pass parameter
 */
static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
{
	struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
	int i;

	for(i = 0; i < 32; ++i) {
		/* Unsigned on purpose: 1 << 31 overflows a 32-bit signed int. */
		const unsigned int bit = 1u << i;
		struct rc_instruction * inst;

		if (!(compiler->RequiredOutputs & bit) ||
		    (compiler->Base.Program.OutputsWritten & bit))
			continue;

		inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
		inst->U.I.Opcode = RC_OPCODE_MOV;

		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
		inst->U.I.DstReg.Index = i;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

		inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
		inst->U.I.SrcReg[0].Index = 0;
		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;

		compiler->Base.Program.OutputsWritten |= bit;
	}
}
 
/**
 * Callback for the dead-code pass: report every required output as used.
 *
 * \param userdata  the struct r300_vertex_program_compiler
 * \param data      opaque cookie handed through to callback
 * \param callback  invoked as callback(data, index, RC_MASK_XYZW) for each
 *                  output index whose bit is set in RequiredOutputs
 */
static void dataflow_outputs_mark_used(void * userdata, void * data,
		void (*callback)(void *, unsigned int, unsigned int))
{
	struct r300_vertex_program_compiler * c = userdata;
	int i;

	for(i = 0; i < 32; ++i) {
		/* Unsigned on purpose: 1 << 31 overflows a 32-bit signed int. */
		if (c->RequiredOutputs & (1u << i))
			callback(data, i, RC_MASK_XYZW);
	}
}
 
/**
 * IsNative hook of the vertex program swizzle caps: every opcode/source
 * combination is accepted, so this unconditionally returns 1.
 */
static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	/* Neither argument influences the answer. */
	(void) reg;
	(void) opcode;

	return 1;
}
 
/**
 * Remove negative relative-addressing offsets between arl and end.
 *
 * An ADD is inserted in front of arl that adds min_offset (as an immediate
 * constant) to the ARL's source and stores the result in the X channel of a
 * new temporary; arl is changed to read that temporary. Every relatively
 * addressed source in the instructions after arl and before end then has
 * min_offset subtracted from its Index.
 *
 * \param c           the vertex program compiler
 * \param arl         the ARL instruction to rewrite
 * \param end         first instruction that is NOT rewritten
 * \param min_offset  lowest offset used with this ARL (callers pass a
 *                    negative value)
 */
static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
					  struct rc_instruction *arl,
					  struct rc_instruction *end,
					  int min_offset)
{
	struct rc_instruction *bias;
	struct rc_instruction *cur;
	unsigned imm_swizzle;
	unsigned src;

	/* bias.x = <ARL source> + min_offset */
	bias = rc_insert_new_instruction(&c->Base, arl->Prev);
	bias->U.I.Opcode = RC_OPCODE_ADD;
	bias->U.I.DstReg.File = RC_FILE_TEMPORARY;
	bias->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
	bias->U.I.DstReg.WriteMask = RC_MASK_X;
	bias->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
	bias->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	bias->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
								      min_offset, &imm_swizzle);
	bias->U.I.SrcReg[1].Swizzle = imm_swizzle;

	/* The ARL now loads the address register from bias.x. */
	arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	arl->U.I.SrcReg[0].Index = bias->U.I.DstReg.Index;
	arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;

	/* Compensate in every relative access up to, but excluding, end. */
	for (cur = arl->Next; cur != end; cur = cur->Next) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);

		for (src = 0; src < info->NumSrcRegs; src++) {
			if (!cur->U.I.SrcReg[src].RelAddr)
				continue;
			cur->U.I.SrcReg[src].Index -= min_offset;
		}
	}
}
 
/**
 * Compiler pass: emulate negative offsets in relative addressing.
 *
 * The program is cut into ranges that each start at an ARL. For every range
 * the lowest negative offset used by a relatively addressed source is
 * collected; if there is one, transform_negative_addressing() rewrites the
 * range. A negative relative access that is not preceded by any ARL is
 * reported through rc_error() and aborts the pass.
 *
 * \param compiler  actually a struct r300_vertex_program_compiler
 * \param user      unused pass parameter
 */
static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
{
	struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
	struct rc_instruction *const head = &c->Base.Program.Instructions;
	struct rc_instruction *cur;
	struct rc_instruction *pending_arl = NULL;
	int lowest = 0;
	unsigned s;

	for (cur = head->Next; cur != head; cur = cur->Next) {
		const struct rc_opcode_info *info = rc_get_opcode_info(cur->U.I.Opcode);

		if (cur->U.I.Opcode == RC_OPCODE_ARL) {
			/* A new ARL closes the range of the previous one. */
			if (pending_arl != NULL && lowest < 0)
				transform_negative_addressing(c, pending_arl, cur, lowest);

			pending_arl = cur;
			lowest = 0;
			continue;
		}

		for (s = 0; s < info->NumSrcRegs; s++) {
			if (!cur->U.I.SrcReg[s].RelAddr ||
			    cur->U.I.SrcReg[s].Index >= 0)
				continue;

			/* ARL must precede any indirect addressing. */
			if (pending_arl == NULL) {
				rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
				return;
			}

			if (cur->U.I.SrcReg[s].Index < lowest)
				lowest = cur->U.I.SrcReg[s].Index;
		}
	}

	/* The last range runs to the end of the program (cur == head here). */
	if (pending_arl != NULL && lowest < 0)
		transform_negative_addressing(c, pending_arl, cur, lowest);
}
 
/* Swizzle capabilities installed by r3xx_compile_vertex_program(): every
 * swizzle is reported as native, so no Split hook is provided. */
struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
	.IsNative = swizzle_is_native,
	.Split = NULL /* should never be called */
};
 
/**
 * Compile a vertex program for R3xx-R5xx.
 *
 * Builds the transformation lists and the ordered pass list below, runs
 * them with rc_run_compiler(), and finally copies InputsRead,
 * OutputsWritten and the constant table from the program into c->code.
 * Passes whose predicate column evaluates to 0 are listed but disabled
 * (R500-only, pre-R500-only, or optimization-only passes).
 */
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
int is_r500 = c->Base.is_r500;
int opt = !c->Base.disable_optimizations;

/* Lists of instruction transformations. */
struct radeon_program_transformation alu_rewrite_r500[] = {
{ &r300_transform_vertex_alu, 0 },
{ &r300_transform_trig_scale_vertex, 0 },
{ 0, 0 }
};

struct radeon_program_transformation alu_rewrite_r300[] = {
{ &r300_transform_vertex_alu, 0 },
{ &r300_transform_trig_simple, 0 },
{ 0, 0 }
};

/* Note: These passes have to be done separately from ALU rewrite,
* otherwise non-native ALU instructions with source conflicts
* or non-native modifiers will not be treated properly.
*/
struct radeon_program_transformation emulate_modifiers[] = {
{ &transform_nonnative_modifiers, 0 },
{ 0, 0 }
};

struct radeon_program_transformation resolve_src_conflicts[] = {
{ &transform_source_conflicts, 0 },
{ 0, 0 }
};

/* List of compiler passes. */
struct radeon_compiler_pass vs_list[] = {
/* NAME DUMP PREDICATE FUNCTION PARAM */
{"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL},
{"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300},
{"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers},
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
/* This pass must be done after optimizations. */
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"lower control flow opcodes", 1, is_r500, rc_vert_fc, NULL},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
{NULL, 0, 0, NULL, NULL}
};

c->Base.type = RC_VERTEX_PROGRAM;
c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;

rc_run_compiler(&c->Base, vs_list);

/* Publish the results of the compilation in the output code structure. */
c->code->InputsRead = c->Base.Program.InputsRead;
c->code->OutputsWritten = c->Base.Program.OutputsWritten;
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
0,0 → 1,216
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "radeon_compiler.h"
#include "radeon_code.h"
#include "../r300_reg.h"
 
#include <stdio.h>
 
/* Printable names of the vector (VE_*) opcodes. r300_vs_op_dump() indexes
 * this table with the low five bits of the opcode dword, hence exactly 32
 * entries; unused encodings are labelled "(reserved)". */
static char* r300_vs_ve_ops[] = {
/* R300 vector ops */
" VE_NO_OP",
" VE_DOT_PRODUCT",
" VE_MULTIPLY",
" VE_ADD",
" VE_MULTIPLY_ADD",
" VE_DISTANCE_FACTOR",
" VE_FRACTION",
" VE_MAXIMUM",
" VE_MINIMUM",
"VE_SET_GREATER_THAN_EQUAL",
" VE_SET_LESS_THAN",
" VE_MULTIPLYX2_ADD",
" VE_MULTIPLY_CLAMP",
" VE_FLT2FIX_DX",
" VE_FLT2FIX_DX_RND",
/* R500 vector ops */
" VE_PRED_SET_EQ_PUSH",
" VE_PRED_SET_GT_PUSH",
" VE_PRED_SET_GTE_PUSH",
" VE_PRED_SET_NEQ_PUSH",
" VE_COND_WRITE_EQ",
" VE_COND_WRITE_GT",
" VE_COND_WRITE_GTE",
" VE_COND_WRITE_NEQ",
" VE_COND_MUX_EQ",
" VE_COND_MUX_GT",
" VE_COND_MUX_GTE",
" VE_SET_GREATER_THAN",
" VE_SET_EQUAL",
" VE_SET_NOT_EQUAL",
" (reserved)",
" (reserved)",
" (reserved)",
};
 
/* Printable names of the math (ME_*) opcodes. r300_vs_op_dump() indexes
 * this table with the low five bits of the opcode dword when bit 0x40 is
 * set, hence exactly 32 entries; unused encodings are labelled
 * "(reserved)". */
static char* r300_vs_me_ops[] = {
/* R300 math ops */
" ME_NO_OP",
" ME_EXP_BASE2_DX",
" ME_LOG_BASE2_DX",
" ME_EXP_BASEE_FF",
" ME_LIGHT_COEFF_DX",
" ME_POWER_FUNC_FF",
" ME_RECIP_DX",
" ME_RECIP_FF",
" ME_RECIP_SQRT_DX",
" ME_RECIP_SQRT_FF",
" ME_MULTIPLY",
" ME_EXP_BASE2_FULL_DX",
" ME_LOG_BASE2_FULL_DX",
" ME_POWER_FUNC_FF_CLAMP_B",
"ME_POWER_FUNC_FF_CLAMP_B1",
"ME_POWER_FUNC_FF_CLAMP_01",
" ME_SIN",
" ME_COS",
/* R500 math ops */
" ME_LOG_BASE2_IEEE",
" ME_RECIP_IEEE",
" ME_RECIP_SQRT_IEEE",
" ME_PRED_SET_EQ",
" ME_PRED_SET_GT",
" ME_PRED_SET_GTE",
" ME_PRED_SET_NEQ",
" ME_PRED_SET_CLR",
" ME_PRED_SET_INV",
" ME_PRED_SET_POP",
" ME_PRED_SET_RESTORE",
" (reserved)",
" (reserved)",
" (reserved)",
};
 
/* XXX refactor to avoid clashing symbols */
/* One-letter tag printed after a source register number; indexed by the
 * low two bits of a source operand dword in r300_vs_src_dump(). */
static char* r300_vs_src_debug[] = {
"t",
"i",
"c",
"a",
};
 
/* Tag printed after a destination register number; indexed by bits 8..10
 * of the opcode dword in r300_vs_op_dump(), hence eight entries. */
static char* r300_vs_dst_debug[] = {
"t",
"a0",
"o",
"ox",
"a",
"i",
"u",
"u",
};
 
/* Printable form of a 3-bit source swizzle selector; indexed per channel
 * in r300_vs_src_dump(), hence eight entries. */
static char* r300_vs_swiz_debug[] = {
"X",
"Y",
"Z",
"W",
"0",
"1",
"U",
"U",
};
 
 
/**
 * Print a decoded opcode/destination dword to stderr.
 *
 * Shows the destination register number and tag, the predicate sense when
 * predication is enabled, and the opcode name: one of the two macro ops
 * when bit 0x80 is set, a math op when bit 0x40 is set, a vector op
 * otherwise.
 */
static void r300_vs_op_dump(uint32_t op)
{
	fprintf(stderr, " dst: %d%s op: ",
			(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);

	if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
		fprintf(stderr, "PRED %u",
				(op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
	}

	/* Macro instructions: bit 0 selects between the two macro ops. */
	if (op & 0x80) {
		fputs((op & 0x1) ? "PVS_MACRO_OP_2CLK_M2X_ADD\n"
				 : " PVS_MACRO_OP_2CLK_MADD\n", stderr);
		return;
	}

	/* Regular instructions: pick the math or the vector name table. */
	fprintf(stderr, "%s\n",
		(op & 0x40) ? r300_vs_me_ops[op & 0x1f]
			    : r300_vs_ve_ops[op & 0x1f]);
}
 
/**
 * Print a decoded source operand dword to stderr: register number and
 * tag, followed by the negate flag and swizzle selector of each of the
 * four channels.
 */
static void r300_vs_src_dump(uint32_t src)
{
	const char *sign[4];
	const char *comp[4];
	int chan;

	/* Negate flags live in bits 25..28, the 3-bit swizzle selectors
	 * start at bit 13. */
	for (chan = 0; chan < 4; chan++) {
		sign[chan] = src & (1 << (25 + chan)) ? "-" : " ";
		comp[chan] = r300_vs_swiz_debug[(src >> (13 + 3 * chan)) & 0x7];
	}

	fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
			(src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
			sign[0], comp[0],
			sign[1], comp[1],
			sign[2], comp[2],
			sign[3], comp[3]);
}
 
/**
 * Compiler pass: dump the final vertex program machine code to stderr.
 *
 * Prints every instruction (opcode dword plus three source dwords, each
 * raw and decoded) followed by the flow control state: the fc_ops word and
 * one line per flow control op with its address words.
 *
 * \param compiler  actually a struct r300_vertex_program_compiler
 * \param user      unused pass parameter
 */
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
{
	struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler;
	struct r300_vertex_program_code *code = c->code;
	const unsigned num_insts = code->length / 4;
	unsigned idx;

	fprintf(stderr, "Final vertex program code:\n");

	for (idx = 0; idx < num_insts; idx++) {
		const unsigned base = idx * 4;
		unsigned k;

		fprintf(stderr, "%d: op: 0x%08x", idx, code->body.d[base]);
		r300_vs_op_dump(code->body.d[base]);

		/* Dwords 1..3 of an instruction are its source operands. */
		for (k = 1; k <= 3; ++k) {
			fprintf(stderr, " src%i: 0x%08x", k - 1, code->body.d[base + k]);
			r300_vs_src_dump(code->body.d[base + k]);
		}
	}

	fprintf(stderr, "Flow Control Ops: 0x%08x\n",code->fc_ops);
	for (idx = 0; idx < code->num_fc_ops; idx++) {
		/* Two bits of fc_ops per flow control op. */
		const unsigned kind = (code->fc_ops >> (idx * 2)) & 0x3;
		const unsigned is_loop = (kind == 2);

		if (kind == 0)
			fputs("NOP", stderr);
		else if (kind == 1)
			fputs("JUMP", stderr);
		else if (kind == 2)
			fputs("LOOP", stderr);
		else
			fputs("JSR", stderr);

		if (!c->Base.is_r500) {
			fprintf(stderr,": 0x%08x\n", code->fc_op_addrs.r300[idx]);
			continue;
		}

		fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x "
				"loop data->0x%08x\n",
				code->fc_op_addrs.r500[idx].uw,
				code->fc_op_addrs.r500[idx].lw,
				code->fc_loop_index[idx]);
		if (is_loop) {
			fprintf(stderr, "Before = %u First = %u Last = %u\n",
				code->fc_op_addrs.r500[idx].lw & 0xffff,
				(code->fc_op_addrs.r500[idx].uw >> 16) & 0xffff,
				code->fc_op_addrs.r500[idx].uw & 0xffff);
		}
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r500_fragprog.c
0,0 → 1,541
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "r500_fragprog.h"
 
#include <stdio.h>
 
#include "radeon_compiler_util.h"
#include "radeon_list.h"
#include "radeon_variable.h"
#include "../r300_reg.h"
 
/**
 * Rewrite IF instructions to use the ALU result special register.
 *
 * Two strategies are used:
 *  - "generic IF": a MOV with no destination is inserted in front of the
 *    IF; it writes the ALU result with a NOTEQUAL compare of the IF's
 *    original condition source.
 *  - otherwise the instructions that write the condition are themselves
 *    turned into ALU-result writers (set-on-compare opcodes become SUB with
 *    the matching compare function) and lose their destination register.
 *
 * In both cases the IF's source is finally replaced by the ALU result
 * special register.
 *
 * Returns 0 without touching anything if inst_if is not an IF, 1 otherwise.
 */
int r500_transform_IF(
struct radeon_compiler * c,
struct rc_instruction * inst_if,
void *data)
{
struct rc_variable * writer;
struct rc_list * writer_list, * list_ptr;
struct rc_list * var_list = rc_get_variables(c);
unsigned int generic_if = 0;
unsigned int alu_chan;

if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
return 0;
}

writer_list = rc_variable_list_get_writers(
var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
if (!writer_list) {
/* No known writer of the condition: nothing to convert in place. */
generic_if = 1;
} else {

/* Make sure it is safe for the writers to write to
* ALU Result */
for (list_ptr = writer_list; list_ptr;
list_ptr = list_ptr->Next) {
struct rc_instruction * inst;
writer = list_ptr->Item;
/* We are going to modify the destination register
* of writer, so if it has a reader other than
* inst_if (aka ReaderCount > 1) we must fall back to
* our generic IF.
* If the writer has a lower IP than inst_if, this
* means that inst_if is above the writer in a loop.
* I'm not sure why this would ever happen, but
* if it does we want to make sure we fall back
* to our generic IF. */
if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
generic_if = 1;
break;
}

/* The ALU Result is not preserved across IF
* instructions, so if there is another IF
* instruction between writer and inst_if, then
* we need to fall back to generic IF. */
for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);
if (info->IsFlowControl) {
generic_if = 1;
break;
}
}
if (generic_if) {
break;
}
}
}

/* The ALU result channel follows the first swizzle component of the
* IF's condition: X if it reads X, W for anything else. */
if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
alu_chan = RC_ALURESULT_X;
} else {
alu_chan = RC_ALURESULT_W;
}
if (generic_if) {
/* Generic path: MOV (no destination) of the condition, compared
* against zero with NOTEQUAL, placed directly before the IF. */
struct rc_instruction * inst_mov =
rc_insert_new_instruction(c, inst_if->Prev);

inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.WriteMask = 0;
inst_mov->U.I.DstReg.File = RC_FILE_NONE;
inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
inst_mov->U.I.WriteALUResult = alu_chan;
inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
if (alu_chan == RC_ALURESULT_X) {
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
inst_mov->U.I.SrcReg[0].Swizzle,
RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
} else {
/* NOTE(review): the W-channel case passes RC_SWIZZLE_Z as the
* fourth selector, not RC_SWIZZLE_W -- confirm this is
* intended. */
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
inst_mov->U.I.SrcReg[0].Swizzle,
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
}
} else {
/* In-place path: convert every writer of the condition. */
rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
/* NOTE(review): reverse_srcs and preserve_opcode are initialised
* once, outside the loop, so a value set for one writer carries
* over to all following writers -- confirm this is intended when
* there is more than one writer. */
unsigned int reverse_srcs = 0;
unsigned int preserve_opcode = 0;
for (list_ptr = writer_list; list_ptr;
list_ptr = list_ptr->Next) {
writer = list_ptr->Item;
/* Map the set-on-compare opcode to a compare function; SLE
* and SGT are expressed as SGE and SLT with swapped sources. */
switch(writer->Inst->U.I.Opcode) {
case RC_OPCODE_SEQ:
compare_func = RC_COMPARE_FUNC_EQUAL;
break;
case RC_OPCODE_SNE:
compare_func = RC_COMPARE_FUNC_NOTEQUAL;
break;
case RC_OPCODE_SLE:
reverse_srcs = 1;
/* Fall through */
case RC_OPCODE_SGE:
compare_func = RC_COMPARE_FUNC_GEQUAL;
break;
case RC_OPCODE_SGT:
reverse_srcs = 1;
/* Fall through */
case RC_OPCODE_SLT:
compare_func = RC_COMPARE_FUNC_LESS;
break;
default:
/* Any other opcode keeps computing its value; the result
* is just tested for "not equal". */
compare_func = RC_COMPARE_FUNC_NOTEQUAL;
preserve_opcode = 1;
break;
}
if (!preserve_opcode) {
writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
}
/* The writer no longer writes a register, only the ALU result. */
writer->Inst->U.I.DstReg.WriteMask = 0;
writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
writer->Inst->U.I.WriteALUResult = alu_chan;
writer->Inst->U.I.ALUResultCompare = compare_func;
if (reverse_srcs) {
struct rc_src_register temp_src;
temp_src = writer->Inst->U.I.SrcReg[0];
writer->Inst->U.I.SrcReg[0] =
writer->Inst->U.I.SrcReg[1];
writer->Inst->U.I.SrcReg[1] = temp_src;
}
}
}

/* Finally make the IF read the ALU result special register. */
inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
inst_if->U.I.SrcReg[0].Negate = 0;

return 1;
}
 
/**
 * IsNative hook of the R500 fragment program swizzle caps.
 *
 * Returns 1 if reg can be used as is by an instruction with the given
 * opcode, 0 if it has to be rewritten first:
 *  - texture opcodes and KIL: no Abs, no negation on used channels, only
 *    X/Y/Z/W selectors (KIL additionally requires the identity swizzle
 *    without any negation),
 *  - DDX/DDY: identity swizzle without Abs or Negate only,
 *  - inline constants: always native,
 *  - all other (ALU) opcodes: the X/Y/Z channels that select a real
 *    component must be negated either all together or not at all.
 */
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	unsigned int relevant;
	unsigned int negated;
	int i;

	switch (opcode) {
	case RC_OPCODE_TEX:
	case RC_OPCODE_TXB:
	case RC_OPCODE_TXP:
	case RC_OPCODE_TXD:
	case RC_OPCODE_TXL:
	case RC_OPCODE_KIL:
		if (reg.Abs)
			return 0;

		if (opcode == RC_OPCODE_KIL &&
		    !(reg.Swizzle == RC_SWIZZLE_XYZW && reg.Negate == RC_MASK_NONE))
			return 0;

		for (i = 0; i < 4; ++i) {
			unsigned int swz = GET_SWZ(reg.Swizzle, i);

			/* Negation of an unused channel does not matter. */
			if (swz == RC_SWIZZLE_UNUSED) {
				reg.Negate &= ~(1 << i);
				continue;
			}
			if (swz >= 4)
				return 0;
		}

		return reg.Negate ? 0 : 1;

	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
		 * if it doesn't fit perfectly into a .xyzw case... */
		if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
			return 1;
		return 0;

	default:
		break;
	}

	if (reg.File == RC_FILE_INLINE)
		return 1;

	/* ALU instructions support almost everything: only a partial
	 * negation of the relevant X/Y/Z channels is impossible. */
	relevant = 0;
	for (i = 0; i < 3; ++i) {
		unsigned int swz = GET_SWZ(reg.Swizzle, i);

		if (swz == RC_SWIZZLE_UNUSED || swz == RC_SWIZZLE_ZERO)
			continue;
		relevant |= 1 << i;
	}

	negated = reg.Negate & relevant;
	if (negated && negated != relevant)
		return 0;

	return 1;
}
 
/**
 * Split source register access.
 *
 * The only thing we *cannot* do in an ALU instruction is per-component
 * negation, so the used channels are partitioned into (at most) two phases:
 * first the channels read without negation, then the negated ones.
 *
 * \param src      source operand to split
 * \param usemask  channels of the operand that are actually read
 * \param split    receives the phase count and one channel mask per phase
 */
static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
		struct rc_swizzle_split * split)
{
	unsigned int plain_mask = 0;
	unsigned int negated_mask = 0;
	int chan;

	for (chan = 0; chan < 4; ++chan) {
		if (GET_SWZ(src.Swizzle, chan) == RC_SWIZZLE_UNUSED)
			continue;
		if (!GET_BIT(usemask, chan))
			continue;

		if (GET_BIT(src.Negate, chan))
			negated_mask |= 1 << chan;
		else
			plain_mask |= 1 << chan;
	}

	split->NumPhases = 0;

	/* Empty groups produce no phase at all. */
	if (plain_mask)
		split->Phase[split->NumPhases++] = plain_mask;
	if (negated_mask)
		split->Phase[split->NumPhases++] = negated_mask;
}
 
/**
 * Swizzle capability table for R500 fragment programs: bundles the
 * native-swizzle predicate (r500_swizzle_is_native) and the splitter
 * (r500_swizzle_split) defined in this file.
 */
struct rc_swizzle_caps r500_swizzle_caps = {
.IsNative = r500_swizzle_is_native,
.Split = r500_swizzle_split
};
 
/**
 * Map a 3-bit swizzle selector to the short label printed by the dump.
 *
 * \return the label for values 0..7, NULL for anything else
 */
static char *toswiz(int swiz_val) {
	static char * const labels[8] = {
		"R", "G", "B", "A", "0", "H", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return labels[swiz_val];
}
 
/**
 * Map the 4-bit RGBA opcode field to the mnemonic printed by the dump.
 *
 * The dump passes (inst & 0xf) and feeds the result straight to a "%s"
 * conversion, so this function must never return NULL: values without a
 * mnemonic (13..15, or anything out of range) yield "Unknown".
 *
 * \return a pointer to a static string, never NULL
 */
static char *toop(int op_val)
{
	static char * const names[] = {
		"MAD", "DP3", "DP4", "D2A", "MIN", "MAX", "Reserved",
		"CND", "CMP", "FRC", "SOP", "MDH", "MDV"
	};

	if (op_val < 0 || op_val >= (int)(sizeof(names) / sizeof(names[0])))
		return "Unknown";

	return names[op_val];
}
 
/**
 * Map the 4-bit alpha opcode field to the mnemonic printed by the dump.
 *
 * \return the mnemonic for values 0..15, NULL for anything else
 */
static char *to_alpha_op(int op_val)
{
	static char * const names[16] = {
		"MAD", "DP", "MIN", "MAX", "Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ", "SIN", "COS", "MDH", "MDV"
	};

	if (op_val < 0 || op_val > 15)
		return NULL;

	return names[op_val];
}
 
/**
 * Render a 4-bit channel mask (bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A)
 * as the label printed by the dump.
 *
 * \return the label for values 0..15, NULL for anything else
 */
static char *to_mask(int val)
{
	static char * const labels[16] = {
		"NONE", "R",  "G",  "RG",  "B",  "RB",  "GB",  "RGB",
		"A",    "AR", "AG", "ARG", "AB", "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val > 15)
		return NULL;

	return labels[val];
}
 
/**
 * Map the 3-bit texture opcode field to the mnemonic printed by the dump.
 *
 * The dump passes ((inst >> 22) & 0x7) and prints the result with "%s", so
 * NULL must never be returned: the value 7 (which has no mnemonic here) and
 * any out-of-range input yield "Unknown".
 *
 * \return a pointer to a static string, never NULL
 */
static char *to_texop(int val)
{
	switch(val) {
	case 0: return "NOP";
	case 1: return "LD";
	case 2: return "TEXKILL";
	case 3: return "PROJ";
	case 4: return "LODBIAS";
	case 5: return "LOD";
	case 6: return "DXDY";
	default: break;
	}
	return "Unknown";
}
 
/**
 * Print a human-readable disassembly of the compiled R500 fragment program
 * to stderr.
 *
 * Every instruction slot from 0 to code->inst_end (inclusive) is decoded:
 * the common word (inst0) is printed first, then the remaining words are
 * decoded according to the instruction type found in the low two bits of
 * inst0 (ALU/OUT, flow control, or texture).
 *
 * \param c     compiler; cast to struct r300_fragment_program_compiler
 * \param user  not used by this function
 */
void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
int n, i;
uint32_t inst;
uint32_t inst0;
char *str = NULL;
fprintf(stderr, "R500 Fragment Program:\n--------\n");

for (n = 0; n < code->inst_end+1; n++) {
/* inst0 is kept aside because 'inst' is reused for the other words. */
inst0 = inst = code->inst[n].inst0;
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
switch(inst & 0x3) {
case R500_INST_TYPE_ALU: str = "ALU"; break;
case R500_INST_TYPE_OUT: str = "OUT"; break;
case R500_INST_TYPE_FC: str = "FC"; break;
case R500_INST_TYPE_TEX: str = "TEX"; break;
};
fprintf(stderr,"%s %s %s %s %s ", str,
inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
inst & R500_INST_LAST ? "LAST" : "",
inst & R500_INST_NOP ? "NOP" : "",
inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
/* Register write mask at bits 11..14, output write mask at bits 15..18. */
fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
to_mask((inst >> 15) & 0xf));

switch(inst0 & 0x3) {
case R500_INST_TYPE_ALU:
case R500_INST_TYPE_OUT:
/* ALU and OUT instructions share the same five-word layout. */
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
inst = code->inst[n].inst1;

/* Each address is 8 bits followed by a flag printed as 'c' or 't'. */
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
(inst >> 30));

fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
inst = code->inst[n].inst2;
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
(inst >> 30));
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
inst = code->inst[n].inst3;
/* For operands A and B: source select, three 3-bit swizzles, modifier. */
fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
(inst >> 11) & 0x3,
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
(inst >> 24) & 0x3, (inst >> 29) & 0x3);


fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
inst = code->inst[n].inst4;
fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
(inst >> 29) & 0x3,
(inst >> 31) & 0x1);

fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
inst = code->inst[n].inst5;
fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
(inst >> 23) & 0x3,
(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
break;
case R500_INST_TYPE_FC:
/* Flow control: only words 2 (FC_INST) and 3 (FC_ADDR) are decoded. */
fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);
inst = code->inst[n].inst2;
/* JUMP_FUNC JUMP_ANY*/
fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
(inst & R500_FC_JUMP_ANY) >> 5);
/* OP */
switch(inst & 0x7){
case R500_FC_OP_JUMP:
fprintf(stderr, "JUMP");
break;
case R500_FC_OP_LOOP:
fprintf(stderr, "LOOP");
break;
case R500_FC_OP_ENDLOOP:
fprintf(stderr, "ENDLOOP");
break;
case R500_FC_OP_REP:
fprintf(stderr, "REP");
break;
case R500_FC_OP_ENDREP:
fprintf(stderr, "ENDREP");
break;
case R500_FC_OP_BREAKLOOP:
fprintf(stderr, "BREAKLOOP");
break;
case R500_FC_OP_BREAKREP:
fprintf(stderr, "BREAKREP");
break;
case R500_FC_OP_CONTINUE:
fprintf(stderr, "CONTINUE");
break;
}
fprintf(stderr," ");
/* A_OP */
switch(inst & (0x3 << 6)){
case R500_FC_A_OP_NONE:
fprintf(stderr, "NONE");
break;
case R500_FC_A_OP_POP:
fprintf(stderr, "POP");
break;
case R500_FC_A_OP_PUSH:
fprintf(stderr, "PUSH");
break;
}
/* B_OP0 B_OP1 */
/* The two 2-bit B_OP fields sit at bits 24 and 26; i selects which. */
for(i=0; i<2; i++){
fprintf(stderr, " ");
switch(inst & (0x3 << (24 + (i * 2)))){
/* R500_FC_B_OP0_NONE
* R500_FC_B_OP1_NONE */
case 0:
fprintf(stderr, "NONE");
break;
case R500_FC_B_OP0_DECR:
case R500_FC_B_OP1_DECR:
fprintf(stderr, "DECR");
break;
case R500_FC_B_OP0_INCR:
case R500_FC_B_OP1_INCR:
fprintf(stderr, "INCR");
break;
}
}
/*POP_CNT B_ELSE */
fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
inst = code->inst[n].inst3;
/* JUMP_ADDR */
fprintf(stderr, " %d", inst >> 16);
/* IGNORE_UNCOVERED is a flag of word 2, hence the explicit re-read. */
if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
fprintf(stderr, " IGN_UNC");
}
inst = code->inst[n].inst3;
fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);
fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);
break;
case R500_INST_TYPE_TEX:
/* Texture instruction: words 1 (TEX_INST), 2 (TEX_ADDR), 3 (TEX_DXDY). */
inst = code->inst[n].inst1;
fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
(inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
inst = code->inst[n].inst2;
/* Source/destination swizzles are 2 bits per channel here. */
fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
inst & 127, inst & (1<<7) ? "(rel)" : "",
toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
(inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));

fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
break;
}
fprintf(stderr,"\n");
}

}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r500_fragprog.h
0,0 → 1,50
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
#ifndef __R500_FRAGPROG_H_
#define __R500_FRAGPROG_H_

#include "radeon_compiler.h"
#include "radeon_swizzle.h"

/* Translate the compiler's instruction list into R500 hardware code.
 * 'c' is cast to struct r300_fragment_program_compiler by the implementation;
 * 'user' is not read by it. */
extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);

/* Print a decoded listing of the generated R500 code to stderr. */
extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user);

/* Swizzle capability callbacks (IsNative / Split) for R500 fragment programs. */
extern struct rc_swizzle_caps r500_swizzle_caps;

/* Transformation callback for IF instructions.
 * NOTE(review): presumably returns non-zero when the instruction was
 * rewritten - confirm against the definition. */
extern int r500_transform_IF(
struct radeon_compiler * c,
struct rc_instruction * inst_if,
void* data);

#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
0,0 → 1,687
/*
* Copyright (C) 2005 Ben Skeggs.
*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file
*
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
*
* \author Corbin Simpson <MostAwesomeDude@gmail.com>
*
*/
 
#include "r500_fragprog.h"
 
#include "../r300_reg.h"
 
#include "radeon_program_pair.h"
 
/* Declares a local 'code' pointing at the R500 code being built.
 * Requires a variable 'c' of type struct r300_fragment_program_compiler *
 * to be in scope. */
#define PROG_CODE \
struct r500_fragment_program_code *code = &c->code->code.r500

/* Report a compile error through rc_error(), prefixed with the current file
 * and function name and terminated with a newline. Also requires 'c' (see
 * PROG_CODE) to be in scope. Uses the GNU named-variadic macro syntax. */
#define error(fmt, args...) do { \
rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
 
 
/* Instruction slots belonging to one IF / ELSE / ENDIF construct.
 * Filled in by emit_flowcontrol(). */
struct branch_info {
int If; /* slot of the IF instruction */
int Else; /* slot of the ELSE instruction, -1 if none was seen */
int Endif; /* slot of the ENDIF instruction, -1 until it is seen */
};
 
/* Bookkeeping for one BGNLOOP / ENDLOOP pair while it is being emitted. */
struct r500_loop_info {
int BgnLoop; /* slot of the BGNLOOP instruction */

int BranchDepth; /* branch depth at the time BGNLOOP was emitted */
int * Brks; /* slots of BRK instructions, patched at ENDLOOP */
int BrkCount; /* number of valid entries in Brks */
int BrkReserved; /* reservation counter passed to memory_pool_array_reserve */

int * Conts; /* slots of CONT instructions, patched at ENDLOOP */
int ContCount; /* number of valid entries in Conts */
int ContReserved; /* reservation counter passed to memory_pool_array_reserve */
};
 
/* State carried across emit_flowcontrol() calls for one program. */
struct emit_state {
struct radeon_compiler * C; /* compiler, used for errors and pool allocation */
struct r500_fragment_program_code * Code; /* hardware code being written */

struct branch_info * Branches; /* stack of open IF constructs */
unsigned int CurrentBranchDepth; /* number of currently open IFs */
unsigned int BranchesReserved; /* reservation counter for Branches */

struct r500_loop_info * Loops; /* stack of open loops */
unsigned int CurrentLoopDepth; /* number of currently open loops */
unsigned int LoopsReserved; /* reservation counter for Loops */

unsigned int MaxBranchDepth; /* deepest IF nesting reached so far */

};
 
/**
 * Convert a compiler opcode into the R500 RGB(A) ALU opcode field.
 *
 * NOP is encoded as MAD. An opcode that has no RGB encoding is reported
 * through error() and also encoded as MAD.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* Unknown opcode: flag the error, then fall back to MAD. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
 
/**
 * Convert a compiler opcode into the R500 alpha ALU opcode field.
 *
 * NOP is encoded as MAD; DP3 and DP4 both map to the single alpha DP op.
 * An opcode that has no alpha encoding is reported through error() and
 * also encoded as MAD.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	/* Unknown opcode: flag the error, then fall back to MAD. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
 
/**
 * Convert a compiler swizzle selector into the value written to the
 * hardware swizzle field.
 *
 * ZERO and UNUSED both become 4, HALF becomes 5 and ONE becomes 6; every
 * other selector is passed through unchanged.
 */
static unsigned int fix_hw_swizzle(unsigned int swz)
{
	if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_UNUSED)
		return 4;
	if (swz == RC_SWIZZLE_HALF)
		return 5;
	if (swz == RC_SWIZZLE_ONE)
		return 6;

	return swz;
}
 
/**
 * Build the operand selector bits for RGB argument 'arg' of a paired
 * instruction: source select in the low bits, three 3-bit swizzles at
 * bits 2, 5 and 8, negate at bit 11 and abs at bit 12.
 */
static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
{
	unsigned int bits = inst->RGB.Arg[arg].Source;

	bits |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)) << 2;
	bits |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)) << 5;
	bits |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)) << 8;

	bits |= inst->RGB.Arg[arg].Negate << 11;
	bits |= inst->RGB.Arg[arg].Abs << 12;

	return bits;
}
 
/**
 * Build the operand selector bits for alpha argument 'i' of a paired
 * instruction: source select in the low bits, one 3-bit swizzle at bit 2,
 * negate at bit 5 and abs at bit 6.
 */
static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
{
	unsigned int bits = inst->Alpha.Arg[i].Source;
	unsigned int hw_swz = fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0));

	bits |= hw_swz << 2;
	bits |= inst->Alpha.Arg[i].Negate << 5;
	bits |= inst->Alpha.Arg[i].Abs << 6;

	return bits;
}
 
/**
 * Convert a compare function into the ALU-result compare bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are supported; any other function
 * raises a compile error and yields 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t result_op = 0;

	switch(func) {
	case RC_COMPARE_FUNC_EQUAL:
		result_op = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		result_op = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
		break;
	}

	return result_op;
}
 
/**
 * Record that temporary register 'index' is referenced, raising the
 * program's highest-used temporary index if necessary.
 */
static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
{
	if (index <= code->max_temp_idx)
		return;

	code->max_temp_idx = index;
}
 
/**
 * Compute the address field value for one source slot of a paired
 * instruction, and record temporary usage on the way.
 *
 * \return the address bits; 0 for a register file not handled here
 */
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
	unsigned int addr = 0;

	/* From docs:
	 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
	 * MSB = 1 << 7
	 * An unused slot is encoded the same way, with index 0. */
	if (!src.Used)
		return 1 << 7;

	switch (src.File) {
	case RC_FILE_CONSTANT:
		addr = src.Index | R500_RGB_ADDR0_CONST;
		break;
	case RC_FILE_TEMPORARY:
	case RC_FILE_INPUT:
		use_temporary(code, src.Index);
		addr = src.Index;
		break;
	case RC_FILE_INLINE:
		addr = src.Index | (1 << 7);
		break;
	default:
		break;
	}

	return addr;
}
 
/**
* NOP the specified instruction if it is not a texture lookup.
*/
static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
{
PROG_CODE;
 
if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
code->inst[ip].inst0 |= R500_INST_NOP;
}
}
 
/**
 * Emit a paired ALU instruction.
 *
 * Claims the next instruction slot and fills its six words (inst0..inst5)
 * from the RGB and alpha halves of \p inst. Most fields are OR-ed in, so the
 * function relies on the slot being zero beforehand (the code structure is
 * cleared by r500BuildFragmentProgramHwCode before emission starts).
 *
 * Raises a compile error and returns early if the instruction limit is
 * reached, or if the instruction writes both an output and the ALU result.
 */
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
int ip;
PROG_CODE;

if (code->inst_end >= c->Base.max_alu_insts-1) {
error("emit_alu: Too many instructions");
return;
}

ip = ++code->inst_end;

/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
if (ip > 0) {
alu_nop(c, ip - 1);
}
}

/* These two plain assignments initialise inst5/inst4; everything that
 * follows for those words is OR-ed on top. */
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);

/* Any output or depth write makes this an OUT-type instruction. */
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
if (inst->WriteALUResult) {
error("Cannot write output and ALU result at the same time");
return;
}
} else {
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
}
code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);

/* Register write masks: RGB at bits 11..13, alpha at bit 14;
 * output write masks: RGB at bits 15..17, alpha at bit 18. */
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Nop) {
code->inst[ip].inst0 |= R500_INST_NOP;
}
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
c->code->writes_depth = 1;
}

code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
use_temporary(code, inst->Alpha.DestIndex);
use_temporary(code, inst->RGB.DestIndex);

if (inst->RGB.Saturate)
code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
if (inst->Alpha.Saturate)
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;

/* Set the presubtract operation. */
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
break;
case RC_PRESUB_SUB:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
break;
case RC_PRESUB_ADD:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
break;
case RC_PRESUB_INV:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
break;
default:
break;
}
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
break;
case RC_PRESUB_SUB:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
break;
case RC_PRESUB_ADD:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
break;
case RC_PRESUB_INV:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
break;
default:
break;
}

/* Set the output modifier */
code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;

/* Source addresses: inst1 holds the RGB ones, inst2 the alpha ones. */
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));

code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));

/* Operand selectors: A and B live in inst3 (RGB) / inst4 (alpha), while
 * operand C of both halves lives in inst5. */
code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;

code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;

code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);

if (inst->WriteALUResult) {
code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;

/* Anything other than RC_ALURESULT_X selects the alpha result. */
if (inst->WriteALUResult == RC_ALURESULT_X)
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
else
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;

code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
}
}
 
/**
 * Pack a four-channel swizzle into the 8-bit form used by texture
 * instructions: two bits per channel, channel 0 in the lowest bits.
 * Only the low two bits of each selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;

	packed |= (GET_SWZ(swizzle, 0) & 0x3) << 0;
	packed |= (GET_SWZ(swizzle, 1) & 0x3) << 2;
	packed |= (GET_SWZ(swizzle, 2) & 0x3) << 4;
	packed |= (GET_SWZ(swizzle, 3) & 0x3) << 6;

	return packed;
}
 
/**
* Emit a single TEX instruction
*/
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
int ip;
PROG_CODE;
 
if (code->inst_end >= c->Base.max_alu_insts-1) {
error("emit_tex: Too many instructions");
return 0;
}
 
ip = ++code->inst_end;
 
code->inst[ip].inst0 = R500_INST_TYPE_TEX
| (inst->DstReg.WriteMask << 11)
| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
 
if (inst->TexSrcTarget == RC_TEXTURE_RECT)
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
 
switch (inst->Opcode) {
case RC_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
case RC_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
case RC_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
case RC_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
case RC_OPCODE_TXD:
code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
break;
case RC_OPCODE_TXL:
code->inst[ip].inst1 |= R500_TEX_INST_LOD;
break;
default:
error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
}
 
use_temporary(code, inst->SrcReg[0].Index);
if (inst->Opcode != RC_OPCODE_KIL)
use_temporary(code, inst->DstReg.Index);
 
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
| R500_TEX_DST_ADDR(inst->DstReg.Index)
| (GET_SWZ(inst->TexSwizzle, 0) << 24)
| (GET_SWZ(inst->TexSwizzle, 1) << 26)
| (GET_SWZ(inst->TexSwizzle, 2) << 28)
| (GET_SWZ(inst->TexSwizzle, 3) << 30)
;
 
if (inst->Opcode == RC_OPCODE_TXD) {
use_temporary(code, inst->SrcReg[1].Index);
use_temporary(code, inst->SrcReg[2].Index);
 
/* DX and DY parameters are specified in a separate register. */
code->inst[ip].inst3 =
R500_DX_ADDR(inst->SrcReg[1].Index) |
(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
R500_DY_ADDR(inst->SrcReg[2].Index) |
(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
}
 
return 1;
}
 
/**
 * Emit one flow-control instruction (loop, break/continue or branch).
 *
 * A new instruction slot is claimed for every call. For BGNLOOP, BRK and
 * CONT the instruction word is written immediately and the jump address is
 * patched when the matching ENDLOOP arrives. For IF and ELSE only the slot
 * is recorded; the actual instruction words are written at ENDIF time.
 *
 * A compile error is raised (and nothing further is emitted) when the
 * instruction limit is reached, when the branch nesting limit is exceeded,
 * or when BRK/CONT/ENDLOOP/ELSE/ENDIF appears without a matching open
 * loop or branch.
 *
 * \param s     emission state (branch and loop stacks, target code)
 * \param inst  flow-control instruction to emit
 */
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
{
	struct branch_info * branch;
	struct r500_loop_info * loop;
	unsigned int newip;

	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
		rc_error(s->C, "emit_flowcontrol: Too many instructions");
		return;
	}

	newip = ++s->Code->inst_end;

	/* Currently all loops use the same integer constant to initialize
	 * the loop variables. */
	if(!s->Code->int_constants[0]) {
		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
		s->Code->int_constant_count = 1;
	}
	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;

	switch(inst->U.I.Opcode){
	case RC_OPCODE_BGNLOOP:
		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);

		loop = &s->Loops[s->CurrentLoopDepth++];
		memset(loop, 0, sizeof(struct r500_loop_info));
		loop->BranchDepth = s->CurrentBranchDepth;
		loop->BgnLoop = newip;

		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
			| R500_FC_JUMP_FUNC(0x00)
			| R500_FC_IGNORE_UNCOVERED
			;
		break;
	case RC_OPCODE_BRK:
		/* Without this check Loops[-1] would be accessed. */
		if (!s->CurrentLoopDepth) {
			rc_error(s->C, "%s: got BRK outside a loop", __FUNCTION__);
			return;
		}

		loop = &s->Loops[s->CurrentLoopDepth - 1];
		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
			loop->BrkCount, loop->BrkReserved, 1);

		/* Jump address is filled in at ENDLOOP time. */
		loop->Brks[loop->BrkCount++] = newip;
		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
			| R500_FC_JUMP_FUNC(0xff)
			| R500_FC_B_OP1_DECR
			| R500_FC_B_POP_CNT(
				s->CurrentBranchDepth - loop->BranchDepth)
			| R500_FC_IGNORE_UNCOVERED
			;
		break;

	case RC_OPCODE_CONT:
		if (!s->CurrentLoopDepth) {
			rc_error(s->C, "%s: got CONT outside a loop", __FUNCTION__);
			return;
		}

		loop = &s->Loops[s->CurrentLoopDepth - 1];
		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
			loop->ContCount, loop->ContReserved, 1);

		/* Jump address is filled in at ENDLOOP time. */
		loop->Conts[loop->ContCount++] = newip;
		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
			| R500_FC_JUMP_FUNC(0xff)
			| R500_FC_B_OP1_DECR
			| R500_FC_B_POP_CNT(
				s->CurrentBranchDepth - loop->BranchDepth)
			| R500_FC_IGNORE_UNCOVERED
			;
		break;

	case RC_OPCODE_ENDLOOP:
		if (!s->CurrentLoopDepth) {
			rc_error(s->C, "%s: got ENDLOOP outside a loop", __FUNCTION__);
			return;
		}

		loop = &s->Loops[s->CurrentLoopDepth - 1];
		/* Emit ENDLOOP */
		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
			| R500_FC_JUMP_FUNC(0xff)
			| R500_FC_JUMP_ANY
			| R500_FC_IGNORE_UNCOVERED
			;
		/* The constant integer at index 0 is used by all loops. */
		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
			;

		/* Set jump address and int constant for BGNLOOP */
		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
			| R500_FC_JUMP_ADDR(newip)
			;

		/* Set jump address for the BRK instructions. */
		while(loop->BrkCount--) {
			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
				R500_FC_JUMP_ADDR(newip + 1);
		}

		/* Set jump address for CONT instructions. */
		while(loop->ContCount--) {
			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
				R500_FC_JUMP_ADDR(newip);
		}
		s->CurrentLoopDepth--;
		break;

	case RC_OPCODE_IF:
		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
			rc_error(s->C, "Branch depth exceeds hardware limit");
			return;
		}
		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
			s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);

		branch = &s->Branches[s->CurrentBranchDepth++];
		branch->If = newip;
		branch->Else = -1;
		branch->Endif = -1;

		if (s->CurrentBranchDepth > s->MaxBranchDepth)
			s->MaxBranchDepth = s->CurrentBranchDepth;

		/* actual instruction is filled in at ENDIF time */
		break;

	case RC_OPCODE_ELSE:
		if (!s->CurrentBranchDepth) {
			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
			return;
		}

		branch = &s->Branches[s->CurrentBranchDepth - 1];
		branch->Else = newip;

		/* actual instruction is filled in at ENDIF time */
		break;

	case RC_OPCODE_ENDIF:
		if (!s->CurrentBranchDepth) {
			rc_error(s->C, "%s: got ENDIF outside a branch", __FUNCTION__);
			return;
		}

		branch = &s->Branches[s->CurrentBranchDepth - 1];
		branch->Endif = newip;

		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
			| R500_FC_A_OP_NONE /* no address stack */
			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
			| R500_FC_B_OP1_NONE /* no branch counter op if jump */
			| R500_FC_B_POP_CNT(1)
			;
		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
			| R500_FC_A_OP_NONE /* no address stack */
			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
			| R500_FC_IGNORE_UNCOVERED
			;

		if (branch->Else >= 0) {
			/* increment branch counter also if jump */
			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);

			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
				| R500_FC_A_OP_NONE /* no address stack */
				| R500_FC_B_ELSE /* all active pixels want to jump */
				| R500_FC_B_OP0_NONE /* no counter op if stay */
				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
				| R500_FC_B_POP_CNT(1)
				;
			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
		} else {
			/* don't touch branch counter on jump */
			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
		}

		s->CurrentBranchDepth--;
		break;

	default:
		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
	}
}
 
void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct emit_state s;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
 
memset(&s, 0, sizeof(s));
s.C = &compiler->Base;
s.Code = code;
 
memset(code, 0, sizeof(*code));
code->max_temp_idx = 1;
code->inst_end = -1;
 
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
inst = inst->Next) {
if (inst->Type == RC_INSTRUCTION_NORMAL) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
if (opcode->IsFlowControl) {
emit_flowcontrol(&s, inst);
} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
continue;
} else {
emit_tex(compiler, &inst->U.I);
}
} else {
emit_paired(compiler, &inst->U.P);
}
}
 
if (code->max_temp_idx >= compiler->Base.max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used");
 
if (compiler->Base.Error)
return;
 
if (code->inst_end == -1 ||
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
int ip;
 
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
return;
}
 
ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
 
/* Make sure TEX_SEM_WAIT is set on the last instruction */
code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
 
/* Enable full flow control mode if we are using loops or have if
* statements nested at least four deep. */
if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
 
code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_code.c
0,0 → 1,187
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_code.h"
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
#include "radeon_program.h"
 
/**
 * Put a constant list into its empty state by zero-filling the whole
 * structure (storage pointer, element count and reserved capacity).
 */
void rc_constants_init(struct rc_constant_list * c)
{
    memset(c, 0, sizeof(struct rc_constant_list));
}
 
/**
 * Copy a constants structure, assuming that the destination structure
 * is not initialized.
 *
 * The destination receives its own heap copy of the source entries, with a
 * capacity exactly equal to the number of entries copied.
 *
 * If the source is empty, or if the allocation fails, the destination is
 * left as a valid empty list (NULL storage, zero count, zero capacity)
 * instead of pairing a non-zero count with unusable storage.
 *
 * \param dst  uninitialized list to fill in; previous contents are ignored
 *             (and therefore not freed).
 * \param src  list to copy from; it is not modified.
 */
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
{
    const unsigned count = src->Count;
    struct rc_constant * copy = NULL;

    /* Skip malloc(0) and a memcpy() from a possibly-NULL source. */
    if (count > 0) {
        copy = malloc(sizeof(struct rc_constant) * count);
        if (copy)
            memcpy(copy, src->Constants, sizeof(struct rc_constant) * count);
    }

    dst->Constants = copy;
    dst->Count = copy ? count : 0;
    dst->_Reserved = dst->Count;
}
 
/**
 * Release the storage owned by a constant list and zero-fill the structure,
 * leaving it in the same state rc_constants_init() produces.
 */
void rc_constants_destroy(struct rc_constant_list * c)
{
    struct rc_constant * storage = c->Constants;

    memset(c, 0, sizeof(struct rc_constant_list));
    free(storage);
}
 
/**
 * Append a copy of \p constant to the list and return its index.
 *
 * When the list is full the capacity is doubled (or set to 16 if the doubled
 * value is zero), a new array is allocated, the existing entries are copied
 * over and the old array is freed.
 */
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
{
    const unsigned slot = c->Count;

    if (slot >= c->_Reserved) {
        struct rc_constant * grown;
        unsigned capacity = c->_Reserved * 2;

        if (capacity == 0)
            capacity = 16;
        c->_Reserved = capacity;

        grown = malloc(sizeof(struct rc_constant) * capacity);
        memcpy(grown, c->Constants, sizeof(struct rc_constant) * c->Count);

        free(c->Constants);
        c->Constants = grown;
    }

    c->Constants[slot] = *constant;
    c->Count = slot + 1;

    return slot;
}
 
 
/**
* Add a state vector to the constant list, while trying to avoid duplicates.
*/
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
{
unsigned index;
struct rc_constant constant;
 
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_STATE) {
if (c->Constants[index].u.State[0] == state0 &&
c->Constants[index].u.State[1] == state1)
return index;
}
}
 
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_STATE;
constant.Size = 4;
constant.u.State[0] = state0;
constant.u.State[1] = state1;
 
return rc_constants_add(c, &constant);
}
 
 
/**
* Add an immediate vector to the constant list, while trying to avoid
* duplicates.
*/
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
{
unsigned index;
struct rc_constant constant;
 
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
return index;
}
}
 
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 4;
memcpy(constant.u.Immediate, data, sizeof(float) * 4);
 
return rc_constants_add(c, &constant);
}
 
 
/**
* Add an immediate scalar to the constant list, while trying to avoid
* duplicates.
*/
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
{
unsigned index;
int free_index = -1;
struct rc_constant constant;
 
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
unsigned comp;
for(comp = 0; comp < c->Constants[index].Size; ++comp) {
if (c->Constants[index].u.Immediate[comp] == data) {
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
return index;
}
}
 
if (c->Constants[index].Size < 4)
free_index = index;
}
}
 
if (free_index >= 0) {
unsigned comp = c->Constants[free_index].Size++;
c->Constants[free_index].u.Immediate[comp] = data;
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
return free_index;
}
 
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 1;
constant.u.Immediate[0] = data;
*swizzle = RC_SWIZZLE_XXXX;
 
return rc_constants_add(c, &constant);
}
 
void rc_constants_print(struct rc_constant_list * c)
{
unsigned int i;
for(i = 0; i < c->Count; i++) {
if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
float * values = c->Constants[i].u.Immediate;
fprintf(stderr, "CONST[%u] = "
"{ %10.4f %10.4f %10.4f %10.4f }\n",
i, values[0],values[1], values[2], values[3]);
}
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_code.h
0,0 → 1,306
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef RADEON_CODE_H
#define RADEON_CODE_H
 
#include <stdint.h>
 
/* Fragment program size limits.
 * NOTE(review): presumably these mirror per-chip-generation hardware limits
 * (R300 / R400 / R500) -- confirm against the register documentation. */
#define R300_PFS_MAX_ALU_INST 64
#define R300_PFS_MAX_TEX_INST 32
#define R300_PFS_MAX_TEX_INDIRECT 4
#define R300_PFS_NUM_TEMP_REGS 32
#define R300_PFS_NUM_CONST_REGS 32

/* These two size the tex/alu instruction arrays of
 * struct r300_fragment_program_code below. */
#define R400_PFS_MAX_ALU_INST 512
#define R400_PFS_MAX_TEX_INST 512

/* R500_PFS_MAX_INST sizes the instruction array of
 * struct r500_fragment_program_code below. */
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4

/* The r500 maximum depth is not just for loops, but any combination of loops
* and subroutine jumps. */
#define R500_PVS_MAX_LOOP_DEPTH 8

/* NOTE(review): STATE_INTERNAL_DRIVER is not defined in this header; it must
 * come from whichever file includes this one -- confirm it is still used. */
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
 
/**
 * Kinds of entries in an rc_constant_list; stored in rc_constant::Type.
 */
enum {
/**
* External constants are constants whose meaning is unknown to this
* compiler. For example, a Mesa gl_program's constants are turned
* into external constants.
*/
RC_CONSTANT_EXTERNAL = 0,

/**
 * Literal float values stored directly in rc_constant::u.Immediate.
 */
RC_CONSTANT_IMMEDIATE,

/**
* Constant referring to state that is known by this compiler,
* see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
*/
RC_CONSTANT_STATE
};
 
/**
 * Identifiers of compiler-known state vectors. A value from this enum is
 * passed as the first state word (state0) to rc_constants_add_state() and
 * ends up in rc_constant::u.State[0].
 */
enum {
RC_STATE_SHADOW_AMBIENT = 0,

RC_STATE_R300_WINDOW_DIMENSION,
RC_STATE_R300_TEXRECT_FACTOR,
RC_STATE_R300_TEXSCALE_FACTOR,
RC_STATE_R300_VIEWPORT_SCALE,
RC_STATE_R300_VIEWPORT_OFFSET
};
 
/**
 * One entry of a constant list. Type selects which member of the union
 * is meaningful.
 */
struct rc_constant {
unsigned Type:2; /**< RC_CONSTANT_xxx */
/* Number of used components; the add functions in radeon_code.c store
 * values from 1 to 4 here. */
unsigned Size:3;

union {
/* Used when Type == RC_CONSTANT_EXTERNAL.
 * NOTE(review): presumably an index into the caller's own constants -- confirm. */
unsigned External;
/* Used when Type == RC_CONSTANT_IMMEDIATE: the literal float values. */
float Immediate[4];
/* Used when Type == RC_CONSTANT_STATE: the two state words given to
 * rc_constants_add_state(). */
unsigned State[2];
} u;
};
 
/**
 * Growable array of constants, managed by the rc_constants_* functions.
 */
struct rc_constant_list {
/* Heap-allocated array; released by rc_constants_destroy(). */
struct rc_constant * Constants;
/* Number of entries of Constants currently in use. */
unsigned Count;

/* Number of entries Constants has room for; rc_constants_add() grows the
 * array when Count reaches this value. */
unsigned _Reserved;
};
 
/* Constant list API, implemented in radeon_code.c. */
void rc_constants_init(struct rc_constant_list * c);
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
void rc_constants_destroy(struct rc_constant_list * c);
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
/* state0/state1 are stored in rc_constant::u.State[0] and [1]; the parameter
 * names match the definition in radeon_code.c. */
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1);
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
void rc_constants_print(struct rc_constant_list * c);
 
/**
* Compare functions.
*
* \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
* the correct GL compare function.
*
* The enumerators therefore must stay consecutive and in this order,
* starting at 0.
*/
typedef enum {
RC_COMPARE_FUNC_NEVER = 0,
RC_COMPARE_FUNC_LESS,
RC_COMPARE_FUNC_EQUAL,
RC_COMPARE_FUNC_LEQUAL,
RC_COMPARE_FUNC_GREATER,
RC_COMPARE_FUNC_NOTEQUAL,
RC_COMPARE_FUNC_GEQUAL,
RC_COMPARE_FUNC_ALWAYS
} rc_compare_func;
 
/**
* Coordinate wrapping modes.
*
* These are not quite the same as their GL counterparts yet.
*
* RC_WRAP_NONE is 0, so a zero-initialized wrap_mode field in
* r300_fragment_program_external_state means no wrapping.
*/
typedef enum {
RC_WRAP_NONE = 0,
RC_WRAP_REPEAT,
RC_WRAP_MIRRORED_REPEAT,
RC_WRAP_MIRRORED_CLAMP
} rc_wrap_mode;
 
/**
* Stores state that influences the compilation of a fragment program.
*
* Holds one entry of per-sampler state for each of 16 texture units, plus
* one program-wide flag.
*/
struct r300_fragment_program_external_state {
struct {
/**
* This field contains swizzle for some lowering passes
* (shadow comparison, unorm->snorm conversion)
*/
unsigned texture_swizzle:12;

/**
* If the sampler is used as a shadow sampler,
* this field specifies the compare function.
*
* Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
* \sa rc_compare_func
*/
unsigned texture_compare_func : 3;

/**
* No matter what the sampler type is,
* this field turns it into a shadow sampler.
*/
unsigned compare_mode_enabled : 1;

/**
* If the sampler will receive non-normalized coords,
* this field is set. The scaling factor is given by
* RC_STATE_R300_TEXRECT_FACTOR.
*/
unsigned non_normalized_coords : 1;

/**
* This field specifies wrapping modes for the sampler.
*
* If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
* will be performed on the coordinates.
* \sa rc_wrap_mode
*/
unsigned wrap_mode : 3;

/**
* The coords are scaled after applying the wrap mode emulation
* and right before texture fetch. The scaling factor is given by
* RC_STATE_R300_TEXSCALE_FACTOR. */
unsigned clamp_and_scale_before_fetch : 1;

/**
* Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
* in the shader.
*/
unsigned convert_unorm_to_snorm:1;
} unit[16];

/* NOTE(review): presumably forces the output alpha to 1.0 when set; the
 * consumer of this flag is not visible here -- confirm. */
unsigned alpha_to_one:1;
};
 
 
 
/**
 * Instruction ranges of one node of an R300 fragment program.
 * NOTE(review): presumably the offsets index the tex/alu arrays of
 * struct r300_fragment_program_code -- confirm against the emitter.
 */
struct r300_fragment_program_node {
int tex_offset; /**< first tex instruction */
int tex_end; /**< last tex instruction, relative to tex_offset */
int alu_offset; /**< first ALU instruction */
int alu_end; /**< last ALU instruction, relative to alu_offset */
int flags; /* NOTE(review): meaning of the flag bits is not visible here. */
};
 
/**
* Stores an R300 fragment program in its compiled-to-hardware form.
*
* Both instruction arrays are sized with the R400 limits
* (R400_PFS_MAX_TEX_INST / R400_PFS_MAX_ALU_INST).
*/
struct r300_fragment_program_code {
struct {
unsigned int length; /**< total # of texture instructions used */
uint32_t inst[R400_PFS_MAX_TEX_INST];
} tex;

struct {
unsigned int length; /**< total # of ALU instructions used */
/* One ALU instruction: five 32-bit words. */
struct {
uint32_t rgb_inst;
uint32_t rgb_addr;
uint32_t alpha_inst;
uint32_t alpha_addr;
uint32_t r400_ext_addr;
} inst[R400_PFS_MAX_ALU_INST];
} alu;

/* Values for the hardware registers named in the trailing comments. */
uint32_t config; /* US_CONFIG */
uint32_t pixsize; /* US_PIXSIZE */
uint32_t code_offset; /* US_CODE_OFFSET */
uint32_t r400_code_offset_ext; /* US_CODE_EXT */
uint32_t code_addr[4]; /* US_CODE_ADDR */
/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
* for r400 cards */
unsigned int r390_mode:1;
};
 
 
/**
 * Stores an R500 fragment program in its compiled form: up to
 * R500_PFS_MAX_INST instructions of six 32-bit words each, plus
 * program-wide settings.
 */
struct r500_fragment_program_code {
struct {
uint32_t inst0;
uint32_t inst1;
uint32_t inst2;
uint32_t inst3;
uint32_t inst4;
uint32_t inst5;
} inst[R500_PFS_MAX_INST];

int inst_end; /* Number of instructions - 1; also, last instruction to be executed */

/* NOTE(review): presumably the highest temporary register index used by
 * the program -- confirm. */
int max_temp_idx;

/* Flow control bits; R500_FC_FULL_FC_EN is OR-ed in by the emitter when
 * full flow control mode is needed. */
uint32_t us_fc_ctrl;

/* NOTE(review): presumably integer constants consumed by flow control
 * instructions; int_constant_count being the number of valid entries is
 * inferred from the name -- confirm. */
uint32_t int_constants[32];
uint32_t int_constant_count;
};
 
/**
 * Compiled fragment program for either hardware family. Only one member of
 * the code union is meaningful for a given program.
 */
struct rX00_fragment_program_code {
union {
struct r300_fragment_program_code r300;
struct r500_fragment_program_code r500;
} code;

/* NOTE(review): presumably set when the program writes the depth output -- confirm. */
unsigned writes_depth:1;

/* Constants referenced by the compiled program. */
struct rc_constant_list constants;
/* NOTE(review): presumably maps constant indices across unused-constant
 * removal; ownership and length are not visible here -- confirm. */
unsigned *constants_remap_table;
};
 
 
/* Vertex shader limits. Each *_DWORDS value is the instruction count times 4. */
#define R300_VS_MAX_ALU 256
#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
#define R500_VS_MAX_ALU 1024
/* Sizes the body arrays of struct r300_vertex_program_code. */
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS 16
#define R300_VS_MAX_LOOP_DEPTH 1

/* Sizes of the inputs[] / outputs[] arrays of struct r300_vertex_program_code. */
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
 
/**
 * Stores a vertex program in its compiled form. The body array is sized with
 * the R500 limit (R500_VS_MAX_ALU_DWORDS).
 */
struct r300_vertex_program_code {
/* NOTE(review): presumably the number of used entries of body, in
 * dwords -- confirm. */
int length;
/* The same storage viewed as 32-bit integers or as floats. */
union {
uint32_t d[R500_VS_MAX_ALU_DWORDS];
float f[R500_VS_MAX_ALU_DWORDS];
} body;

/* NOTE(review): meaning not visible here -- confirm against the emitter. */
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
/* NOTE(review): presumably per-input / per-output hardware register
 * assignments -- confirm. */
int inputs[VSF_MAX_INPUTS];
int outputs[VSF_MAX_OUTPUTS];

/* Constants referenced by the compiled program. */
struct rc_constant_list constants;
/* NOTE(review): presumably maps constant indices across unused-constant
 * removal -- confirm. */
unsigned *constants_remap_table;

/* NOTE(review): presumably bitmasks with one bit per input / output
 * register index -- confirm. */
uint32_t InputsRead;
uint32_t OutputsWritten;

/* Flow control data; each array below has room for R300_VS_MAX_FC_OPS entries. */
unsigned int num_fc_ops;
uint32_t fc_ops;
union {
uint32_t r300[R300_VS_MAX_FC_OPS];
struct {
uint32_t lw;
uint32_t uw;
} r500[R300_VS_MAX_FC_OPS];
} fc_op_addrs;
int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
};
 
#endif /* RADEON_CODE_H */
 
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler.c
0,0 → 1,504
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "radeon_compiler.h"
 
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
 
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
#include "radeon_regalloc.h"
#include "radeon_compiler_util.h"
 
 
void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
{
memset(c, 0, sizeof(*c));
 
memory_pool_init(&c->Pool);
c->Program.Instructions.Prev = &c->Program.Instructions;
c->Program.Instructions.Next = &c->Program.Instructions;
c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
c->regalloc_state = rs;
}
 
/**
 * Release everything a compiler instance owns: the stored error message,
 * the program's constant list and the memory pool.
 */
void rc_destroy(struct radeon_compiler * c)
{
    free(c->ErrorMsg);
    rc_constants_destroy(&c->Program.Constants);
    memory_pool_destroy(&c->Pool);
}
 
/**
 * printf-style debug output to stderr; does nothing unless RC_DBG_LOG is
 * set in c->Debug.
 */
void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
{
    if (c->Debug & RC_DBG_LOG) {
        va_list args;

        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
    }
}
 
/**
 * Flag a compilation error and record a printf-style message.
 *
 * c->Error is always set. Only the first error's message is kept, in
 * c->ErrorMsg (heap-allocated, released by rc_destroy()). When RC_DBG_LOG is
 * set in c->Debug, every error is also printed to stderr.
 *
 * The message is formatted into a stack buffer first. If it does not fit,
 * a buffer of the exact size is allocated and the message formatted again.
 * If formatting fails (negative vsnprintf() result), the raw format string
 * is stored instead, so the result is never mistaken for a huge length.
 * If an allocation fails, c->ErrorMsg stays NULL.
 */
void rc_error(struct radeon_compiler * c, const char * fmt, ...)
{
    va_list ap;

    c->Error = 1;

    if (!c->ErrorMsg) {
        /* Only remember the first error */
        char buf[1024];
        int written;

        va_start(ap, fmt);
        written = vsnprintf(buf, sizeof(buf), fmt, ap);
        va_end(ap);

        if (written < 0) {
            /* buf contents are unspecified after an error; keep at
             * least the unformatted text. */
            c->ErrorMsg = strdup(fmt);
        } else if ((size_t)written < sizeof(buf)) {
            c->ErrorMsg = strdup(buf);
        } else {
            const size_t needed = (size_t)written + 1;

            c->ErrorMsg = malloc(needed);
            if (c->ErrorMsg) {
                va_start(ap, fmt);
                vsnprintf(c->ErrorMsg, needed, fmt, ap);
                va_end(ap);
            }
        }
    }

    if (c->Debug & RC_DBG_LOG) {
        fprintf(stderr, "r300compiler error: ");

        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
    }
}
 
/**
 * Backend of the rc_assert() macro: reports an internal compiler error
 * naming the source location and the failed assertion text via rc_error().
 *
 * \return always 1, so that rc_assert() evaluates to true on failure.
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
    rc_error(c,
             "ICE at %s:%i: assertion failed: %s\n",
             file, line, assertion);

    return 1;
}
 
/**
 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
 * based on which inputs and outputs are actually referenced
 * in program instructions.
 *
 * Both masks are cleared and then rebuilt with one bit per register index:
 * a bit in InputsRead for every source operand in RC_FILE_INPUT, and a bit
 * in OutputsWritten for every destination in RC_FILE_OUTPUT.
 *
 * The bits are formed with an unsigned 1 so that register index 31 does not
 * shift into the sign bit of an int, which is undefined behavior.
 */
void rc_calculate_inputs_outputs(struct radeon_compiler * c)
{
    struct rc_instruction *inst;

    c->Program.InputsRead = 0;
    c->Program.OutputsWritten = 0;

    for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
    {
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
        int i;

        for (i = 0; i < opcode->NumSrcRegs; ++i) {
            if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
                c->Program.InputsRead |= 1u << inst->U.I.SrcReg[i].Index;
        }

        if (opcode->HasDstReg) {
            if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
                c->Program.OutputsWritten |= 1u << inst->U.I.DstReg.Index;
        }
    }
}
 
/**
 * Rewrite the program such that everything that source the given input
 * register will source new_input instead.
 *
 * For every matching source operand the file and index are replaced, the
 * swizzles are combined, and, unless the operand already takes an absolute
 * value, new_input's negate bits are XOR-ed in and its Abs flag is copied.
 *
 * The bit for \p input is cleared from c->Program.InputsRead. The bit for
 * new_input.Index is set whenever an operand is rewritten, regardless of
 * new_input.File.
 *
 * Mask bits are formed with an unsigned 1 so that index 31 does not shift
 * into the sign bit of an int, which is undefined behavior.
 */
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
{
    struct rc_instruction * inst;

    c->Program.InputsRead &= ~(1u << input);

    for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
        unsigned i;

        for(i = 0; i < opcode->NumSrcRegs; ++i) {
            if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
                inst->U.I.SrcReg[i].File = new_input.File;
                inst->U.I.SrcReg[i].Index = new_input.Index;
                inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
                /* An existing Abs discards the sign of whatever is read,
                 * so new_input's modifiers only matter without it. */
                if (!inst->U.I.SrcReg[i].Abs) {
                    inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
                    inst->U.I.SrcReg[i].Abs = new_input.Abs;
                }

                c->Program.InputsRead |= 1u << new_input.Index;
            }
        }
    }
}
 
 
/**
 * Rewrite the program such that everything that writes into the given
 * output register will instead write to new_output. The new_output
 * writemask is honoured.
 *
 * Each redirected destination has its write mask AND-ed with \p writemask.
 * The bit for \p output is cleared from c->Program.OutputsWritten and the
 * bit for \p new_output is set if at least one write was redirected.
 *
 * Mask bits are formed with an unsigned 1 so that index 31 does not shift
 * into the sign bit of an int, which is undefined behavior.
 */
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
{
    struct rc_instruction * inst;

    c->Program.OutputsWritten &= ~(1u << output);

    for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

        if (opcode->HasDstReg) {
            if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
                inst->U.I.DstReg.Index = new_output;
                inst->U.I.DstReg.WriteMask &= writemask;

                c->Program.OutputsWritten |= 1u << new_output;
            }
        }
    }
}
 
 
/**
 * Rewrite the program such that a given output is duplicated.
 *
 * Every write to \p output is redirected to a newly allocated temporary.
 * Two MOV instructions are then added at the end of the program, copying
 * that temporary first to \p output and then to \p dup_output. The bit for
 * \p dup_output is set in c->Program.OutputsWritten.
 *
 * The mask bit is formed with an unsigned 1 so that index 31 does not shift
 * into the sign bit of an int, which is undefined behavior.
 */
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
{
    unsigned tempreg = rc_find_free_temporary(c);
    struct rc_instruction * inst;

    for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

        if (opcode->HasDstReg) {
            if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
                inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
                inst->U.I.DstReg.Index = tempreg;
            }
        }
    }

    /* MOV output, temp -- inserted after the current last instruction. */
    inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
    inst->U.I.Opcode = RC_OPCODE_MOV;
    inst->U.I.DstReg.File = RC_FILE_OUTPUT;
    inst->U.I.DstReg.Index = output;

    inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
    inst->U.I.SrcReg[0].Index = tempreg;
    inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;

    /* MOV dup_output, temp */
    inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
    inst->U.I.Opcode = RC_OPCODE_MOV;
    inst->U.I.DstReg.File = RC_FILE_OUTPUT;
    inst->U.I.DstReg.Index = dup_output;

    inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
    inst->U.I.SrcReg[0].Index = tempreg;
    inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;

    c->Program.OutputsWritten |= 1u << dup_output;
}
 
 
/**
 * Introduce standard code fragment to deal with fragment.position.
 *
 * Three instructions are inserted at the start of the program, all writing
 * one new temporary:
 *   RCP temp.w   = 1 / new_input.w
 *   MUL temp.xyz = new_input * temp.w              (perspective divide)
 *   MAD temp.xyz = temp * scale + offset           (viewport transformation)
 * Every later read of input \p wpos is redirected to that temporary.
 *
 * \param wpos             input register holding the fragment position.
 * \param new_input        input register the position is read from instead.
 * \param full_vtransform  if non-zero, scale and offset are the
 *                         RC_STATE_R300_VIEWPORT_SCALE and
 *                         RC_STATE_R300_VIEWPORT_OFFSET state constants;
 *                         otherwise both are RC_STATE_R300_WINDOW_DIMENSION.
 *
 * c->Program.InputsRead is updated to drop \p wpos and include \p new_input.
 * The mask bits are formed with an unsigned 1 so that index 31 does not
 * shift into the sign bit of an int, which is undefined behavior.
 */
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
                                int full_vtransform)
{
    unsigned tempregi = rc_find_free_temporary(c);
    struct rc_instruction * inst_rcp;
    struct rc_instruction * inst_mul;
    struct rc_instruction * inst_mad;
    struct rc_instruction * inst;

    c->Program.InputsRead &= ~(1u << wpos);
    c->Program.InputsRead |= 1u << new_input;

    /* perspective divide */
    inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
    inst_rcp->U.I.Opcode = RC_OPCODE_RCP;

    inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
    inst_rcp->U.I.DstReg.Index = tempregi;
    inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;

    inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
    inst_rcp->U.I.SrcReg[0].Index = new_input;
    inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;

    inst_mul = rc_insert_new_instruction(c, inst_rcp);
    inst_mul->U.I.Opcode = RC_OPCODE_MUL;

    inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
    inst_mul->U.I.DstReg.Index = tempregi;
    inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;

    inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
    inst_mul->U.I.SrcReg[0].Index = new_input;

    inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
    inst_mul->U.I.SrcReg[1].Index = tempregi;
    inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

    /* viewport transformation */
    inst_mad = rc_insert_new_instruction(c, inst_mul);
    inst_mad->U.I.Opcode = RC_OPCODE_MAD;

    inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
    inst_mad->U.I.DstReg.Index = tempregi;
    inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;

    inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
    inst_mad->U.I.SrcReg[0].Index = tempregi;
    inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

    inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
    inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;

    inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
    inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;

    if (full_vtransform) {
        inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
        inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
    } else {
        /* The same state constant serves as both scale and offset. */
        inst_mad->U.I.SrcReg[1].Index =
        inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
    }

    /* Redirect all reads of wpos that follow the inserted instructions. */
    for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
        unsigned i;

        for(i = 0; i < opcode->NumSrcRegs; i++) {
            if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
                inst->U.I.SrcReg[i].Index == wpos) {
                inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
                inst->U.I.SrcReg[i].Index = tempregi;
            }
        }
    }
}
 
 
/**
 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
 * Gallium and OpenGL define it the other way around.
 *
 * An ADD computing (1 - FACE.x) into the x component of a new temporary is
 * placed at the beginning of the shader, and every later read of the FACE
 * input is redirected to that temporary.
 */
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
{
    unsigned flipped = rc_find_free_temporary(c);
    struct rc_instruction *flip;
    struct rc_instruction *cur;

    /* flipped.x = 1 + (-FACE.x) */
    flip = rc_insert_new_instruction(c, &c->Program.Instructions);
    flip->U.I.Opcode = RC_OPCODE_ADD;

    flip->U.I.DstReg.File = RC_FILE_TEMPORARY;
    flip->U.I.DstReg.Index = flipped;
    flip->U.I.DstReg.WriteMask = RC_MASK_X;

    /* First operand: the constant 1, expressed purely through the swizzle. */
    flip->U.I.SrcReg[0].File = RC_FILE_NONE;
    flip->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;

    /* Second operand: the negated hardware FACE value. */
    flip->U.I.SrcReg[1].File = RC_FILE_INPUT;
    flip->U.I.SrcReg[1].Index = face;
    flip->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
    flip->U.I.SrcReg[1].Negate = RC_MASK_XYZW;

    cur = flip->Next;
    while (cur != &c->Program.Instructions) {
        const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
        unsigned src;

        for (src = 0; src < info->NumSrcRegs; src++) {
            if (cur->U.I.SrcReg[src].File != RC_FILE_INPUT)
                continue;
            if (cur->U.I.SrcReg[src].Index != face)
                continue;

            cur->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
            cur->U.I.SrcReg[src].Index = flipped;
        }

        cur = cur->Next;
    }
}
 
/**
 * Per-register-read callback used by rc_get_stats().
 *
 * \p userdata is the struct rc_program_stats being filled in. For a
 * temporary register, the highest index seen so far is kept in
 * num_temp_regs; for an inline literal, num_inline_literals is incremented.
 * \p inst and \p mask are not used.
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
        rc_register_file file, unsigned int index, unsigned int mask)
{
    struct rc_program_stats *stats = userdata;

    if (file == RC_FILE_TEMPORARY && index > stats->num_temp_regs)
        stats->num_temp_regs = index;

    if (file == RC_FILE_INLINE)
        stats->num_inline_literals++;
}
 
/**
 * Fill \p s with statistics about the current program.
 *
 * \p s is zeroed first. Register reads of every instruction are counted
 * through reg_count_callback(). RC_OPCODE_BEGIN_TEX instructions in normal
 * form contribute register reads only and are otherwise not counted. For
 * instructions that are not in normal form, the flow-control / texture
 * classification is based on the RGB opcode.
 */
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
    struct rc_instruction * inst;

    memset(s, 0, sizeof(*s));

    for (inst = c->Program.Instructions.Next;
         inst != &c->Program.Instructions;
         inst = inst->Next) {
        const struct rc_opcode_info * info;

        rc_for_all_reads_mask(inst, reg_count_callback, s);

        if (inst->Type != RC_INSTRUCTION_NORMAL) {
            /* Assuming alpha will never be a flow control or
             * a tex instruction. */
            info = rc_get_opcode_info(inst->U.P.RGB.Opcode);

            if (inst->U.P.RGB.Opcode != RC_OPCODE_NOP)
                s->num_rgb_insts++;
            if (inst->U.P.Alpha.Opcode != RC_OPCODE_NOP)
                s->num_alpha_insts++;

            if (inst->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
                s->num_presub_ops++;
            if (inst->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
                s->num_presub_ops++;

            if (inst->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
                inst->U.P.RGB.Omod != RC_OMOD_DISABLE)
                s->num_omod_ops++;
            if (inst->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
                inst->U.P.Alpha.Omod != RC_OMOD_DISABLE)
                s->num_omod_ops++;
        } else {
            info = rc_get_opcode_info(inst->U.I.Opcode);
            if (info->Opcode == RC_OPCODE_BEGIN_TEX)
                continue;
            if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE)
                s->num_presub_ops++;
        }

        if (info->IsFlowControl)
            s->num_fc_insts++;
        if (info->HasTexture)
            s->num_tex_insts++;
        s->num_insts++;
    }

    /* Increment here because the reg_count_callback stores the max
     * temporary reg index in s->num_temp_regs. */
    s->num_temp_regs++;
}
 
static void print_stats(struct radeon_compiler * c)
{
struct rc_program_stats s;
 
if (c->initial_num_insts <= 5)
return;
 
rc_get_stats(c, &s);
 
switch (c->type) {
case RC_VERTEX_PROGRAM:
fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
"~%4u Instructions\n"
"~%4u Flow Control Instructions\n"
"~%4u Temporary Registers\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_fc_insts, s.num_temp_regs);
break;
 
case RC_FRAGMENT_PROGRAM:
fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
"~%4u Instructions\n"
"~%4u Vector Instructions (RGB)\n"
"~%4u Scalar Instructions (Alpha)\n"
"~%4u Flow Control Instructions\n"
"~%4u Texture Instructions\n"
"~%4u Presub Operations\n"
"~%4u OMOD Operations\n"
"~%4u Temporary Registers\n"
"~%4u Inline Literals\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
break;
default:
assert(0);
}
}
 
/* Human-readable program type names for debug output, indexed by
 * enum rc_program_type (c->type). */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
"Vertex Program",
"Fragment Program"
};
 
void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
for (unsigned i = 0; list[i].name; i++) {
if (list[i].predicate) {
list[i].run(c, list[i].user);
 
if (c->Error)
return;
 
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
rc_print_program(&c->Program);
}
}
}
}
 
/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
struct rc_program_stats s;
 
rc_get_stats(c, &s);
c->initial_num_insts = s.num_insts;
 
if (c->Debug & RC_DBG_LOG) {
fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
rc_print_program(&c->Program);
}
 
rc_run_compiler_passes(c, list);
 
if (c->Debug & RC_DBG_STATS)
print_stats(c);
}
 
/**
 * Compiler pass that validates the finished program: reports an error via
 * rc_error() when the program uses more constants than c->max_constants.
 *
 * Both values are unsigned, so they are printed with %u.
 *
 * \param user  unused; present to match the pass entry point signature.
 */
void rc_validate_final_shader(struct radeon_compiler *c, void *user)
{
    /* Check the number of constants. */
    if (c->Program.Constants.Count > c->max_constants) {
        rc_error(c, "Too many constants. Max: %u, Got: %u\n",
                 c->max_constants, c->Program.Constants.Count);
    }
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler.h
0,0 → 1,173
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef RADEON_COMPILER_H
#define RADEON_COMPILER_H
 
#include "main/compiler.h"
 
#include "memory_pool.h"
#include "radeon_code.h"
#include "radeon_program.h"
#include "radeon_emulate_loops.h"
 
/* Bit flags for radeon_compiler::Debug. */
/* Enables rc_debug() output, error printing and program dumps on stderr. */
#define RC_DBG_LOG (1 << 0)
/* Makes rc_run_compiler() print program statistics after the passes ran. */
#define RC_DBG_STATS (1 << 1)
 
struct rc_swizzle_caps;
 
/* Kind of program being compiled; stored in radeon_compiler::type. */
enum rc_program_type {
RC_VERTEX_PROGRAM,
RC_FRAGMENT_PROGRAM,
/* Number of program types; keep last. */
RC_NUM_PROGRAM_TYPES
};
 
/**
 * State shared by the vertex and fragment program compilers.
 * Set up with rc_init() and torn down with rc_destroy().
 */
struct radeon_compiler {
/* Memory pool, initialized by rc_init() and destroyed by rc_destroy(). */
struct memory_pool Pool;
/* The program being compiled. */
struct rc_program Program;
/* Register allocator state handed to rc_init(). */
const struct rc_regalloc_state *regalloc_state;
enum rc_program_type type;
/* Bitmask of RC_DBG_xxx flags. */
unsigned Debug:2;
/* Set by rc_error(); rc_run_compiler_passes() stops once it is set. */
unsigned Error:1;
/* Message of the first error reported through rc_error(); heap-allocated
 * and freed by rc_destroy(). */
char * ErrorMsg;

/* Hardware specification. */
unsigned is_r400:1;
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned has_presub:1;
unsigned has_omod:1;
unsigned disable_optimizations:1;
unsigned max_temp_regs;
/* Upper bound enforced by rc_validate_final_shader(). */
unsigned max_constants;
int max_alu_insts;
unsigned max_tex_insts;

/* Whether to remove unused constants and empty holes in constant space. */
unsigned remove_unused_constants:1;

/**
* Variables used internally, not be touched by callers
* of the compiler
*/
/*@{*/
struct rc_swizzle_caps * SwizzleCaps;
/*@}*/

struct emulate_loop_state loop_state;

unsigned initial_num_insts; /* Number of instructions at start. */
};
 
void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs);
void rc_destroy(struct radeon_compiler * c);
 
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
 
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
 
/**
* This macro acts like an if-statement that can be used to implement
* non-aborting assertions in the compiler.
*
* It checks whether \p cond is true. If not, an internal compiler error is
* flagged and the if-clause is run.
*
* A typical use-case would be:
*
* if (rc_assert(c, condition-that-must-be-true))
* return;
*
* The expression is non-zero only when \p cond is false:
* rc_if_fail_helper() is then called and always returns 1. When \p cond
* holds, the helper is not called and the expression is 0.
*/
#define rc_assert(c, cond) \
(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
 
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
 
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
int full_vtransform);
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
 
/**
 * Fragment program compiler: the common compiler state (Base) plus
 * fragment-specific inputs and the output code buffer.
 */
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
/* Destination for the compiled program. */
struct rX00_fragment_program_code *code;
/* Optional transformations and features. */
struct r300_fragment_program_external_state state;
/* Register corresponding to the depthbuffer. */
unsigned OutputDepth;
/* Registers corresponding to the four colorbuffers. */
unsigned OutputColor[4];

/* NOTE(review): presumably an opaque pointer for use by the callback
 * below -- confirm with the callers. */
void * UserData;
/* Callback supplied by the caller. It receives the compiler, an
 * allocate(data, input, hwreg) function and the data pointer to pass
 * to it.
 * NOTE(review): presumably it calls allocate once per input to bind it to
 * a hardware register -- confirm. */
void (*AllocateHwInputs)(
struct r300_fragment_program_compiler * c,
void (*allocate)(void * data, unsigned input, unsigned hwreg),
void * mydata);
};
 
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
 
/**
 * Vertex program compiler: the common compiler state (Base) plus
 * vertex-specific inputs and the output code buffer.
 */
struct r300_vertex_program_compiler {
struct radeon_compiler Base;
/* Destination for the compiled program. */
struct r300_vertex_program_code *code;
/* NOTE(review): presumably a bitmask of outputs the program must write -- confirm. */
uint32_t RequiredOutputs;

/* NOTE(review): presumably an opaque pointer for use by the callback
 * below -- confirm with the callers. */
void * UserData;
/* Callback supplied by the caller.
 * NOTE(review): presumably fills code->inputs / code->outputs -- confirm. */
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);

};
 
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
void rc_vert_fc(struct radeon_compiler *compiler, void *user);
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
 
/**
 * One entry of a pass list given to rc_run_compiler_passes().
 * The list is terminated by an entry whose name is NULL.
 */
struct radeon_compiler_pass {
const char *name; /* Name of the pass. */
/* Non-zero: dump the program after this pass when RC_DBG_LOG is set. */
int dump; /* Dump the program if Debug == 1? */
int predicate; /* Run this pass? */
void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
void *user; /* Optional parameter which is passed to the run function. */
};
 
/**
 * Program statistics as computed by rc_get_stats().
 */
struct rc_program_stats {
/* Counted instructions; RC_OPCODE_BEGIN_TEX in normal form is excluded. */
unsigned num_insts;
unsigned num_fc_insts;
unsigned num_tex_insts;
/* The RGB / alpha counts are only incremented for instructions that are
 * not in normal form, and only for opcodes other than RC_OPCODE_NOP. */
unsigned num_rgb_insts;
unsigned num_alpha_insts;
unsigned num_presub_ops;
/* Highest temporary register index that is read, plus one. */
unsigned num_temp_regs;
unsigned num_omod_ops;
/* Number of register reads from RC_FILE_INLINE. */
unsigned num_inline_literals;
};
 
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
 
/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
void rc_validate_final_shader(struct radeon_compiler *c, void *user);
 
#endif /* RADEON_COMPILER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler_util.c
0,0 → 1,753
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file
*/
 
#include "radeon_compiler_util.h"
 
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
/**
 * Convert a swizzle into a component mask.
 *
 * Every one of the four channels of \p swz contributes the bit
 * (1 << selector). Bits that fall outside RC_MASK_XYZW are dropped, so only
 * selectors that name a real component end up in the result.
 *
 * @return The mask of components referenced by swz.
 */
unsigned int rc_swizzle_to_writemask(unsigned int swz)
{
	unsigned int chan = 0;
	unsigned int bits = 0;

	while (chan < 4) {
		bits |= 1 << GET_SWZ(swz, chan);
		chan++;
	}

	return bits & RC_MASK_XYZW;
}
 
/**
 * Look up one channel of a swizzle.
 *
 * When \p idx has bit 0x4 set it is not used as a channel index and is
 * returned unchanged; otherwise the selector stored in channel \p idx of
 * \p swz is returned.
 */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
	if (!(idx & 0x4))
		return GET_SWZ(swz, idx);

	return idx;
}
 
/**
 * Normalize a swizzle so that all four channels carry a value, no matter
 * how many channels the instruction it belongs to really uses.
 *
 * @param initial_value The swizzle to start from.
 * @param channels The number of channels in initial_value that have a
 * meaningful value; channels from this index up to 3 are overwritten.
 * @return initial_value with every channel >= channels set to
 * RC_SWIZZLE_UNUSED.
 */
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
{
	unsigned int chan = channels;

	while (chan < 4) {
		SET_SWZ(initial_value, chan, RC_SWIZZLE_UNUSED);
		chan++;
	}

	return initial_value;
}
 
/**
 * Build a swizzle from four selectors, resolving each one through \p src
 * with get_swz().
 *
 * Channel n of the result is get_swz(src, selector n), packed at bit
 * position 3 * n.
 */
unsigned int combine_swizzles4(unsigned int src,
		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
	const rc_swizzle selectors[4] = { swz_x, swz_y, swz_z, swz_w };
	unsigned int combined = 0;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		combined |= get_swz(src, selectors[chan]) << (3 * chan);
	}

	return combined;
}
 
unsigned int combine_swizzles(unsigned int src, unsigned int swz)
{
unsigned int ret = 0;
 
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
 
return ret;
}
 
/**
 * Translate a single-component mask into the matching swizzle selector.
 *
 * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
 * @return The selector for that component, or RC_SWIZZLE_UNUSED when mask
 * is none of the four values above.
 */
rc_swizzle rc_mask_to_swizzle(unsigned int mask)
{
	if (mask == RC_MASK_X)
		return RC_SWIZZLE_X;
	if (mask == RC_MASK_Y)
		return RC_SWIZZLE_Y;
	if (mask == RC_MASK_Z)
		return RC_SWIZZLE_Z;
	if (mask == RC_MASK_W)
		return RC_SWIZZLE_W;

	return RC_SWIZZLE_UNUSED;
}
 
/**
 * Reorder mask bits according to swizzle.
 *
 * Bit n of the result is the bit of \p mask selected by channel n of
 * \p swizzle. Channels whose selector is 4 or above contribute nothing.
 */
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
{
	unsigned reordered = 0;
	unsigned chan = 0;

	while (chan < 4) {
		unsigned selector = GET_SWZ(swizzle, chan);

		if (selector <= 3) {
			reordered |= GET_BIT(mask, selector) << chan;
		}
		chan++;
	}

	return reordered;
}
 
/**
 * Tell whether the source swizzles of an instruction have to be adjusted
 * when its writemask is rewritten.
 *
 * @return 0 for texture instructions and for DP2, DP3, DP4, DDX and DDY;
 * 1 for every other opcode.
 */
static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
{
	if (info->HasTexture)
		return 0;

	return !(info->Opcode == RC_OPCODE_DP2
		|| info->Opcode == RC_OPCODE_DP3
		|| info->Opcode == RC_OPCODE_DP4
		|| info->Opcode == RC_OPCODE_DDX
		|| info->Opcode == RC_OPCODE_DDY);
}
 
/**
 * Move the channels of a swizzle to new positions.
 *
 * For each channel i, the selector stored in channel i of old_swizzle is
 * written to the channel named by channel i of conversion_swizzle. Channels
 * that conversion_swizzle maps to RC_SWIZZLE_UNUSED are dropped, and result
 * channels that nothing was moved to stay RC_SWIZZLE_UNUSED.
 *
 * @return A swizzle that results from converting old_swizzle using
 * conversion_swizzle
 */
unsigned int rc_adjust_channels(
	unsigned int old_swizzle,
	unsigned int conversion_swizzle)
{
	unsigned int adjusted = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		unsigned int target = get_swz(conversion_swizzle, chan);

		if (target != RC_SWIZZLE_UNUSED) {
			SET_SWZ(adjusted, target, GET_SWZ(old_swizzle, chan));
		}
	}

	return adjusted;
}
 
/**
 * Map a writemask through a conversion swizzle.
 *
 * Each bit i set in old_mask sets the bit named by channel i of
 * conversion_swizzle in the result, unless that channel is
 * RC_SWIZZLE_UNUSED, in which case the bit is dropped.
 */
static unsigned int rewrite_writemask(
	unsigned int old_mask,
	unsigned int conversion_swizzle)
{
	unsigned int converted = 0;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		if (GET_BIT(old_mask, chan)
		    && GET_SWZ(conversion_swizzle, chan) != RC_SWIZZLE_UNUSED) {
			converted |= (1 << GET_SWZ(conversion_swizzle, chan));
		}
	}

	return converted;
}
 
/**
 * Rewrite the writemask of a pair sub-instruction and, where the opcode
 * requires it, move the channels of all of its argument swizzles to match.
 *
 * conversion_swizzle describes how the old writemask maps onto the new
 * one. For a detailed description of how conversion swizzles work see
 * rc_rewrite_swizzle().
 *
 * The argument swizzles are left alone for opcodes for which
 * srcs_need_rewrite() returns 0.
 */
void rc_pair_rewrite_writemask(
	struct rc_pair_sub_instruction * sub,
	unsigned int conversion_swizzle)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
	unsigned int arg;

	sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);

	if (srcs_need_rewrite(info)) {
		for (arg = 0; arg < info->NumSrcRegs; arg++) {
			sub->Arg[arg].Swizzle = rc_adjust_channels(
				sub->Arg[arg].Swizzle, conversion_swizzle);
		}
	}
}
 
/**
 * Source callback used by rc_normal_rewrite_writemask(): adjusts the
 * channels of one source swizzle.
 *
 * @param userdata Points to the conversion swizzle (an unsigned int).
 * @param inst Not used.
 * @param src The source register whose swizzle is rewritten in place.
 */
static void normal_rewrite_writemask_cb(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	const unsigned int conversion = *(unsigned int *)userdata;

	src->Swizzle = rc_adjust_channels(src->Swizzle, conversion);
}
 
/**
 * Counterpart of rc_pair_rewrite_writemask() for normal instructions.
 *
 * The destination writemask is converted first. Texture instructions then
 * get their TexSwizzle rebuilt (it is asserted to be RC_SWIZZLE_XYZW on
 * entry): for every channel i that conversion_swizzle maps to a selector of
 * 3 or below, that destination channel is made to select i. For opcodes for
 * which srcs_need_rewrite() returns non-zero, every source swizzle read by
 * the instruction is adjusted as well.
 */
void rc_normal_rewrite_writemask(
	struct rc_instruction * inst,
	unsigned int conversion_swizzle)
{
	struct rc_sub_instruction * sub = &inst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);

	sub->DstReg.WriteMask =
		rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);

	if (info->HasTexture) {
		unsigned int chan;

		assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
		for (chan = 0; chan < 4; chan++) {
			unsigned int target = GET_SWZ(conversion_swizzle, chan);

			if (target <= 3) {
				SET_SWZ(sub->TexSwizzle, target, chan);
			}
		}
	}

	if (srcs_need_rewrite(info)) {
		rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
			&conversion_swizzle);
	}
}
 
/**
 * Replace each selector 'swz' in swizzle with GET_SWZ(conversion_swizzle,
 * swz). Examples:
 *  - to change all the X's in swizzle to Y, conversion_swizzle should be
 *    Y___ (0xff9);
 *  - to change all the Y's in swizzle to X, conversion_swizzle should be
 *    _X__ (0xfc7);
 *  - to change the Y's to X and the X's to Y, conversion_swizzle should be
 *    YX__ (0xfc1).
 *
 * Selectors above 3, and selectors that conversion_swizzle maps to
 * RC_SWIZZLE_UNUSED, are kept as they are.
 *
 * @param swizzle The swizzle to change
 * @param conversion_swizzle Describes the conversion to perform on the swizzle
 * @return A converted swizzle
 */
unsigned int rc_rewrite_swizzle(
	unsigned int swizzle,
	unsigned int conversion_swizzle)
{
	unsigned int rewritten = swizzle;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		const unsigned int selector = GET_SWZ(swizzle, chan);
		unsigned int replacement = selector;

		if (selector <= 3) {
			unsigned int mapped = GET_SWZ(conversion_swizzle, selector);

			if (mapped != RC_SWIZZLE_UNUSED) {
				replacement = mapped;
			}
		}
		SET_SWZ(rewritten, chan, replacement);
	}

	return rewritten;
}
 
/**
* Left multiplication of a register with a swizzle
*/
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
{
struct rc_src_register tmp = srcreg;
int i;
tmp.Swizzle = 0;
tmp.Negate = 0;
for(i = 0; i < 4; ++i) {
rc_swizzle swz = GET_SWZ(swizzle, i);
if (swz < 4) {
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
} else {
tmp.Swizzle |= swz << (i*3);
}
}
return tmp;
}
 
/**
 * Reset a source register: all fields zeroed, then the swizzle set to
 * RC_SWIZZLE_XYZW.
 */
void reset_srcreg(struct rc_src_register* reg)
{
	memset(reg, 0, sizeof(*reg));
	reg->Swizzle = RC_SWIZZLE_XYZW;
}
 
/**
 * Compute which components written to a destination are read by a source.
 *
 * @return RC_MASK_NONE when the source and destination are not the same
 * register (file or index differ); otherwise the components of dst_mask
 * that src_swz references.
 */
unsigned int rc_src_reads_dst_mask(
	rc_register_file src_file,
	unsigned int src_idx,
	unsigned int src_swz,
	rc_register_file dst_file,
	unsigned int dst_idx,
	unsigned int dst_mask)
{
	if (src_file == dst_file && src_idx == dst_idx) {
		return dst_mask & rc_swizzle_to_writemask(src_swz);
	}

	return RC_MASK_NONE;
}
 
/**
 * Classify a swizzle by the kind of source it reads.
 *
 * @return A bit mask specifying whether this swizzle will select from an RGB
 * source (any channel selects X, Y or Z), an Alpha source (any channel
 * selects W), or both. RC_SOURCE_NONE when no channel selects a component.
 */
unsigned int rc_source_type_swz(unsigned int swizzle)
{
	unsigned int type = RC_SOURCE_NONE;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		switch (GET_SWZ(swizzle, chan)) {
		case RC_SWIZZLE_X:
		case RC_SWIZZLE_Y:
		case RC_SWIZZLE_Z:
			type |= RC_SOURCE_RGB;
			break;
		case RC_SWIZZLE_W:
			type |= RC_SOURCE_ALPHA;
			break;
		default:
			break;
		}
	}

	return type;
}
 
/**
 * Classify a component mask by the kind of source it covers.
 *
 * @return RC_SOURCE_RGB if mask has any of X, Y or Z set, RC_SOURCE_ALPHA
 * if it has W set, both bits or'd together if both apply, and
 * RC_SOURCE_NONE for an empty mask.
 */
unsigned int rc_source_type_mask(unsigned int mask)
{
	unsigned int type = RC_SOURCE_NONE;

	if (mask & RC_MASK_W) {
		type |= RC_SOURCE_ALPHA;
	}
	if (mask & RC_MASK_XYZ) {
		type |= RC_SOURCE_RGB;
	}

	return type;
}
 
/**
 * One source select recorded while checking whether an instruction can use
 * a presubtract operation (see rc_inst_can_use_presub()).
 */
struct src_select {
/* Register file of the source. */
rc_register_file File;
/* Register index of the source. */
int Index;
/* RC_SOURCE_RGB / RC_SOURCE_ALPHA bits, as computed by rc_source_type_swz()
 * from the source's swizzle. */
unsigned int SrcType;
};
 
/**
 * Working state of rc_inst_can_use_presub(): the list of source selects
 * collected so far plus the register that is going to be replaced.
 */
struct can_use_presub_data {
/* Collected selects; only the first SelectCount entries are valid. */
struct src_select Selects[5];
/* Number of entries of Selects in use. */
unsigned int SelectCount;
/* The source register that would be replaced; can_use_presub_read_cb()
 * skips its first occurrence instead of recording it. */
const struct rc_src_register * ReplaceReg;
/* Set to 1 once ReplaceReg has been skipped. */
unsigned int ReplaceRemoved;
};
 
/**
 * Append one source select to data->Selects and bump data->SelectCount.
 * No bounds check is performed; the caller must make sure there is still
 * room in the array.
 */
static void can_use_presub_data_add_select(
	struct can_use_presub_data * data,
	rc_register_file file,
	unsigned int index,
	unsigned int src_type)
{
	struct src_select * slot = &data->Selects[data->SelectCount];

	slot->File = file;
	slot->Index = index;
	slot->SrcType = src_type;
	data->SelectCount++;
}
 
/**
 * Source callback for rc_inst_can_use_presub(): records the sources read by
 * inst as source selects in the can_use_presub_data passed as userdata.
 *
 * The first source that is the very register pointed to by
 * can_use_presub_data->ReplaceReg is not recorded (it only sets
 * ReplaceRemoved), and sources with file RC_FILE_NONE are ignored.
 */
static void can_use_presub_read_cb(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct can_use_presub_data * d = userdata;

	if (src == d->ReplaceReg && !d->ReplaceRemoved) {
		d->ReplaceRemoved = 1;
		return;
	}

	if (src->File != RC_FILE_NONE) {
		can_use_presub_data_add_select(d, src->File, src->Index,
			rc_source_type_swz(src->Swizzle));
	}
}
 
/**
 * Check whether a normal instruction could read the result of a presubtract
 * operation in place of one of its sources.
 *
 * The check counts the source selects the instruction would need for RGB
 * and for Alpha once replace_reg is gone and the presubtract sources are
 * added; more than three of either kind makes the answer 0.
 *
 * @param inst The instruction that would read the presubtract result.
 * @param presub_op The presubtract operation. RC_PRESUB_NONE always yields 1.
 * @param presub_writemask Not consulted by this function.
 * @param replace_reg The source of inst that would be replaced; its first
 * occurrence is left out of the count.
 * @param presub_src0 First source of the presubtract operation.
 * @param presub_src1 Second source of the presubtract operation; only read
 * when presub_op takes more than one source register.
 * @return 1 if inst can use the presubtract operation, 0 otherwise
 * (including for texture instructions and for instructions that already
 * carry a presubtract operation).
 */
unsigned int rc_inst_can_use_presub(
	struct rc_instruction * inst,
	rc_presubtract_op presub_op,
	unsigned int presub_writemask,
	const struct rc_src_register * replace_reg,
	const struct rc_src_register * presub_src0,
	const struct rc_src_register * presub_src1)
{
	const struct rc_opcode_info * info =
		rc_get_opcode_info(inst->U.I.Opcode);
	struct can_use_presub_data d;
	unsigned int presub_src_count;
	unsigned int type0;
	unsigned int cur;
	int rgb_selects = 0;
	int alpha_selects = 0;

	if (presub_op == RC_PRESUB_NONE) {
		return 1;
	}

	if (info->HasTexture) {
		return 0;
	}

	/* We can't use more than one presubtract value in an
	 * instruction, unless the two presubtract operations
	 * are the same and read from the same registers.
	 * XXX For now we will limit instructions to only one presubtract
	 * value.*/
	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
		return 0;
	}

	/* Collect the sources the instruction keeps reading. */
	memset(&d, 0, sizeof(d));
	d.ReplaceReg = replace_reg;
	rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);

	/* Add the sources of the presubtract operation itself. */
	presub_src_count = rc_presubtract_src_reg_count(presub_op);

	type0 = rc_source_type_swz(presub_src0->Swizzle);
	can_use_presub_data_add_select(&d,
		presub_src0->File, presub_src0->Index, type0);

	if (presub_src_count > 1) {
		unsigned int type1 = rc_source_type_swz(presub_src1->Swizzle);

		can_use_presub_data_add_select(&d,
			presub_src1->File, presub_src1->Index, type1);

		/* Even if both of the presub sources read from the same
		 * register, we still need to use 2 different source selects
		 * for them, so we need to increment the count to compensate.
		 */
		if (presub_src0->File == presub_src1->File
		    && presub_src0->Index == presub_src1->Index) {
			unsigned int shared = type0 & type1;

			if (shared & RC_SOURCE_RGB) {
				rgb_selects++;
			}
			if (shared & RC_SOURCE_ALPHA) {
				alpha_selects++;
			}
		}
	}

	/* Count the number of source selects for Alpha and RGB. If we
	 * encounter two of the same source selects then we can ignore the
	 * first one. */
	for (cur = 0; cur < d.SelectCount; cur++) {
		unsigned int remaining = d.Selects[cur].SrcType;
		unsigned int later;

		for (later = cur + 1; later < d.SelectCount; later++) {
			if (d.Selects[cur].File == d.Selects[later].File
			    && d.Selects[cur].Index == d.Selects[later].Index) {
				remaining &= ~d.Selects[later].SrcType;
			}
		}
		if (remaining & RC_SOURCE_RGB) {
			rgb_selects++;
		}
		if (remaining & RC_SOURCE_ALPHA) {
			alpha_selects++;
		}
	}

	return rgb_selects <= 3 && alpha_selects <= 3;
}
 
/**
 * State shared between rc_get_max_index() and max_callback().
 */
struct max_data {
/* Largest index seen so far; only meaningful once HasFileType is set. */
unsigned int Max;
/* Set to 1 as soon as any register of File has been seen. */
unsigned int HasFileType;
/* The register file being searched for. */
rc_register_file File;
};
 
/**
 * Read/write mask callback for rc_get_max_index(): keeps track of the
 * largest index seen for the register file named in the max_data passed as
 * userdata. inst and mask are not used.
 */
static void max_callback(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct max_data * d = (struct max_data*)userdata;

	if (file != d->File) {
		return;
	}
	/* Keep the current maximum unless this is the first register of the
	 * file or a larger index. */
	if (d->HasFileType && index <= d->Max) {
		return;
	}

	d->Max = index;
	d->HasFileType = 1;
}
 
/**
 * Walk every instruction of the program and look at all register reads and
 * writes reported by rc_for_all_reads_mask() / rc_for_all_writes_mask().
 *
 * @return The maximum index of the specified register file used by the
 * program, or -1 if the program never uses that file.
 */
int rc_get_max_index(
	struct radeon_compiler * c,
	rc_register_file file)
{
	struct max_data data = { .Max = 0, .HasFileType = 0, .File = file };
	struct rc_instruction * cur = c->Program.Instructions.Next;

	while (cur != &c->Program.Instructions) {
		rc_for_all_reads_mask(cur, max_callback, &data);
		rc_for_all_writes_mask(cur, max_callback, &data);
		cur = cur->Next;
	}

	if (data.HasFileType) {
		return data.Max;
	}
	return -1;
}
 
/**
 * Collect the components that a pair sub-instruction reads from one of its
 * sources.
 *
 * Only arguments that use source number \p source and whose swizzle has
 * exactly the source type \p src_type (as computed by rc_source_type_swz())
 * contribute to the result.
 *
 * @return The union of the component masks of the matching arguments.
 */
static unsigned int get_source_readmask(
	struct rc_pair_sub_instruction * sub,
	unsigned int source,
	unsigned int src_type)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
	unsigned int mask = 0;
	unsigned int arg;

	for (arg = 0; arg < info->NumSrcRegs; arg++) {
		if (sub->Arg[arg].Source == source
		    && src_type == rc_source_type_swz(sub->Arg[arg].Swizzle)) {
			mask |= rc_swizzle_to_writemask(sub->Arg[arg].Swizzle);
		}
	}

	return mask;
}
 
/**
 * This function attempts to remove a source from a pair instruction.
 *
 * The removal is refused when the RGB or Alpha arguments currently read a
 * component of the source that new_readmask does not contain.
 *
 * @param inst The pair instruction to modify.
 * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
 * @param source The index of the source to remove
 * @param new_readmask A mask representing the components that are read by
 * the source that is intended to replace the one you are removing. If you
 * want to remove a source only and not replace it, this parameter should be
 * zero.
 * @return 1 if the source was successfully removed (the matching RGB and/or
 * Alpha source slots are zeroed), 0 if it was not
 */
unsigned int rc_pair_remove_src(
	struct rc_instruction * inst,
	unsigned int src_type,
	unsigned int source,
	unsigned int new_readmask)
{
	unsigned int rgb_mask =
		get_source_readmask(&inst->U.P.RGB, source, src_type);
	unsigned int alpha_mask =
		get_source_readmask(&inst->U.P.Alpha, source, src_type);
	unsigned int readmask = rgb_mask | alpha_mask;

	if ((new_readmask & readmask) != readmask) {
		return 0;
	}

	if (src_type & RC_SOURCE_RGB) {
		memset(&inst->U.P.RGB.Src[source], 0,
			sizeof(inst->U.P.RGB.Src[source]));
	}
	if (src_type & RC_SOURCE_ALPHA) {
		memset(&inst->U.P.Alpha.Src[source], 0,
			sizeof(inst->U.P.Alpha.Src[source]));
	}

	return 1;
}
 
/**
 * Identify flow control instructions. Normal instructions are judged by
 * their opcode, pair instructions by the opcode of their RGB half.
 *
 * @return RC_OPCODE_NOP if inst is not a flow control instruction.
 * @return The opcode of inst if it is a flow control instruction.
 */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
{
	const struct rc_opcode_info * info;

	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
		/* A flow control instruction shouldn't have an alpha
		 * instruction. */
		assert(!info->IsFlowControl ||
			inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
	} else {
		info = rc_get_opcode_info(inst->U.I.Opcode);
	}

	if (!info->IsFlowControl) {
		return RC_OPCODE_NOP;
	}
	return info->Opcode;
}
 
/**
 * Walk backwards from an ENDLOOP to find its BGNLOOP, skipping over
 * complete loops nested in between.
 *
 * @return The BGNLOOP instruction that starts the loop ended by endloop,
 * or NULL if the walk comes back around to endloop without finding one.
 */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
{
	unsigned int nested = 0;
	struct rc_instruction * cur = endloop->Prev;

	while (cur != endloop) {
		switch (rc_get_flow_control_inst(cur)) {
		case RC_OPCODE_ENDLOOP:
			nested++;
			break;
		case RC_OPCODE_BGNLOOP:
			if (nested == 0) {
				return cur;
			}
			nested--;
			break;
		default:
			break;
		}
		cur = cur->Prev;
	}

	return NULL;
}
 
/**
 * Walk forwards from a BGNLOOP to find its ENDLOOP, skipping over complete
 * loops nested in between.
 *
 * @return The ENDLOOP instruction that ends the loop started by bgnloop,
 * or NULL if the walk comes back around to bgnloop without finding one.
 */
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
{
	unsigned int nested = 0;
	struct rc_instruction * cur = bgnloop->Next;

	while (cur != bgnloop) {
		switch (rc_get_flow_control_inst(cur)) {
		case RC_OPCODE_BGNLOOP:
			nested++;
			break;
		case RC_OPCODE_ENDLOOP:
			if (nested == 0) {
				return cur;
			}
			nested--;
			break;
		default:
			break;
		}
		cur = cur->Next;
	}

	return NULL;
}
 
/**
 * Pair up the set bits of old_mask with the set bits of new_mask, lowest
 * first: the n-th component of old_mask is mapped to the n-th component of
 * new_mask. Components of old_mask that have no partner left, and channels
 * not in old_mask, stay RC_SWIZZLE_UNUSED.
 *
 * @return A conversion swizzle for converting from old_mask->new_mask
 */
unsigned int rc_make_conversion_swizzle(
	unsigned int old_mask,
	unsigned int new_mask)
{
	unsigned int conversion = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
	unsigned int new_chan = 0;
	unsigned int old_chan;

	for (old_chan = 0; old_chan < 4; old_chan++) {
		if (!GET_BIT(old_mask, old_chan)) {
			continue;
		}

		/* Advance to the next component that new_mask provides. */
		while (new_chan < 4 && !GET_BIT(new_mask, new_chan)) {
			new_chan++;
		}

		if (new_chan < 4) {
			SET_SWZ(conversion, old_chan, new_chan);
			new_chan++;
		}
	}

	return conversion;
}
 
/**
 * @param c The compiler whose constant list is consulted.
 * @param file The register file of the source.
 * @param index The register index of the source.
 * @return 1 if the register contains an immediate value, 0 otherwise.
 * Registers outside the constant file, and constant indices beyond the end
 * of the program's constant list, are reported as not immediate.
 */
unsigned int rc_src_reg_is_immediate(
	struct radeon_compiler * c,
	unsigned int file,
	unsigned int index)
{
	if (file != RC_FILE_CONSTANT) {
		return 0;
	}

	/* Never index past the end of the constant list; this is the same
	 * bounds check rc_get_constant_value() performs. */
	if (index >= c->Program.Constants.Count) {
		return 0;
	}

	return c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE;
}
 
/**
 * Fetch one component of an immediate constant as seen through a source
 * swizzle and negate mask.
 *
 * @param c The compiler whose constant list is read.
 * @param index Index of the constant.
 * @param swizzle Source swizzle; channel chan selects the component.
 * @param negate Negate mask; bit chan flips the sign of the result.
 * @param chan The channel of the source being evaluated.
 * @return The immediate value in the specified register. If the channel
 * selects anything other than a real component (selector >= 4), or index is
 * beyond the constant list, an error is raised on c and 0.0f is returned.
 */
float rc_get_constant_value(
	struct radeon_compiler * c,
	unsigned int index,
	unsigned int swizzle,
	unsigned int negate,
	unsigned int chan)
{
	const int component = GET_SWZ(swizzle, chan);
	float sign;

	if (component >= 4 || index >= c->Program.Constants.Count) {
		rc_error(c, "get_constant_value: Can't find a value.\n");
		return 0.0f;
	}

	sign = GET_BIT(negate, chan) ? -1.0f : 1.0f;

	return sign *
		c->Program.Constants.Constants[index].u.Immediate[component];
}
 
/**
 * This function returns the component value (RC_SWIZZLE_*) of the first used
 * channel in the swizzle, i.e. the first channel that is not
 * RC_SWIZZLE_UNUSED. This is only useful for scalar instructions that are
 * known to use only one channel of the swizzle.
 *
 * Asserts that at least one channel is used.
 */
unsigned int rc_get_scalar_src_swz(unsigned int swizzle)
{
	unsigned int chan = 0;
	unsigned int swz;

	do {
		swz = GET_SWZ(swizzle, chan);
		chan++;
	} while (swz == RC_SWIZZLE_UNUSED && chan < 4);

	assert(swz != RC_SWIZZLE_UNUSED);
	return swz;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_compiler_util.h
0,0 → 1,130
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H

/* Helper routines for working with the swizzles, writemasks and sources of
 * radeon compiler instructions. */

#include "radeon_program_constants.h"
#include "radeon_opcodes.h"

struct radeon_compiler;
struct rc_instruction;
struct rc_pair_instruction;
struct rc_pair_sub_instruction;
struct rc_src_register;

/* Mask of the components referenced by any channel of swz. */
unsigned int rc_swizzle_to_writemask(unsigned int swz);

/* Channel idx of swz, or idx itself when idx has bit 0x4 set. */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);

/* initial_value with all channels >= channels set to RC_SWIZZLE_UNUSED. */
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);

/* Swizzle built from four selectors, each resolved through src. */
unsigned int combine_swizzles4(unsigned int src,
		rc_swizzle swz_x, rc_swizzle swz_y,
		rc_swizzle swz_z, rc_swizzle swz_w);

/* Swizzle swz applied on top of swizzle src. */
unsigned int combine_swizzles(unsigned int src, unsigned int swz);

/* Selector for a single-component mask; RC_SWIZZLE_UNUSED otherwise. */
rc_swizzle rc_mask_to_swizzle(unsigned int mask);

/* Reorder the bits of mask according to swizzle. */
unsigned swizzle_mask(unsigned swizzle, unsigned mask);

/* Move the channels of old_swizzle to the positions named by
 * conversion_swizzle. */
unsigned int rc_adjust_channels(
	unsigned int old_swizzle,
	unsigned int conversion_swizzle);

/* Rewrite the writemask (and, where needed, the argument swizzles) of a
 * pair sub-instruction. */
void rc_pair_rewrite_writemask(
	struct rc_pair_sub_instruction * sub,
	unsigned int conversion_swizzle);

/* Same as rc_pair_rewrite_writemask(), for normal instructions. */
void rc_normal_rewrite_writemask(
	struct rc_instruction * inst,
	unsigned int conversion_swizzle);

/* Replace each selector in swizzle by its entry in conversion_swizzle. */
unsigned int rc_rewrite_swizzle(
	unsigned int swizzle,
	unsigned int conversion_swizzle);

/* Left multiplication of a register with a swizzle. */
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);

/* Zero a source register and give it the RC_SWIZZLE_XYZW swizzle. */
void reset_srcreg(struct rc_src_register* reg);

/* Components of dst_mask read by the source, or RC_MASK_NONE when source
 * and destination are different registers. */
unsigned int rc_src_reads_dst_mask(
	rc_register_file src_file,
	unsigned int src_idx,
	unsigned int src_swz,
	rc_register_file dst_file,
	unsigned int dst_idx,
	unsigned int dst_mask);

/* RC_SOURCE_RGB / RC_SOURCE_ALPHA bits for a swizzle. */
unsigned int rc_source_type_swz(unsigned int swizzle);

/* RC_SOURCE_RGB / RC_SOURCE_ALPHA bits for a component mask. */
unsigned int rc_source_type_mask(unsigned int mask);

/* 1 if inst can read the given presubtract operation in place of
 * replace_reg, 0 otherwise. */
unsigned int rc_inst_can_use_presub(
	struct rc_instruction * inst,
	rc_presubtract_op presub_op,
	unsigned int presub_writemask,
	const struct rc_src_register * replace_reg,
	const struct rc_src_register * presub_src0,
	const struct rc_src_register * presub_src1);

/* Largest index of the given register file used by the program, or -1. */
int rc_get_max_index(
	struct radeon_compiler * c,
	rc_register_file file);

/* Try to remove a source from a pair instruction; 1 on success. */
unsigned int rc_pair_remove_src(
	struct rc_instruction * inst,
	unsigned int src_type,
	unsigned int source,
	unsigned int new_readmask);

/* Opcode of inst if it is a flow control instruction, else RC_OPCODE_NOP. */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);

/* Find the BGNLOOP matching an ENDLOOP, and the other way around. */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);

/* Conversion swizzle for converting from old_mask to new_mask. */
unsigned int rc_make_conversion_swizzle(
	unsigned int old_mask,
	unsigned int new_mask);

/* 1 if the register contains an immediate value, 0 otherwise. */
unsigned int rc_src_reg_is_immediate(
	struct radeon_compiler * c,
	unsigned int file,
	unsigned int index);

/* One component of an immediate constant, with swizzle and negate applied. */
float rc_get_constant_value(
	struct radeon_compiler * c,
	unsigned int index,
	unsigned int swizzle,
	unsigned int negate,
	unsigned int chan);

/* Selector of the first used channel of a scalar source swizzle. */
unsigned int rc_get_scalar_src_swz(unsigned int swizzle);

#endif /* RADEON_PROGRAM_UTIL_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow.c
0,0 → 1,892
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_dataflow.h"
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_program.h"
 
/**
 * Adapter state used by rc_for_all_reads_mask() to turn per-source
 * callbacks into per-register (file, index, mask) callbacks.
 */
struct read_write_mask_data {
/* Passed through unchanged as the first argument of Cb. */
void * UserData;
/* The read/write mask callback to invoke. */
rc_read_write_mask_fn Cb;
};
 
/**
 * Source callback used by rc_for_all_reads_mask() for normal instructions.
 *
 * Computes the mask of components the source's swizzle references and, if
 * it is not empty, reports the register to the wrapped callback. A source
 * that uses relative addressing additionally reports a read of the X
 * component of address register 0.
 *
 * @param userdata Points to a struct read_write_mask_data.
 */
static void reads_normal_callback(
	void * userdata,
	struct rc_instruction * fullinst,
	struct rc_src_register * src)
{
	struct read_write_mask_data * cb_data = userdata;
	const unsigned int refmask = rc_swizzle_to_writemask(src->Swizzle);

	if (!refmask) {
		return;
	}

	cb_data->Cb(cb_data->UserData, fullinst, src->File,
		src->Index, refmask);

	if (src->RelAddr) {
		cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0,
			RC_MASK_X);
	}
}
 
static void pair_get_src_refmasks(unsigned int * refmasks,
struct rc_pair_instruction * inst,
unsigned int swz, unsigned int src)
{
if (swz >= 4)
return;
 
if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
if(src == RC_PAIR_PRESUB_SRC) {
unsigned int i;
int srcp_regs =
rc_presubtract_src_reg_count(
inst->RGB.Src[src].Index);
for(i = 0; i < srcp_regs; i++) {
refmasks[i] |= 1 << swz;
}
}
else {
refmasks[src] |= 1 << swz;
}
}
 
if (swz == RC_SWIZZLE_W) {
if (src == RC_PAIR_PRESUB_SRC) {
unsigned int i;
int srcp_regs = rc_presubtract_src_reg_count(
inst->Alpha.Src[src].Index);
for(i = 0; i < srcp_regs; i++) {
refmasks[i] |= 1 << swz;
}
}
else {
refmasks[src] |= 1 << swz;
}
}
}
 
/**
 * Report the register reads of a pair instruction to \p cb.
 *
 * The first three channels of each of the three RGB and Alpha argument
 * swizzles are gathered into one component mask per source. Each used RGB
 * source is then reported with the X/Y/Z part of its mask, and each used
 * Alpha source whose mask contains W is reported with RC_MASK_W.
 */
static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	unsigned int refmasks[3] = { 0, 0, 0 };
	unsigned int arg;
	unsigned int src;

	for (arg = 0; arg < 3; ++arg) {
		unsigned int chan;

		for (chan = 0; chan < 3; ++chan) {
			unsigned int rgb_selector =
				GET_SWZ(pair->RGB.Arg[arg].Swizzle, chan);
			unsigned int alpha_selector =
				GET_SWZ(pair->Alpha.Arg[arg].Swizzle, chan);

			pair_get_src_refmasks(refmasks, pair, rgb_selector,
				pair->RGB.Arg[arg].Source);
			pair_get_src_refmasks(refmasks, pair, alpha_selector,
				pair->Alpha.Arg[arg].Source);
		}
	}

	for (src = 0; src < 3; ++src) {
		const unsigned int rgb_mask = refmasks[src] & RC_MASK_XYZ;
		const unsigned int alpha_mask = refmasks[src] & RC_MASK_W;

		if (pair->RGB.Src[src].Used && rgb_mask) {
			cb(userdata, fullinst, pair->RGB.Src[src].File,
				pair->RGB.Src[src].Index, rgb_mask);
		}

		if (pair->Alpha.Src[src].Used && alpha_mask) {
			cb(userdata, fullinst, pair->Alpha.Src[src].File,
				pair->Alpha.Src[src].Index, RC_MASK_W);
		}
	}
}
 
/**
 * Call \p cb for every (argument, source) pair read by one half of a pair
 * instruction.
 *
 * Arguments whose swizzle selects no component are skipped. An argument
 * that reads the presubtract source produces one call per source register
 * of the presubtract operation; the operation and source array come from
 * the RGB half if the argument's swizzle selects any RGB component, and
 * from the Alpha half otherwise. Any other argument produces a single call
 * with the source returned by rc_pair_get_src(), if there is one.
 */
static void pair_sub_for_all_args(
	struct rc_instruction * fullinst,
	struct rc_pair_sub_instruction * sub,
	rc_pair_read_arg_fn cb,
	void * userdata)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
	int arg;

	for (arg = 0; arg < info->NumSrcRegs; arg++) {
		struct rc_pair_instruction_source * sources;
		unsigned int presub_op;
		unsigned int presub_regs;
		unsigned int n;
		unsigned int src_type = rc_source_type_swz(sub->Arg[arg].Swizzle);

		if (src_type == RC_SOURCE_NONE)
			continue;

		if (sub->Arg[arg].Source != RC_PAIR_PRESUB_SRC) {
			struct rc_pair_instruction_source * src =
				rc_pair_get_src(&fullinst->U.P, &sub->Arg[arg]);

			if (src) {
				cb(userdata, fullinst, &sub->Arg[arg], src);
			}
			continue;
		}

		/* Presubtract source: visit the registers feeding it. */
		if (src_type & RC_SOURCE_RGB) {
			presub_op = fullinst->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
			sources = fullinst->U.P.RGB.Src;
		} else {
			presub_op = fullinst->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index;
			sources = fullinst->U.P.Alpha.Src;
		}

		presub_regs = rc_presubtract_src_reg_count(presub_op);
		for (n = 0; n < presub_regs; n++) {
			cb(userdata, fullinst, &sub->Arg[arg], &sources[n]);
		}
	}
}
 
/**
 * This function calls the callback function (cb) for each source used by
 * the instruction.
 *
 * Sources whose file is RC_FILE_NONE are skipped. A source that reads the
 * presubtract result (RC_FILE_PRESUB) is expanded: cb is called once for
 * each source register of the instruction's presubtract operation instead.
 *
 * This function only works with normal instructions; for any other
 * instruction type it asserts and returns without calling cb.
 *
 * @param inst The instruction to inspect.
 * @param cb Called as cb(userdata, inst, source register).
 * @param userdata Passed through to cb unchanged.
 */
void rc_for_all_reads_src(
	struct rc_instruction * inst,
	rc_read_src_fn cb,
	void * userdata)
{
	const struct rc_opcode_info * opcode;
	unsigned int src;

	/* This function only works with normal instructions. */
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		assert(0);
		return;
	}

	/* Look the opcode up only now that the instruction is known to be a
	 * normal one: U.I.Opcode is not meaningful for other instruction
	 * types and must not be used to index the opcode table. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);

	for (src = 0; src < opcode->NumSrcRegs; ++src) {

		if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
			continue;

		if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
			unsigned int i;
			unsigned int srcp_regs = rc_presubtract_src_reg_count(
				inst->U.I.PreSub.Opcode);
			for (i = 0; i < srcp_regs; i++) {
				cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
			}
		} else {
			cb(userdata, inst, &inst->U.I.SrcReg[src]);
		}
	}
}
 
/**
 * This function calls the callback function (cb) for each arg of the RGB and
 * alpha components, RGB first.
 *
 * It only works with pair instructions; for any other instruction type it
 * asserts and does nothing.
 */
void rc_pair_for_all_reads_arg(
	struct rc_instruction * inst,
	rc_pair_read_arg_fn cb,
	void * userdata)
{
	if (inst->Type == RC_INSTRUCTION_PAIR) {
		pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata);
		pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata);
	} else {
		/* This function only works with pair instructions. */
		assert(0);
	}
}
 
/**
 * Calls a callback function for all register reads.
 *
 * This is conservative, i.e. if the same register is referenced multiple times,
 * the callback may also be called multiple times.
 * Also, the writemask of the instruction is not taken into account.
 *
 * Normal instructions are handled through rc_for_all_reads_src(); every
 * other instruction type is treated as a pair instruction.
 */
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
	struct read_write_mask_data cb_data;

	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		reads_pair(inst, cb, userdata);
		return;
	}

	cb_data.Cb = cb;
	cb_data.UserData = userdata;
	rc_for_all_reads_src(inst, reads_normal_callback, &cb_data);
}
 
 
 
/**
 * Report the register writes of a normal instruction to \p cb: the
 * destination register (if the opcode has one and the writemask is not
 * empty), followed by the ALU result register if the instruction writes it.
 */
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_sub_instruction * sub = &fullinst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);

	if (info->HasDstReg && sub->DstReg.WriteMask) {
		cb(userdata, fullinst, sub->DstReg.File, sub->DstReg.Index,
			sub->DstReg.WriteMask);
	}

	if (sub->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
			RC_MASK_X);
	}
}
 
/**
 * Report the register writes of a pair instruction to \p cb.
 *
 * Both halves are reported as writes to temporary registers: the RGB half
 * with its own writemask, the Alpha half with RC_MASK_W. A write of the
 * ALU result register is reported last.
 */
static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;

	if (pair->RGB.WriteMask) {
		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->RGB.DestIndex,
			pair->RGB.WriteMask);
	}

	if (pair->Alpha.WriteMask) {
		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->Alpha.DestIndex,
			RC_MASK_W);
	}

	if (pair->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
			RC_MASK_X);
	}
}
 
/**
 * Calls a callback function for all register writes in the instruction,
 * reporting writemasks to the callback function.
 *
 * Any instruction that is not a normal instruction is handled as a pair
 * instruction.
 *
 * \warning Does not report output registers for paired instructions!
 */
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		writes_pair(inst, cb, userdata);
		return;
	}

	writes_normal(inst, cb, userdata);
}
 
 
/**
 * Adapter state used to turn per-mask callbacks into per-channel callbacks
 * (see mask_to_chan_cb()).
 */
struct mask_to_chan_data {
/* Passed through unchanged as the first argument of Fn. */
void * UserData;
/* The per-channel callback to invoke. */
rc_read_write_chan_fn Fn;
};
 
/**
 * Mask callback that fans out to a per-channel callback: the wrapped
 * function is called once for each of the four channels set in \p mask,
 * lowest channel first.
 *
 * @param data Points to a struct mask_to_chan_data.
 */
static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct mask_to_chan_data * d = data;
	unsigned int chan = 0;

	while (chan < 4) {
		if (GET_BIT(mask, chan)) {
			d->Fn(d->UserData, inst, file, index, chan);
		}
		++chan;
	}
}
 
/**
 * Calls a callback function for all sourced register channels.
 *
 * This is conservative, i.e. channels may be called multiple times,
 * and the writemask of the instruction is not taken into account.
 *
 * Implemented on top of rc_for_all_reads_mask(), splitting each reported
 * mask into its individual channels.
 */
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
{
	struct mask_to_chan_data d = { .UserData = userdata, .Fn = cb };

	rc_for_all_reads_mask(inst, mask_to_chan_cb, &d);
}
 
/**
 * Calls a callback function for all written register channels.
 *
 * Implemented on top of rc_for_all_writes_mask(), splitting each reported
 * writemask into its individual channels.
 *
 * \warning Does not report output registers for paired instructions!
 */
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
{
	struct mask_to_chan_data d = { .UserData = userdata, .Fn = cb };

	rc_for_all_writes_mask(inst, mask_to_chan_cb, &d);
}
 
/**
 * Run the remap callback over every register referenced by a normal
 * instruction and store back whatever file/index the callback leaves.
 *
 * The destination (if the opcode has one) is remapped first, then each
 * source in order. A source that reads the presubtract result has the
 * presubtract operation's own sources remapped instead, and only the first
 * time such a source is met.
 */
static void remap_normal_instruction(struct rc_instruction * fullinst,
		rc_remap_register_fn cb, void * userdata)
{
	struct rc_sub_instruction * sub = &fullinst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
	unsigned int presub_done = 0;
	unsigned int s;

	if (info->HasDstReg) {
		rc_register_file dst_file = sub->DstReg.File;
		unsigned int dst_index = sub->DstReg.Index;

		cb(userdata, fullinst, &dst_file, &dst_index);

		sub->DstReg.File = dst_file;
		sub->DstReg.Index = dst_index;
	}

	for (s = 0; s < info->NumSrcRegs; s++) {
		rc_register_file file = sub->SrcReg[s].File;
		unsigned int index = sub->SrcReg[s].Index;
		unsigned int presub_count;
		unsigned int p;

		if (file != RC_FILE_PRESUB) {
			/* Ordinary source: remap it directly. */
			cb(userdata, fullinst, &file, &index);

			sub->SrcReg[s].File = file;
			sub->SrcReg[s].Index = index;
			continue;
		}

		presub_count = rc_presubtract_src_reg_count(sub->PreSub.Opcode);

		/* Several sources may read the presubtract result; its
		 * operands must nevertheless be remapped only once. */
		if (presub_done)
			continue;

		for (p = 0; p < presub_count; p++) {
			file = sub->PreSub.SrcReg[p].File;
			index = sub->PreSub.SrcReg[p].Index;
			cb(userdata, fullinst, &file, &index);
			sub->PreSub.SrcReg[p].File = file;
			sub->PreSub.SrcReg[p].Index = index;
		}
		presub_done = 1;
	}
}
 
/**
 * Run the remap callback over every register referenced by a paired
 * instruction and store back the result.
 *
 * Destinations are presented to the callback as RC_FILE_TEMPORARY and only
 * the index it returns is kept. Sources that are marked Used have both
 * their file and index updated. Callback order: RGB destination, alpha
 * destination, then for each of the three source slots the RGB source
 * followed by the alpha source.
 */
static void remap_pair_instruction(struct rc_instruction * fullinst,
		rc_remap_register_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	rc_register_file file;
	unsigned int index;
	unsigned int slot;

	if (pair->RGB.WriteMask) {
		file = RC_FILE_TEMPORARY;
		index = pair->RGB.DestIndex;
		cb(userdata, fullinst, &file, &index);
		pair->RGB.DestIndex = index;
	}

	if (pair->Alpha.WriteMask) {
		file = RC_FILE_TEMPORARY;
		index = pair->Alpha.DestIndex;
		cb(userdata, fullinst, &file, &index);
		pair->Alpha.DestIndex = index;
	}

	for (slot = 0; slot < 3; slot++) {
		if (pair->RGB.Src[slot].Used) {
			file = pair->RGB.Src[slot].File;
			index = pair->RGB.Src[slot].Index;
			cb(userdata, fullinst, &file, &index);
			pair->RGB.Src[slot].File = file;
			pair->RGB.Src[slot].Index = index;
		}

		if (pair->Alpha.Src[slot].Used) {
			file = pair->Alpha.Src[slot].File;
			index = pair->Alpha.Src[slot].Index;
			cb(userdata, fullinst, &file, &index);
			pair->Alpha.Src[slot].File = file;
			pair->Alpha.Src[slot].Index = index;
		}
	}
}
 
 
/**
 * Remap all register accesses of \p inst through \p cb.
 *
 * \p cb is called for each referenced register (reads and writes alike)
 * with pointers to the register's file and index; whatever it leaves in
 * \ref pfile and \ref pindex is written back into the instruction.
 */
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
{
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		remap_pair_instruction(inst, cb, userdata);
		return;
	}

	remap_normal_instruction(inst, cb, userdata);
}
 
/* Per-branch-level bookkeeping of AliveWriteMask snapshots used while
* scanning for readers (see push_branch_mask() / pop_branch_mask()). */
struct branch_write_mask {
/* AliveWriteMask captured by push_branch_mask() when the level is entered. */
unsigned int IfWriteMask:4;
/* AliveWriteMask captured when the ELSE of this level is reached. */
unsigned int ElseWriteMask:4;
/* Set once an ELSE has been seen for this level. */
unsigned int HasElse:1;
};
 
/* Holds either a normal-instruction or a pair-instruction read callback.
* NOTE(review): no use of this union is visible in this part of the file. */
union get_readers_read_cb {
rc_read_src_fn I;
rc_pair_read_arg_fn P;
};
 
/* State threaded through the reader-search callbacks for one
* rc_get_readers() / rc_get_readers_sub() call. */
struct get_readers_callback_data {
struct radeon_compiler * C;
/* Caller-provided result structure that readers are appended to. */
struct rc_reader_data * ReaderData;
/* Optional user callbacks (may be NULL; each is checked before use). */
rc_read_src_fn ReadNormalCB;
rc_pair_read_arg_fn ReadPairCB;
rc_read_write_mask_fn WriteCB;
/* Destination register (file, index, channel mask) currently tracked. */
rc_register_file DstFile;
unsigned int DstIndex;
unsigned int DstMask;
/* Channels of DstMask not yet overwritten by a later instruction. */
unsigned int AliveWriteMask;
/* For convenience, this is indexed starting at 1 */
struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
};
 
/**
 * Append a new entry to data->Readers, growing the array from \p pool when
 * needed, and fill in the fields common to normal and pair readers.
 *
 * \return the freshly appended entry so the caller can set the
 * type-specific part.
 */
static struct rc_reader * add_reader(
	struct memory_pool * pool,
	struct rc_reader_data * data,
	struct rc_instruction * inst,
	unsigned int mask)
{
	struct rc_reader * entry;

	memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
			data->ReaderCount, data->ReadersReserved, 1);

	entry = data->Readers + data->ReaderCount;
	data->ReaderCount++;

	entry->Inst = inst;
	entry->WriteMask = mask;
	return entry;
}
 
/**
 * Record a normal instruction as a reader, remembering which of its source
 * registers performs the read.
 */
static void add_reader_normal(
	struct memory_pool * pool,
	struct rc_reader_data * data,
	struct rc_instruction * inst,
	unsigned int mask,
	struct rc_src_register * src)
{
	struct rc_reader * entry = add_reader(pool, data, inst, mask);

	entry->U.I.Src = src;
}
 
 
/**
 * Record a pair instruction as a reader, remembering both the argument and
 * the source slot through which the read happens.
 */
static void add_reader_pair(
	struct memory_pool * pool,
	struct rc_reader_data * data,
	struct rc_instruction * inst,
	unsigned int mask,
	struct rc_pair_instruction_arg * arg,
	struct rc_pair_instruction_source * src)
{
	struct rc_reader * entry = add_reader(pool, data, inst, mask);

	entry->U.P.Src = src;
	entry->U.P.Arg = arg;
}
 
/**
 * Common part of the normal and pair read callbacks: decide whether one
 * source operand reads the destination currently being tracked and update
 * the abort bookkeeping accordingly.
 *
 * \return the mask computed by rc_src_reads_dst_mask() for this source, or
 * RC_MASK_NONE when the source uses relative addressing (which also sets
 * Abort) or does not read the tracked destination.
 */
static unsigned int get_readers_read_callback(
	struct get_readers_callback_data * cb_data,
	unsigned int has_rel_addr,
	rc_register_file file,
	unsigned int index,
	unsigned int swizzle)
{
	struct rc_reader_data * readers = cb_data->ReaderData;
	unsigned int overlap;
	unsigned int channels_read;

	if (has_rel_addr) {
		readers->Abort = 1;
		return RC_MASK_NONE;
	}

	overlap = rc_src_reads_dst_mask(file, index, swizzle,
		cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
	if (overlap == RC_MASK_NONE)
		return RC_MASK_NONE;

	/* From here on this source is known to read the register written
	 * by cb_data->ReaderData->Writer. */

	channels_read = rc_swizzle_to_writemask(swizzle);
	if (channels_read & readers->AbortOnRead) {
		readers->Abort = 1;
		return overlap;
	}

	if (readers->LoopDepth > 0)
		readers->AbortOnWrite |= channels_read & cb_data->AliveWriteMask;

	/* XXX The behavior in this case should be configurable. */
	if ((channels_read & ~cb_data->AliveWriteMask) != 0)
		readers->Abort = 1;

	return overlap;
}
 
static void get_readers_pair_read_callback(
void * userdata,
struct rc_instruction * inst,
struct rc_pair_instruction_arg * arg,
struct rc_pair_instruction_source * src)
{
unsigned int shared_mask;
struct get_readers_callback_data * d = userdata;
 
shared_mask = get_readers_read_callback(d,
0 /*Pair Instructions don't use RelAddr*/,
src->File, src->Index, arg->Swizzle);
 
if (shared_mask == RC_MASK_NONE)
return;
 
if (d->ReadPairCB)
d->ReadPairCB(d->ReaderData, inst, arg, src);
 
if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
return;
 
add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src);
}
 
/**
* This function is used by rc_get_readers_normal() to determine whether inst
* is a reader of userdata->ReaderData->Writer
*/
static void get_readers_normal_read_callback(
void * userdata,
struct rc_instruction * inst,
struct rc_src_register * src)
{
struct get_readers_callback_data * d = userdata;
unsigned int shared_mask;
 
shared_mask = get_readers_read_callback(d,
src->RelAddr, src->File, src->Index, src->Swizzle);
 
if (shared_mask == RC_MASK_NONE)
return;
/* The callback function could potentially clear d->ReaderData->Abort,
* so we need to call it before we return. */
if (d->ReadNormalCB)
d->ReadNormalCB(d->ReaderData, inst, src);
 
if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
return;
 
add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
}
 
/**
 * Write callback used while scanning for readers. It tracks when
 * userdata->ReaderData->Writer becomes dead, i.e. when all components of
 * its destination register have been overwritten by other instructions.
 *
 * A write to the tracked register clears the overwritten channels from
 * AliveWriteMask and AbortOnRead, and raises Abort if any of them is in
 * AbortOnWrite. The user's write callback, if any, is called for every
 * write regardless of the register.
 */
static void get_readers_write_callback(
	void *userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct get_readers_callback_data * cb_data = userdata;
	const int hits_tracked_dst =
		(file == cb_data->DstFile) && (index == cb_data->DstIndex);

	if (hits_tracked_dst) {
		const unsigned int overwritten = mask & cb_data->DstMask;

		cb_data->ReaderData->AbortOnRead &= ~overwritten;
		cb_data->AliveWriteMask &= ~overwritten;
		if (cb_data->ReaderData->AbortOnWrite & overwritten)
			cb_data->ReaderData->Abort = 1;
	}

	if (cb_data->WriteCB)
		cb_data->WriteCB(cb_data->ReaderData, inst, file, index, mask);
}
 
/**
 * Enter a new branch level: increment \p branch_depth and snapshot the
 * current AliveWriteMask into that level's IfWriteMask.
 *
 * When the new depth exceeds R500_PFS_MAX_BRANCH_DEPTH_FULL nothing is
 * stored and Abort is raised instead; the depth counter stays incremented.
 */
static void push_branch_mask(
	struct get_readers_callback_data * d,
	unsigned int * branch_depth)
{
	const unsigned int depth = ++(*branch_depth);

	if (depth <= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
		d->BranchMasks[depth].IfWriteMask = d->AliveWriteMask;
	} else {
		d->ReaderData->Abort = 1;
	}
}
 
static void pop_branch_mask(
struct get_readers_callback_data * d,
unsigned int * branch_depth)
{
struct branch_write_mask * masks = &d->BranchMasks[*branch_depth];
 
if (masks->HasElse) {
/* Abort on read for components that were written in the IF
* block. */
d->ReaderData->AbortOnRead |=
masks->IfWriteMask & ~masks->ElseWriteMask;
/* Abort on read for components that were written in the ELSE
* block. */
d->ReaderData->AbortOnRead |=
masks->ElseWriteMask & ~d->AliveWriteMask;
 
d->AliveWriteMask = masks->IfWriteMask
^ ((masks->IfWriteMask ^ masks->ElseWriteMask)
& (masks->IfWriteMask ^ d->AliveWriteMask));
} else {
d->ReaderData->AbortOnRead |=
masks->IfWriteMask & ~d->AliveWriteMask;
d->AliveWriteMask = masks->IfWriteMask;
 
}
memset(masks, 0, sizeof(struct branch_write_mask));
(*branch_depth)--;
}
 
/**
 * Collect the readers of one register write performed by \p writer.
 *
 * Has the signature of rc_read_write_mask_fn so it can be driven by
 * rc_for_all_writes_mask(); \p userdata is a
 * struct get_readers_callback_data. The per-write state in \p userdata is
 * reset, then the instructions following \p writer are walked, tracking
 * flow control, until the end of the program, until every written channel
 * has been overwritten outside of any branch, or (with ExitOnAbort) until
 * Abort is raised.
 *
 * \param dst_file, dst_index, dst_mask the register and channels written;
 * nothing is scanned when \p dst_mask is empty.
 */
static void get_readers_for_single_write(
	void * userdata,
	struct rc_instruction * writer,
	rc_register_file dst_file,
	unsigned int dst_index,
	unsigned int dst_mask)
{
	struct rc_instruction * tmp;
	unsigned int branch_depth = 0;
	struct rc_instruction * endloop = NULL;
	unsigned int abort_on_read_at_endloop = 0;
	struct get_readers_callback_data * d = userdata;

	d->ReaderData->Writer = writer;
	d->ReaderData->AbortOnRead = 0;
	d->ReaderData->AbortOnWrite = 0;
	d->ReaderData->LoopDepth = 0;
	d->ReaderData->InElse = 0;
	d->DstFile = dst_file;
	d->DstIndex = dst_index;
	d->DstMask = dst_mask;
	d->AliveWriteMask = dst_mask;
	memset(d->BranchMasks, 0, sizeof(d->BranchMasks));

	if (!dst_mask)
		return;

	for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
							tmp = tmp->Next){
		rc_opcode opcode = rc_get_flow_control_inst(tmp);
		switch(opcode) {
		case RC_OPCODE_BGNLOOP:
			d->ReaderData->LoopDepth++;
			push_branch_mask(d, &branch_depth);
			break;
		case RC_OPCODE_ENDLOOP:
			if (d->ReaderData->LoopDepth > 0) {
				d->ReaderData->LoopDepth--;
				if (d->ReaderData->LoopDepth == 0) {
					d->ReaderData->AbortOnWrite = 0;
				}
				pop_branch_mask(d, &branch_depth);
			} else {
				/* Here we have reached an ENDLOOP without
				 * seeing its BGNLOOP. This means that
				 * the writer was written inside of a loop,
				 * so it could have readers that are above it
				 * (i.e. they have a lower IP). To find these
				 * readers we jump to the BGNLOOP instruction
				 * and check each instruction until we get
				 * back to the writer.
				 */
				endloop = tmp;
				tmp = rc_match_endloop(tmp);
				if (!tmp) {
					rc_error(d->C, "Failed to match endloop.\n");
					d->ReaderData->Abort = 1;
					return;
				}
				abort_on_read_at_endloop = d->ReaderData->AbortOnRead;
				d->ReaderData->AbortOnRead |= d->AliveWriteMask;
				/* The loop increment moves on to the
				 * instruction after the BGNLOOP. */
				continue;
			}
			break;
		case RC_OPCODE_IF:
			push_branch_mask(d, &branch_depth);
			break;
		case RC_OPCODE_ELSE:
			if (branch_depth == 0) {
				/* ELSE of an IF that precedes the writer. */
				d->ReaderData->InElse = 1;
			} else if (branch_depth <= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
				unsigned int temp_mask = d->AliveWriteMask;
				d->AliveWriteMask =
					d->BranchMasks[branch_depth].IfWriteMask;
				d->BranchMasks[branch_depth].ElseWriteMask =
								temp_mask;
				d->BranchMasks[branch_depth].HasElse = 1;
			}
			/* Deeper levels have no BranchMasks slot
			 * (push_branch_mask() raised Abort for them), so
			 * there is nothing to swap. */
			break;
		case RC_OPCODE_ENDIF:
			if (branch_depth == 0) {
				d->ReaderData->AbortOnRead = d->AliveWriteMask;
				d->ReaderData->InElse = 0;
			}
			else {
				pop_branch_mask(d, &branch_depth);
			}
			break;
		default:
			break;
		}

		/* Instructions in the ELSE block of an IF enclosing the
		 * writer are skipped entirely. */
		if (d->ReaderData->InElse)
			continue;

		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
			rc_for_all_reads_src(tmp,
				get_readers_normal_read_callback, d);
		} else {
			rc_pair_for_all_reads_arg(tmp,
				get_readers_pair_read_callback, d);
		}

		/* This can happen when we jump from an ENDLOOP to BGNLOOP */
		if (tmp == writer) {
			tmp = endloop;
			endloop = NULL;
			d->ReaderData->AbortOnRead = abort_on_read_at_endloop;
			continue;
		}
		rc_for_all_writes_mask(tmp, get_readers_write_callback, d);

		if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
			return;

		/* Every tracked channel has been overwritten outside of any
		 * branch: no later instruction can read the writer. */
		if (branch_depth == 0 && !d->AliveWriteMask)
			return;
	}
}
 
/**
 * Prepare a reader search: clear the result fields of \p reader_data
 * (Abort flag and reader list) and wire the compiler, result structure and
 * user callbacks into \p d.
 *
 * Other fields of \p reader_data, such as ExitOnAbort, are left as the
 * caller set them.
 */
static void init_get_readers_callback_data(
	struct get_readers_callback_data * d,
	struct rc_reader_data * reader_data,
	struct radeon_compiler * c,
	rc_read_src_fn read_normal_cb,
	rc_pair_read_arg_fn read_pair_cb,
	rc_read_write_mask_fn write_cb)
{
	/* Search context. */
	d->C = c;
	d->ReaderData = reader_data;
	d->ReadNormalCB = read_normal_cb;
	d->ReadPairCB = read_pair_cb;
	d->WriteCB = write_cb;

	/* Start from an empty result. */
	reader_data->Readers = NULL;
	reader_data->ReadersReserved = 0;
	reader_data->ReaderCount = 0;
	reader_data->Abort = 0;
}
 
/**
 * Build the list of readers of \p writer in \p data.
 *
 * The search aborts (sets data->Abort) when it meets an instruction that
 * reads a value from \p writer and also from a different instruction.
 * Some examples:
 *
 * writer = instruction 0;
 * 0 MOV TEMP[0].xy, TEMP[1].xy
 * 1 MOV TEMP[0].zw, TEMP[2].xy
 * 2 MOV TEMP[3], TEMP[0]
 * Abort is set on instruction 2, because it reads values written by
 * instructions 0 and 1.
 *
 * writer = instruction 1;
 * 0 IF TEMP[0].x
 * 1 MOV TEMP[1], TEMP[2]
 * 2 ELSE
 * 3 MOV TEMP[1], TEMP[2]
 * 4 ENDIF
 * 5 MOV TEMP[3], TEMP[1]
 * Abort is set on instruction 5, because it could read the value written
 * by either instruction 1 or 3, depending on the jump decision made at
 * instruction 0.
 *
 * writer = instruction 0;
 * 0 MOV TEMP[0], TEMP[1]
 * 2 BGNLOOP
 * 3 ADD TEMP[0], TEMP[0], none.1
 * 4 ENDLOOP
 * Abort is set on instruction 3, because in the first iteration of the
 * loop it reads the value written by instruction 0 and in all other
 * iterations it reads the value written by instruction 3.
 *
 * @param read_normal_cb Called for every normal instruction source that
 * has been determined to read from writer. May be NULL.
 * @param read_pair_cb Same as read_normal_cb, for pair instructions.
 * @param write_cb Called for every register write of the instructions
 * scanned after writer. May be NULL.
 */
void rc_get_readers(
	struct radeon_compiler * c,
	struct rc_instruction * writer,
	struct rc_reader_data * data,
	rc_read_src_fn read_normal_cb,
	rc_pair_read_arg_fn read_pair_cb,
	rc_read_write_mask_fn write_cb)
{
	struct get_readers_callback_data search;

	init_get_readers_callback_data(&search, data, c,
		read_normal_cb, read_pair_cb, write_cb);

	/* One reader search per register written by the instruction. */
	rc_for_all_writes_mask(writer, get_readers_for_single_write, &search);
}
 
/**
 * Variant of rc_get_readers() for one half of a pair instruction: collects
 * the readers of the temporary register written by \p sub_writer.
 *
 * \p data is reset in all cases; no search is done when \p sub_writer has
 * an empty write mask.
 */
void rc_get_readers_sub(
	struct radeon_compiler * c,
	struct rc_instruction * writer,
	struct rc_pair_sub_instruction * sub_writer,
	struct rc_reader_data * data,
	rc_read_src_fn read_normal_cb,
	rc_pair_read_arg_fn read_pair_cb,
	rc_read_write_mask_fn write_cb)
{
	struct get_readers_callback_data search;

	init_get_readers_callback_data(&search, data, c,
		read_normal_cb, read_pair_cb, write_cb);

	if (!sub_writer->WriteMask)
		return;

	get_readers_for_single_write(&search, writer, RC_FILE_TEMPORARY,
		sub_writer->DestIndex, sub_writer->WriteMask);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow.h
0,0 → 1,135
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_DATAFLOW_H
#define RADEON_DATAFLOW_H
 
#include "radeon_program_constants.h"
 
/* Forward declarations: this header only handles these types through
* pointers, so their full definitions are not needed here. */
struct radeon_compiler;
struct rc_instruction;
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
struct rc_pair_instruction_source;
struct rc_pair_sub_instruction;
struct rc_compiler;
 
 
/**
* Help analyze and modify the register accesses of instructions.
*/
/*@{*/
/* Per-channel callback: receives one register channel (file, index, chan)
* per call. */
typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);

/* Per-register callback: receives a register (file, index) together with
* the mask of channels accessed. */
typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask);
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);

/* Callback receiving a pointer to a source register of a normal
* instruction. */
typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
struct rc_src_register * src);
void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
void * userdata);

/* Callback receiving an argument of a pair instruction and the source slot
* it refers to. */
typedef void (*rc_pair_read_arg_fn)(void * userdata,
struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
struct rc_pair_instruction_source * src);
void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
rc_pair_read_arg_fn cb, void * userdata);

/* Remap callback: *pfile and *pindex hold the current register on entry;
* the values left there are written back into the instruction. */
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
/*@}*/
 
/* One entry of the reader list produced by rc_get_readers(). */
struct rc_reader {
/* The instruction that performs the read. */
struct rc_instruction * Inst;
/* Mask of the writer's channels that this read shares. */
unsigned int WriteMask;
/* Which operand does the reading: I for normal instructions, P for pair
* instructions. */
union {
struct {
struct rc_src_register * Src;
} I;
struct {
struct rc_pair_instruction_arg * Arg;
struct rc_pair_instruction_source * Src;
} P;
} U;
};
 
/* Result and working state of a reader search (rc_get_readers() and
* rc_get_readers_sub()). */
struct rc_reader_data {
/* Set when the search found a situation it cannot handle. */
unsigned int Abort;
/* Channel mask: reading any of these channels raises Abort. */
unsigned int AbortOnRead;
/* Channel mask: overwriting any of these channels raises Abort. */
unsigned int AbortOnWrite;
/* Number of loops entered since the writer during the scan. */
unsigned int LoopDepth;
/* Non-zero while scanning the ELSE block of an IF enclosing the writer. */
unsigned int InElse;
/* The instruction whose readers are being collected. */
struct rc_instruction * Writer;

/* Reader list: number of entries used, capacity, and storage. */
unsigned int ReaderCount;
unsigned int ReadersReserved;
struct rc_reader * Readers;

/* If this flag is enabled, rc_get_readers will exit as soon as possible
* after the Abort flag is set.*/
unsigned int ExitOnAbort;
/* NOTE(review): presumably free for the caller's callbacks; not touched
* by the search code itself -- confirm. */
void * CbData;
};
 
/* Collect in data the instructions that read the registers written by
* writer. The callbacks may be NULL. */
void rc_get_readers(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_reader_data * data,
rc_read_src_fn read_normal_cb,
rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);

/* Same as rc_get_readers(), restricted to the temporary register written
* by one sub-instruction (sub_writer) of the pair instruction writer. */
void rc_get_readers_sub(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_pair_sub_instruction * sub_writer,
struct rc_reader_data * data,
rc_read_src_fn read_normal_cb,
rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
/**
* Compiler passes based on dataflow analysis.
*/
/*@{*/
/* Callback through which the dead-code pass learns which outputs are used:
* it is expected to call mark_fn(data, index, mask) for each used output. */
typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
/* user is a rc_dataflow_mark_outputs_fn passed as a void pointer. */
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user);
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
/*@}*/

void rc_optimize(struct radeon_compiler * c, void *user);
void rc_inline_literals(struct radeon_compiler *c, void *user);
 
#endif /* RADEON_DATAFLOW_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
0,0 → 1,359
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_dataflow.h"
 
#include "radeon_compiler.h"
 
 
/* Per-register masks of the channels currently considered used, one group
* per register file tracked by the dead-code pass. */
struct updatemask_state {
unsigned char Output[RC_REGISTER_MAX_INDEX];
unsigned char Temporary[RC_REGISTER_MAX_INDEX];
unsigned char Address;
unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
};
 
/* What the dead-code pass has learned about one instruction (indexed by
* the instruction's IP). */
struct instruction_state {
/* Destination channels found to be used. */
unsigned char WriteMask:4;
/* Set when the instruction's ALU result write is used. */
unsigned char WriteALUResult:1;
/* Per source: channels already accounted for as needed. */
unsigned char SrcReg[3];
};
 
/* Loop stack entry: the used-masks saved at each BRK seen for the loop. */
struct loopinfo {
struct updatemask_state * Breaks; /* pool-allocated array of snapshots */
unsigned int BreakCount; /* entries in use */
unsigned int BreaksReserved; /* entries allocated */
};
 
/* Branch stack entry, created when an ENDIF is met while walking the
* program backwards. */
struct branchinfo {
/* Set once the ELSE of this branch has been seen. */
unsigned int HaveElse:1;

/* Used-masks saved at the ENDIF. */
struct updatemask_state StoreEndif;
/* Used-masks saved at the ELSE; only valid when HaveElse is set. */
struct updatemask_state StoreElse;
};
 
/* Complete working state of one rc_dataflow_deadcode() run. */
struct deadcode_state {
struct radeon_compiler * C;
/* One entry per instruction, indexed by IP. */
struct instruction_state * Instructions;

/* Used-masks at the current position of the backwards walk. */
struct updatemask_state R;

/* Growable stack of open branches (size in use / reserved). */
struct branchinfo * BranchStack;
unsigned int BranchStackSize;
unsigned int BranchStackReserved;

/* Growable stack of open loops (size in use / reserved). */
struct loopinfo * LoopStack;
unsigned int LoopStackSize;
unsigned int LoopStackReserved;
};
 
 
/**
 * Store in \p dst the bitwise OR of the used-masks in \p a and \p b, field
 * by field. \p dst may be the same object as \p a or \p b.
 */
static void or_updatemasks(
	struct updatemask_state * dst,
	struct updatemask_state * a,
	struct updatemask_state * b)
{
	unsigned int reg;
	unsigned int special;

	dst->Address = a->Address | b->Address;

	for (reg = 0; reg < RC_REGISTER_MAX_INDEX; reg++) {
		dst->Output[reg] = a->Output[reg] | b->Output[reg];
		dst->Temporary[reg] = a->Temporary[reg] | b->Temporary[reg];
	}

	for (special = 0; special < RC_NUM_SPECIAL_REGISTERS; special++) {
		dst->Special[special] = a->Special[special] | b->Special[special];
	}
}
 
/**
 * Save a copy of the current used-masks in the break list of the innermost
 * open loop.
 *
 * If no loop is open there is no entry to record into (and
 * LoopStackSize - 1 would wrap around), so an error is reported and
 * nothing is stored.
 */
static void push_break(struct deadcode_state *s)
{
	struct loopinfo * loop;

	if (s->LoopStackSize == 0) {
		rc_error(s->C, "%s: BRK outside of a loop\n", __FUNCTION__);
		return;
	}

	loop = &s->LoopStack[s->LoopStackSize - 1];
	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);

	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
}
 
/**
 * Open a new, zero-initialised entry on top of the loop stack, growing the
 * stack from the compiler's memory pool when needed.
 */
static void push_loop(struct deadcode_state * s)
{
	struct loopinfo * slot;

	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
			s->LoopStackSize, s->LoopStackReserved, 1);

	slot = &s->LoopStack[s->LoopStackSize];
	s->LoopStackSize++;
	memset(slot, 0, sizeof(*slot));
}
 
/**
 * Open a new entry on top of the branch stack and snapshot the current
 * used-masks into its StoreEndif. StoreElse is left untouched until an
 * ELSE is seen.
 */
static void push_branch(struct deadcode_state * s)
{
	struct branchinfo * top;

	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
			s->BranchStackSize, s->BranchStackReserved, 1);

	top = s->BranchStack + s->BranchStackSize;
	s->BranchStackSize++;

	memcpy(&top->StoreEndif, &s->R, sizeof(s->R));
	top->HaveElse = 0;
}
 
/**
 * Look up the used-mask byte for a register in the current state.
 *
 * \return a pointer into s->R for output, temporary, address and special
 * registers; 0 for any other register file, or (after reporting an error)
 * when \p index is out of range.
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return 0;
		}
		return (file == RC_FILE_OUTPUT)
			? &s->R.Output[index]
			: &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		/* There is a single address mask; the index is ignored. */
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return 0;
		}
		return &s->R.Special[index];

	default:
		return 0;
	}
}
 
/**
 * Add the channels in \p mask to the used-mask of the given register.
 * Registers that get_used_ptr() does not track are silently ignored.
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * slot = get_used_ptr(s, file, index);

	if (!slot)
		return;

	*slot |= mask;
}
 
/**
* Backward dataflow step for one instruction: determine which of the
* channels it writes are currently used, record that in the instruction's
* state, clear those channels from the used-masks (this instruction
* provides them), and mark the source channels needed to compute them as
* used.
*/
static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
{
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
struct instruction_state * insts = &s->Instructions[inst->IP];
unsigned int usedmask = 0;
unsigned int srcmasks[3];

if (opcode->HasDstReg) {
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
if (pused) {
/* Written channels that someone later reads; they are satisfied by
* this instruction, so drop them from the used set. */
usedmask = *pused & inst->U.I.DstReg.WriteMask;
*pused &= ~usedmask;
}
}

/* Recorded before the ALU-result channel is added to usedmask below. */
insts->WriteMask |= usedmask;

if (inst->U.I.WriteALUResult) {
unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
if (pused && *pused) {
/* The ALU result is used: the channel it is taken from must be
* computed even if the destination write itself is unused. */
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
usedmask |= RC_MASK_X;
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
usedmask |= RC_MASK_W;

*pused = 0;
insts->WriteALUResult = 1;
}
}

rc_compute_sources_for_writemask(inst, usedmask, srcmasks);

for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
unsigned int refmask = 0;
/* Only consider source channels not already accounted for. */
unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
insts->SrcReg[src] |= newsrcmask;

/* Translate needed source channel positions into the register
* components they select through the swizzle. */
for(unsigned int chan = 0; chan < 4; ++chan) {
if (GET_BIT(newsrcmask, chan))
refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
}

/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
refmask &= RC_MASK_XYZW;

if (!refmask)
continue;

mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);

/* A relatively addressed source also reads the address register. */
if (inst->U.I.SrcReg[src].RelAddr)
mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
}
 
/**
 * Callback handed to the rc_dataflow_mark_outputs_fn: marks channels of an
 * output register as used. \p data is the struct deadcode_state.
 */
static void mark_output_use(void * data, unsigned int index, unsigned int mask)
{
	mark_used((struct deadcode_state *)data, RC_FILE_OUTPUT, index, mask);
}
 
/**
 * Dead-code elimination pass.
 *
 * \param user a rc_dataflow_mark_outputs_fn (passed as void pointer) that
 * is called once to mark the used program outputs.
 *
 * The program is first walked backwards, maintaining per-register masks of
 * used channels across branches and loops and recording for every
 * instruction which of its written channels are used. It is then walked
 * forwards: write masks are narrowed to the used channels, instructions
 * that have a destination but no used result are removed, and source
 * swizzle channels that are not needed are set to RC_SWIZZLE_UNUSED.
 */
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
{
	struct deadcode_state s;
	unsigned int nr_instructions;
	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
	unsigned int ip;

	memset(&s, 0, sizeof(s));
	s.C = c;

	nr_instructions = rc_recompute_ips(c);
	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);

	dce(c, &s, &mark_output_use);

	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
	    inst != &c->Program.Instructions;
	    inst = inst->Prev) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		switch(opcode->Opcode){
		/* Mark all sources in the loop body as used before doing
		 * normal deadcode analysis. This is probably not optimal.
		 */
		case RC_OPCODE_ENDLOOP:
		{
			int endloops = 1;
			struct rc_instruction *ptr;
			/* Stop at the list head as well, so that an ENDLOOP
			 * without BGNLOOP cannot make the scan run around
			 * the circular list forever. */
			for(ptr = inst->Prev;
			    endloops > 0 && ptr != &c->Program.Instructions;
			    ptr = ptr->Prev){
				const struct rc_opcode_info * body_opcode =
					rc_get_opcode_info(ptr->U.I.Opcode);
				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
					endloops--;
					continue;
				}
				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
					endloops++;
					continue;
				}
				if(body_opcode->HasDstReg){
					int src = 0;
					unsigned int srcmasks[3];
					rc_compute_sources_for_writemask(ptr,
						ptr->U.I.DstReg.WriteMask, srcmasks);
					/* NOTE(review): srcmasks are passed
					 * as register channel masks without
					 * going through the source swizzle,
					 * unlike update_instruction() --
					 * confirm this is intended. */
					for(src=0; src < body_opcode->NumSrcRegs; src++){
						mark_used(&s,
							ptr->U.I.SrcReg[src].File,
							ptr->U.I.SrcReg[src].Index,
							srcmasks[src]);
					}
				}
			}
			if (endloops > 0)
				rc_error(c, "%s: ENDLOOP without matching BGNLOOP\n", __FUNCTION__);
			push_loop(&s);
			break;
		}
		case RC_OPCODE_BRK:
			if (s.LoopStackSize)
				push_break(&s);
			else
				rc_error(c, "%s: BRK outside of a loop\n", __FUNCTION__);
			break;
		case RC_OPCODE_BGNLOOP:
		{
			unsigned int i;
			struct loopinfo * loop;

			if (!s.LoopStackSize) {
				rc_error(c, "%s: BGNLOOP without matching ENDLOOP\n", __FUNCTION__);
				break;
			}

			/* Merge the states saved at the breaks of this loop
			 * into the state at loop entry. */
			loop = &s.LoopStack[s.LoopStackSize-1];
			for(i = 0; i < loop->BreakCount; i++) {
				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
			}

			/* This loop is done (we walk backwards); pop it so
			 * that an enclosing loop sees its own entry again. */
			s.LoopStackSize--;
			break;
		}
		case RC_OPCODE_CONT:
			break;
		case RC_OPCODE_ENDIF:
			push_branch(&s);
			break;
		default:
			if (opcode->IsFlowControl && s.BranchStackSize) {
				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
				if (opcode->Opcode == RC_OPCODE_IF) {
					/* Join the state of the taken block
					 * (current s.R) with the state of
					 * the other path. */
					or_updatemasks(&s.R,
							&s.R,
							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);

					s.BranchStackSize--;
				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
					if (branch->HaveElse) {
						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
					} else {
						/* Keep the ELSE block's state and
						 * restart from the ENDIF state
						 * for the IF block. */
						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
						branch->HaveElse = 1;
					}
				} else {
					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
				}
			}
		}

		update_instruction(&s, inst);
	}

	/* ip follows the IPs assigned by rc_recompute_ips() above, so it
	 * must advance for removed instructions too. */
	ip = 0;
	for(struct rc_instruction * inst = c->Program.Instructions.Next;
	    inst != &c->Program.Instructions;
	    inst = inst->Next, ++ip) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		int dead = 1;
		unsigned int srcmasks[3];
		unsigned int usemask;

		if (!opcode->HasDstReg) {
			dead = 0;
		} else {
			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
			if (s.Instructions[ip].WriteMask)
				dead = 0;

			if (s.Instructions[ip].WriteALUResult)
				dead = 0;
			else
				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
		}

		if (dead) {
			/* Step back first so the loop increment lands on the
			 * instruction following the removed one. */
			struct rc_instruction * todelete = inst;
			inst = inst->Prev;
			rc_remove_instruction(todelete);
			continue;
		}

		usemask = s.Instructions[ip].WriteMask;

		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
			usemask |= RC_MASK_X;
		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
			usemask |= RC_MASK_W;

		rc_compute_sources_for_writemask(inst, usemask, srcmasks);

		for(unsigned int src = 0; src < 3; ++src) {
			for(unsigned int chan = 0; chan < 4; ++chan) {
				if (!GET_BIT(srcmasks[src], chan))
					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
			}
		}
	}

	rc_calculate_inputs_outputs(c);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
0,0 → 1,448
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2012 Advanced Micro Devices, Inc.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Nicolai Haehnle
* Tom Stellard <thomas.stellard@amd.com>
*/
 
#include "radeon_dataflow.h"
 
#include "radeon_code.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
 
 
/**
 * Replace source \p src of \p inst by a fresh temporary that is filled by
 * MOV instructions inserted before \p inst.
 *
 * The used channels of the source are split into phases by the swizzle
 * capabilities' Split() hook; one MOV per phase copies the original source
 * (and presubtract setup) into the corresponding channels of the
 * temporary. Afterwards \p inst reads the temporary with an identity
 * swizzle on the used channels and without negate or abs.
 */
static void rewrite_source(struct radeon_compiler * c,
		struct rc_instruction * inst, unsigned src)
{
	struct rc_swizzle_split split;
	unsigned int tempreg = rc_find_free_temporary(c);
	unsigned int usemask;

	/* Channels of the source that the instruction actually reads. */
	usemask = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
			usemask |= 1 << chan;
	}

	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);

	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
		unsigned int masked_negate;

		mov->U.I.Opcode = RC_OPCODE_MOV;
		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		mov->U.I.DstReg.Index = tempreg;
		mov->U.I.DstReg.WriteMask = split.Phase[phase];
		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
		mov->U.I.PreSub = inst->U.I.PreSub;

		/* Channels outside this phase are not read by the MOV. */
		for(unsigned int chan = 0; chan < 4; ++chan) {
			if (!GET_BIT(split.Phase[phase], chan))
				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
		}

		/* Normalise the negate mask when, within this phase, either
		 * no channel or every channel is negated; a mixed mask is
		 * left as it is. */
		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
		if (masked_negate == 0)
			mov->U.I.SrcReg[0].Negate = 0;
		else if (masked_negate == split.Phase[phase])
			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;

	}

	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[src].Index = tempreg;
	inst->U.I.SrcReg[src].Swizzle = 0;
	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
	inst->U.I.SrcReg[src].Abs = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
	}
}
 
/**
* This function will attempt to rewrite non-native swizzles that read from
* immediate registers by rearranging the immediates to allow the
* instruction to use native swizzles.
*
* A register that is not an immediate is rewritten as well when every
* channel is either unused or selects an inline constant. In both cases a
* new immediate vec4 is added to the constant table and reg is updated in
* place (File, Index, Swizzle, Negate) to read from it.
*
* Returns 1 if reg was rewritten, 0 if reg is not an immediate and at least
* one channel reads an X/Y/Z/W component.
*/
static unsigned try_rewrite_constant(struct radeon_compiler *c,
struct rc_src_register *reg)
{
unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
unsigned all_inline = 0;
/* Values of the new immediate, indexed by the component they end up in. */
float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};

if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
/* The register does not contain immediates, but if all
* the swizzles are inline constants, we can still rewrite
* it. */

new_swizzle = RC_SWIZZLE_XYZW;
for (chan = 0 ; chan < 4; chan++) {
unsigned swz = GET_SWZ(reg->Swizzle, chan);
/* A real component is read, give up. */
if (swz <= RC_SWIZZLE_W) {
return 0;
}
if (swz == RC_SWIZZLE_UNUSED) {
SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
}
}
all_inline = 1;
} else {
new_swizzle = reg->Swizzle;
}

swz = RC_SWIZZLE_UNUSED;
found_swizzle = 1;
/* Check if all channels have the same swizzle. If they do we can skip
* the search for a native swizzle. We only need to check the first
* three channels, because any swizzle is legal in the fourth channel.
*/
for (chan = 0; chan < 3; chan++) {
unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
if (chan_swz == RC_SWIZZLE_UNUSED) {
continue;
}
if (swz == RC_SWIZZLE_UNUSED) {
swz = chan_swz;
} else if (swz != chan_swz) {
found_swizzle = 0;
break;
}
}

/* Find a legal swizzle */

/* This loop attempts to find a native swizzle where all the
* channels are different. Every pass changes one channel of
* new_swizzle and either finishes or re-examines the result. */
while (!found_swizzle && !all_inline) {
swz0 = GET_SWZ(new_swizzle, 0);
swz1 = GET_SWZ(new_swizzle, 1);
swz2 = GET_SWZ(new_swizzle, 2);

/* Swizzle .W. is never legal. */
if (swz1 == RC_SWIZZLE_W ||
swz1 == RC_SWIZZLE_UNUSED ||
swz1 == RC_SWIZZLE_ZERO ||
swz1 == RC_SWIZZLE_HALF ||
swz1 == RC_SWIZZLE_ONE) {
/* We chose Z, because there are two non-repeating
* swizzle combinations of the form .Z. There are
* only one combination each for .X. and .Y. */
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
continue;
}

if (swz2 == RC_SWIZZLE_UNUSED) {
/* We choose Y, because there are two non-repeating
* swizzle combinations of the form ..Y */
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
continue;
}

switch (swz0) {
/* X.. */
case RC_SWIZZLE_X:
/* Legal swizzles that start with X: XYZ, XXX */
switch (swz1) {
/* XX. */
case RC_SWIZZLE_X:
/* The new swizzle will be:
* ZXY (XX. => ZX. => ZXY) */
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
break;
/* XY. */
case RC_SWIZZLE_Y:
/* The new swizzle is XYZ */
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
found_swizzle = 1;
break;
/* XZ. */
case RC_SWIZZLE_Z:
/* XZZ */
if (swz2 == RC_SWIZZLE_Z) {
/* The new swizzle is XYZ */
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
found_swizzle = 1;
} else { /* XZ[^Z] */
/* The new swizzle will be:
* YZX (XZ. => YZ. => YZX) */
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
}
break;
/* XW. Should have already been handled. */
case RC_SWIZZLE_W:
assert(0);
break;
}
break;
/* Y.. */
case RC_SWIZZLE_Y:
/* Legal swizzles that start with Y: YYY, YZX */
switch (swz1) {
/* YY. */
case RC_SWIZZLE_Y:
/* The new swizzle will be:
* XYZ (YY. => XY. => XYZ) */
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
break;
/* YZ. */
case RC_SWIZZLE_Z:
/* The new swizzle is YZX */
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
found_swizzle = 1;
break;
/* YX. */
case RC_SWIZZLE_X:
/* YXX */
if (swz2 == RC_SWIZZLE_X) {
/*The new swizzle is YZX */
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
found_swizzle = 1;
} else { /* YX[^X] */
/* The new swizzle will be:
* ZXY (YX. => ZX. -> ZXY) */
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
}
break;
/* YW. Should have already been handled. */
case RC_SWIZZLE_W:
assert(0);
break;
}
break;
/* Z.. */
case RC_SWIZZLE_Z:
/* Legal swizzles that start with Z: ZZZ, ZXY */
switch (swz1) {
/* ZZ. */
case RC_SWIZZLE_Z:
/* The new swizzle will be:
* WZY (ZZ. => WZ. => WZY) */
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
break;
/* ZX. */
case RC_SWIZZLE_X:
/* The new swizzle is ZXY */
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
found_swizzle = 1;
break;
/* ZY. */
case RC_SWIZZLE_Y:
/* ZYY */
if (swz2 == RC_SWIZZLE_Y) {
/* The new swizzle is ZXY */
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
found_swizzle = 1;
} else { /* ZY[^Y] */
/* The new swizzle will be:
* XYZ (ZY. => XY. => XYZ) */
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
}
break;
/* ZW. Should have already been handled. */
case RC_SWIZZLE_W:
assert(0);
break;
}
break;

/* W.. */
case RC_SWIZZLE_W:
/* Legal swizzles that start with W: WWW, WZY */
switch (swz1) {
/* WW. Should have already been handled. */
case RC_SWIZZLE_W:
assert(0);
break;
/* WZ. */
case RC_SWIZZLE_Z:
/* The new swizzle will be WZY */
SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
found_swizzle = 1;
break;
/* WX. */
case RC_SWIZZLE_X:
/* WY. */
case RC_SWIZZLE_Y:
/* W[XY]Y */
if (swz2 == RC_SWIZZLE_Y) {
/* The new swizzle will be WZY */
SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
found_swizzle = 1;
} else { /* W[XY][^Y] */
/* The new swizzle will be:
* ZXY (WX. => XX. => ZX. => ZXY) or
* XYZ (WY. => XY. => XYZ)
*/
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
}
break;
}
break;
/* U.. 0.. 1.. H..*/
case RC_SWIZZLE_UNUSED:
case RC_SWIZZLE_ZERO:
case RC_SWIZZLE_ONE:
case RC_SWIZZLE_HALF:
SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
break;
}
}

/* Handle the swizzle in the w channel. */
swz3 = GET_SWZ(reg->Swizzle, 3);

/* We can skip this if the swizzle in channel w is an inline constant. */
if (swz3 <= RC_SWIZZLE_W) {
for (chan = 0; chan < 3; chan++) {
unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
unsigned new_swz = GET_SWZ(new_swizzle, chan);
/* If the swizzle in the w channel is the same as the
* swizzle in any other channels, we need to rewrite it.
* For example:
* reg->Swizzle == XWZW
* new_swizzle == XYZX
* Since the swizzle in the y channel is being
* rewritten from W -> Y we need to change the swizzle
* in the w channel from W -> Y as well.
*/
if (old_swz == swz3) {
SET_SWZ(new_swizzle, 3,
GET_SWZ(new_swizzle, chan));
break;
}

/* The swizzle in channel w will be overwritten by one
* of the new swizzles. */
if (new_swz == swz3) {
/* Find an unused swizzle */
unsigned i;
unsigned used = 0;
for (i = 0; i < 3; i++) {
used |= 1 << GET_SWZ(new_swizzle, i);
}
/* There is no break here, so the highest component
* that is not in use ends up in channel w. */
for (i = 0; i < 4; i++) {
if (used & (1 << i)) {
continue;
}
SET_SWZ(new_swizzle, 3, i);
}
}
}
}

/* Fill in the values of the new immediate and switch reg over to the
* new swizzle, one channel at a time. */
for (chan = 0; chan < 4; chan++) {
unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
unsigned new_swz = GET_SWZ(new_swizzle, chan);

if (old_swz == RC_SWIZZLE_UNUSED) {
continue;
}

/* We don't need to change the swizzle in channel w if it is
* an inline constant. These are always legal in the w channel.
*
* Swizzles with a value > RC_SWIZZLE_W are inline constants.
*/
if (chan == 3 && old_swz > RC_SWIZZLE_W) {
continue;
}

assert(new_swz <= RC_SWIZZLE_W);

/* The negate bit of the channel is folded into the stored value;
* reg->Negate is cleared at the end of the function. */
switch (old_swz) {
case RC_SWIZZLE_ZERO:
imms[new_swz] = 0.0f;
break;
case RC_SWIZZLE_HALF:
if (reg->Negate & (1 << chan)) {
imms[new_swz] = -0.5f;
} else {
imms[new_swz] = 0.5f;
}
break;
case RC_SWIZZLE_ONE:
if (reg->Negate & (1 << chan)) {
imms[new_swz] = -1.0f;
} else {
imms[new_swz] = 1.0f;
}
break;
default:
imms[new_swz] = rc_get_constant_value(c, reg->Index,
reg->Swizzle, reg->Negate, chan);
}
SET_SWZ(reg->Swizzle, chan, new_swz);
}
reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
imms);
/* We need to set the register file to CONSTANT in case we are
* converting a non-constant register with constant swizzles (e.g.
* ONE, ZERO, HALF).
*/
reg->File = RC_FILE_CONSTANT;
reg->Negate = 0;
return 1;
}
 
/**
 * Compiler pass: make sure every source operand of every instruction uses
 * a swizzle that SwizzleCaps->IsNative accepts.
 *
 * Operands that are not native are first offered to
 * try_rewrite_constant() (only on non-R500 parts and while the constant
 * table still has room); everything else goes through rewrite_source().
 * The constant table is printed afterwards when RC_DBG_LOG is set.
 *
 * \param c    compiler context
 * \param user unused
 */
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * cur = c->Program.Instructions.Next;

	while (cur != &c->Program.Instructions) {
		const struct rc_opcode_info * info =
			rc_get_opcode_info(cur->U.I.Opcode);
		unsigned int i;

		for (i = 0; i < info->NumSrcRegs; ++i) {
			struct rc_src_register *operand = &cur->U.I.SrcReg[i];

			if (c->SwizzleCaps->IsNative(cur->U.I.Opcode, *operand))
				continue;

			/* Prefer a rearranged immediate over extra MOVs. */
			if (c->is_r500 ||
			    c->Program.Constants.Count >= R300_PFS_NUM_CONST_REGS ||
			    !try_rewrite_constant(c, operand)) {
				rewrite_source(c, cur, i);
			}
		}

		cur = cur->Next;
	}

	if (c->Debug & RC_DBG_LOG)
		rc_constants_print(&c->Program.Constants);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
0,0 → 1,342
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "radeon_emulate_branches.h"
 
#include <stdio.h>
 
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
 
/* Set to a non-zero value to enable the debug output of this file. */
#define VERBOSE 0

/* printf-style message on stderr, emitted only when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
 
 
/* Proxy state of a single register within one branch of an IF block. */
struct proxy_info {
/* Set once a proxy temporary has been allocated for the register. */
unsigned int Proxied:1;
/* Index of the proxy temporary; readers check Proxied before using it. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
 
/* Proxy table of one branch, indexed by temporary register index. */
struct register_proxies {
struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
};
 
/* One currently open IF block. */
struct branch_info {
/* The IF instruction, stored by handle_if(). */
struct rc_instruction * If;
/* The ELSE instruction, stored by handle_else(); null if none was seen. */
struct rc_instruction * Else;
};
 
/* State of the branch emulation pass. */
struct emulate_branch_state {
struct radeon_compiler * C;

/* Stack of open IF blocks; the innermost one is at BranchCount - 1. */
struct branch_info * Branches;
unsigned int BranchCount;
/* Passed to memory_pool_array_reserve(); presumably the number of
* allocated entries in Branches - TODO confirm. */
unsigned int BranchReserved;
};
 
 
static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
{
struct branch_info * branch;
struct rc_instruction * inst_mov;
 
memory_pool_array_reserve(&s->C->Pool, struct branch_info,
s->Branches, s->BranchCount, s->BranchReserved, 1);
 
DBG("%s\n", __FUNCTION__);
 
branch = &s->Branches[s->BranchCount++];
memset(branch, 0, sizeof(struct branch_info));
branch->If = inst;
 
/* Make a safety copy of the decision register, because we will need
* it at ENDIF time and it might be overwritten in both branches. */
inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
 
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
inst->U.I.SrcReg[0].Swizzle = 0;
inst->U.I.SrcReg[0].Abs = 0;
inst->U.I.SrcReg[0].Negate = 0;
}
 
/**
 * Remember the ELSE instruction \p inst in the innermost open branch.
 * Reports an error if no branch is open.
 */
static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	if (s->BranchCount == 0) {
		rc_error(s->C, "Encountered ELSE outside of branches");
		return;
	}

	DBG("%s\n", __FUNCTION__);

	s->Branches[s->BranchCount - 1].Else = inst;
}
 
 
/* Userdata handed to the scan_write() and remap_proxy_function() callbacks. */
struct state_and_proxies {
struct emulate_branch_state * S;
/* Proxy table of the branch that is currently being processed. */
struct register_proxies * Proxies;
};
 
/**
 * Look up the proxy entry of register \p index in \p file.
 *
 * \return the entry for temporaries, a null pointer for every other file.
 */
static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
		rc_register_file file, unsigned int index)
{
	if (file != RC_FILE_TEMPORARY)
		return 0;

	return &sap->Proxies->Temporary[index];
}
 
/**
 * rc_for_all_writes_mask() callback: allocate a proxy temporary the first
 * time a proxy-capable register is written.
 */
static void scan_write(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int comp)
{
	struct state_and_proxies * ctx = userdata;
	struct proxy_info * entry = get_proxy_info(ctx, file, index);

	/* Nothing to do for unsupported files or already proxied registers. */
	if (!entry || entry->Proxied)
		return;

	entry->Proxied = 1;
	entry->Index = rc_find_free_temporary(ctx->S->C);
}
 
/**
 * rc_remap_registers() callback: redirect a register that has a proxy to
 * its proxy temporary; other registers are left alone.
 */
static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct state_and_proxies * ctx = userdata;
	struct proxy_info * entry = get_proxy_info(ctx, *pfile, *pindex);

	if (!entry || !entry->Proxied)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = entry->Index;
}
 
/**
* Redirect all writes in the instruction range [begin, end) to proxy
* temporary registers.
*/
static void allocate_and_insert_proxies(struct emulate_branch_state * s,
struct register_proxies * proxies,
struct rc_instruction * begin,
struct rc_instruction * end)
{
struct state_and_proxies sap;
 
sap.S = s;
sap.Proxies = proxies;
 
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
rc_for_all_writes_mask(inst, scan_write, &sap);
rc_remap_registers(inst, remap_proxy_function, &sap);
}
 
for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
if (proxies->Temporary[index].Proxied) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mov->U.I.SrcReg[0].Index = index;
}
}
}
 
 
/**
 * Insert a CMP after \p inst_endif that writes register \p index of
 * \p file, choosing between the value computed by the IF branch and the
 * one computed by the ELSE branch.
 *
 * The first operand is the condition read by \p inst_if with Abs set and
 * all channels negated.  A branch without a proxy contributes the original
 * register.
 */
static void inject_cmp(struct emulate_branch_state * s,
		struct rc_instruction * inst_if,
		struct rc_instruction * inst_endif,
		rc_register_file file, unsigned int index,
		struct proxy_info ifproxy,
		struct proxy_info elseproxy)
{
	struct rc_instruction * cmp = rc_insert_new_instruction(s->C, inst_endif);
	struct rc_src_register * cond = &cmp->U.I.SrcReg[0];
	struct rc_src_register * if_value = &cmp->U.I.SrcReg[1];
	struct rc_src_register * else_value = &cmp->U.I.SrcReg[2];

	cmp->U.I.Opcode = RC_OPCODE_CMP;
	cmp->U.I.DstReg.File = file;
	cmp->U.I.DstReg.Index = index;
	cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;

	*cond = inst_if->U.I.SrcReg[0];
	cond->Abs = 1;
	cond->Negate = RC_MASK_XYZW;

	if_value->File = RC_FILE_TEMPORARY;
	if (ifproxy.Proxied)
		if_value->Index = ifproxy.Index;
	else
		if_value->Index = index;

	else_value->File = RC_FILE_TEMPORARY;
	if (elseproxy.Proxied)
		else_value->Index = elseproxy.Index;
	else
		else_value->Index = index;
}
 
static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
{
struct branch_info * branch;
struct register_proxies IfProxies;
struct register_proxies ElseProxies;
 
if (!s->BranchCount) {
rc_error(s->C, "Encountered ENDIF outside of branches");
return;
}
 
DBG("%s\n", __FUNCTION__);
 
branch = &s->Branches[s->BranchCount - 1];
 
memset(&IfProxies, 0, sizeof(IfProxies));
memset(&ElseProxies, 0, sizeof(ElseProxies));
 
allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
 
if (branch->Else)
allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
 
/* Insert the CMP instructions at the end. */
for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
IfProxies.Temporary[index], ElseProxies.Temporary[index]);
}
}
 
/* Remove all traces of the branch instructions */
rc_remove_instruction(branch->If);
if (branch->Else)
rc_remove_instruction(branch->Else);
rc_remove_instruction(inst);
 
s->BranchCount--;
 
if (VERBOSE) {
DBG("Program after ENDIF handling:\n");
rc_print_program(&s->C->Program);
}
}
 
 
/* Userdata of remap_output_function(). */
struct remap_output_data {
/* Index of the output register that is being replaced. */
unsigned int Output:RC_REGISTER_INDEX_BITS;
/* Index of the temporary register that takes its place. */
unsigned int Temporary:RC_REGISTER_INDEX_BITS;
};
 
/**
 * rc_remap_registers() callback: replace the output register selected in
 * the remap_output_data by its temporary; other registers are left alone.
 */
static void remap_output_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct remap_output_data * map = userdata;

	if (*pfile != RC_FILE_OUTPUT || *pindex != map->Output)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = map->Temporary;
}
 
 
/**
* Output registers cannot be read from and so cannot be dealt with like
* temporary registers.
*
* We do the simplest thing: If an output registers is written within
* a branch, then *all* writes to this register are proxied to a
* temporary register, and a final MOV is appended to the end of
* the program.
*/
static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
{
const struct rc_opcode_info * opcode;
 
if (!s->BranchCount)
return;
 
opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
if (!opcode->HasDstReg)
return;
 
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
struct remap_output_data remap;
struct rc_instruction * inst_mov;
 
remap.Output = inst->U.I.DstReg.Index;
remap.Temporary = rc_find_free_temporary(s->C);
 
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
rc_remap_registers(inst, &remap_output_function, &remap);
}
 
inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
inst_mov->U.I.DstReg.Index = remap.Output;
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
}
}
 
/**
* Remove branch instructions; instead, execute both branches
* on different register sets and choose between their results
* using CMP instructions in place of the original ENDIF.
*/
void rc_emulate_branches(struct radeon_compiler *c, void *user)
{
struct emulate_branch_state s;
struct rc_instruction * ptr;
 
memset(&s, 0, sizeof(s));
s.C = c;
 
/* Untypical loop because we may remove the current instruction */
ptr = c->Program.Instructions.Next;
while(ptr != &c->Program.Instructions) {
struct rc_instruction * inst = ptr;
ptr = ptr->Next;
 
if (inst->Type == RC_INSTRUCTION_NORMAL) {
switch(inst->U.I.Opcode) {
case RC_OPCODE_IF:
handle_if(&s, inst);
break;
case RC_OPCODE_ELSE:
handle_else(&s, inst);
break;
case RC_OPCODE_ENDIF:
handle_endif(&s, inst);
break;
default:
fix_output_writes(&s, inst);
break;
}
} else {
rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
}
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
0,0 → 1,30
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef RADEON_EMULATE_BRANCHES_H
#define RADEON_EMULATE_BRANCHES_H

struct radeon_compiler;

/* Compiler pass that removes IF/ELSE/ENDIF instructions and selects between
* the results of both branches with CMP instructions. */
void rc_emulate_branches(struct radeon_compiler *c, void *user);

#endif /* RADEON_EMULATE_BRANCHES_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
0,0 → 1,521
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file
*/
 
#include "radeon_emulate_loops.h"
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
 
/* Set to a non-zero value to enable the debug output of this file. */
#define VERBOSE 0

/* printf-style message on stderr, emitted only when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
 
/* Userdata of update_const_value(): tracks the immediate value that was
* last MOVed into a register. */
struct const_value {
struct radeon_compiler * C;
/* The register (and swizzle) whose value is being tracked. */
struct rc_src_register * Src;
/* The value that was found; only valid if HasValue is set. */
float Value;
int HasValue;
};
 
/* Userdata of get_incr_amount(): describes how a loop counter changes
* during one iteration of the loop. */
struct count_inst {
struct radeon_compiler * C;
/* Index of the temporary register that holds the counter. */
int Index;
/* Swizzle with which the loop condition reads the counter. */
rc_swizzle Swz;
/* Sum of the constant amounts added to the counter. */
float Amount;
/* Set when the counter is modified in a way that cannot be analyzed. */
int Unknown;
/* Number of IF blocks that are open at the instruction being examined;
* maintained by try_unroll_loop(). */
unsigned BranchDepth;
};
 
/**
 * Compute how many iterations of \p loop fit into the ALU instruction
 * budget of the program.
 *
 * \return 1 + the number of additional copies of the loop body that fit
 * into the instructions left over by the current program.  If the program
 * already uses up the whole budget, or the loop body is empty, only the
 * one iteration that is already part of the program is reported.
 */
static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
		struct loop_info * loop)
{
	unsigned int total_i = rc_recompute_ips(c);
	unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
	/* The subtraction below is carried out in unsigned arithmetic. */
	unsigned int budget = c->max_alu_insts;

	/* Without this check the subtraction wraps around to a huge value
	 * when the program is over budget, and the division traps for an
	 * empty loop body. */
	if (loop_i == 0 || budget <= total_i) {
		return 1;
	}

	/* +1 because the program already has one iteration of the loop. */
	return 1 + ((budget - total_i) / loop_i);
}
 
/**
 * Unroll \p loop so that its body appears \p iterations times.
 *
 * BGNLOOP and ENDLOOP are removed, which leaves the body in the program
 * once; the body is then copied (iterations - 1) more times, each copy
 * being appended behind the previous one.
 */
static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
		unsigned int iterations)
{
	unsigned int pass;
	struct rc_instruction * src;
	struct rc_instruction * body_first = loop->BeginLoop->Next;
	struct rc_instruction * body_last = loop->EndLoop->Prev;
	struct rc_instruction * tail = body_last;

	rc_remove_instruction(loop->BeginLoop);
	rc_remove_instruction(loop->EndLoop);

	for (pass = 1; pass < iterations; pass++) {
		/* body_last->Next is re-read on every step: once the first
		 * copy exists it points at that copy, which is where the
		 * walk over the original body has to stop. */
		src = body_first;
		while (src != body_last->Next) {
			struct rc_instruction * copy = rc_alloc_instruction(c);

			memcpy(copy, src, sizeof(struct rc_instruction));
			rc_insert_instruction(tail, copy);
			tail = copy;
			src = src->Next;
		}
	}
}
 
 
/**
 * rc_for_all_writes_mask() callback: if \p inst is a MOV of an immediate
 * into the tracked register (the channel selected by the first swizzle
 * entry must be part of \p mask), record that immediate as the current
 * value of the register.
 */
static void update_const_value(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct const_value * tracked = data;
	struct rc_src_register * from = &inst->U.I.SrcReg[0];

	/* Only writes to the tracked register and channel are of interest. */
	if (tracked->Src->File != file || tracked->Src->Index != index)
		return;
	if (!(1 << GET_SWZ(tracked->Src->Swizzle, 0) & mask))
		return;

	if (inst->U.I.Opcode != RC_OPCODE_MOV)
		return;
	if (!rc_src_reg_is_immediate(tracked->C, from->File, from->Index))
		return;

	tracked->HasValue = 1;
	tracked->Value = rc_get_constant_value(tracked->C, from->Index,
			from->Swizzle, from->Negate, 0);
}
 
/**
 * rc_for_all_writes_mask() callback: accumulate the constant amount by
 * which \p inst changes the loop counter described by \p data (a struct
 * count_inst).
 *
 * Writes to other registers or channels are ignored.  A write to the
 * counter is understood only if it is an ADD or SUB outside of any IF
 * block with the counter as one operand and an immediate as the other;
 * in every other case count_inst->Unknown is set so that the caller gives
 * up on the loop.
 */
static void get_incr_amount(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct count_inst * count_inst = data;
	int amnt_src_index;
	const struct rc_opcode_info * opcode;
	float amount;

	if(file != RC_FILE_TEMPORARY ||
	   count_inst->Index != index ||
	   (1 << GET_SWZ(count_inst->Swz,0) != mask)){
		return;
	}

	/* XXX: Give up if the counter is modified within an IF block. We
	 * could handle this case with better analysis. */
	if (count_inst->BranchDepth > 0) {
		count_inst->Unknown = 1;
		return;
	}

	/* Find the index of the counter register. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	if(opcode->NumSrcRegs != 2){
		count_inst->Unknown = 1;
		return;
	}
	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
		amnt_src_index = 1;
	} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
		amnt_src_index = 0;
	}
	else{
		count_inst->Unknown = 1;
		return;
	}

	/* The other operand has to be an immediate. */
	if(rc_src_reg_is_immediate(count_inst->C,
				inst->U.I.SrcReg[amnt_src_index].File,
				inst->U.I.SrcReg[amnt_src_index].Index)){
		amount = rc_get_constant_value(count_inst->C,
				inst->U.I.SrcReg[amnt_src_index].Index,
				inst->U.I.SrcReg[amnt_src_index].Swizzle,
				inst->U.I.SrcReg[amnt_src_index].Negate, 0);
	}
	else{
		count_inst->Unknown = 1;
		return;
	}

	switch(inst->U.I.Opcode){
	case RC_OPCODE_ADD:
		count_inst->Amount += amount;
		break;
	case RC_OPCODE_SUB:
		/* "counter = amount - counter" does not change the counter
		 * by a constant step, so the loop cannot be analyzed. */
		if(amnt_src_index == 0){
			count_inst->Unknown = 1;
			return;
		}
		count_inst->Amount -= amount;
		break;
	default:
		count_inst->Unknown = 1;
		return;
	}
}
 
/**
* If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless
* of how many iterations they have.
*/
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
{
int end_loops;
int iterations;
struct count_inst count_inst;
float limit_value;
struct rc_src_register * counter;
struct rc_src_register * limit;
struct const_value counter_value;
struct rc_instruction * inst;
 
/* Find the counter and the upper limit */
 
if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File,
loop->Cond->U.I.SrcReg[0].Index)){
limit = &loop->Cond->U.I.SrcReg[0];
counter = &loop->Cond->U.I.SrcReg[1];
}
else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File,
loop->Cond->U.I.SrcReg[1].Index)){
limit = &loop->Cond->U.I.SrcReg[1];
counter = &loop->Cond->U.I.SrcReg[0];
}
else{
DBG("No constant limit.\n");
return 0;
}
 
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
counter_value.C = c;
for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
if(!counter_value.HasValue){
DBG("Initial counter value cannot be determined.\n");
return 0;
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
count_inst.Unknown = 0;
count_inst.BranchDepth = 0;
end_loops = 1;
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
switch(inst->U.I.Opcode){
/* XXX In the future we might want to try to unroll nested
* loops here.*/
case RC_OPCODE_BGNLOOP:
end_loops++;
break;
case RC_OPCODE_ENDLOOP:
loop->EndLoop = inst;
end_loops--;
break;
case RC_OPCODE_BRK:
/* Don't unroll loops if it has a BRK instruction
* other one used when testing the main conditional
* of the loop. */
 
/* Make sure we haven't entered a nested loops. */
if(inst != loop->Brk && end_loops == 1) {
return 0;
}
break;
case RC_OPCODE_IF:
count_inst.BranchDepth++;
break;
case RC_OPCODE_ENDIF:
count_inst.BranchDepth--;
break;
default:
rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
if(count_inst.Unknown){
return 0;
}
break;
}
}
/* Infinite loop */
if(count_inst.Amount == 0.0f){
return 0;
}
DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle,
limit->Negate, 0);
DBG("Limit is %f.\n", limit_value);
/* The iteration calculations are opposite of what you would expect.
* In a normal loop, if the condition is met, then loop continues, but
* with our loops, if the condition is met, the is exited. */
switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
case RC_OPCODE_SLE:
iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
break;
 
case RC_OPCODE_SGT:
case RC_OPCODE_SLT:
iterations = (int) floorf((limit_value - counter_value.Value) /
count_inst.Amount) + 1;
break;
default:
return 0;
}
 
if (c->max_alu_insts > 0
&& iterations > loop_max_possible_iterations(c, loop)) {
return 0;
}
 
DBG("Loop will have %d iterations.\n", iterations);
 
/* Prepare loop for unrolling */
rc_remove_instruction(loop->Cond);
rc_remove_instruction(loop->If);
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
 
unroll_loop(c, loop, iterations);
loop->EndLoop = NULL;
return 1;
}
 
/**
* Fill in loop with the instructions that make up the loop that starts at
* inst: BGNLOOP, the conditional instruction, the IF / BRK / ENDIF sequence
* that exits the loop, and the matching ENDLOOP.
*
* @param c
* @param loop
* @param inst A pointer to a BGNLOOP instruction.
* @return 1 if all of the members of loop were set.
* @return 0 if there was an error and some members of loop are still NULL.
*/
static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
struct rc_instruction * ptr;

if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
return 0;
}

memset(loop, 0, sizeof(struct loop_info));

loop->BeginLoop = inst;

/* Walk the loop body until the matching ENDLOOP has been stored. */
for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {

/* Reached the list head, i.e. the end of the program. */
if (ptr == &c->Program.Instructions) {
rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
__FUNCTION__);
return 0;
}

switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
{
/* Nested loop, skip ahead to the end. */
unsigned int loop_depth = 1;
for(ptr = ptr->Next; ptr != &c->Program.Instructions;
ptr = ptr->Next){
if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
loop_depth++;
} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
if (!--loop_depth) {
break;
}
}
}
if (ptr == &c->Program.Instructions) {
rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
__FUNCTION__);
return 0;
}
break;
}
case RC_OPCODE_BRK:
/* Only the first BRK that is the sole content of an IF block
* is taken as the exit of the loop; others are skipped. */
if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF
|| loop->Brk){
continue;
}
loop->Brk = ptr;
loop->If = ptr->Prev;
loop->EndIf = ptr->Next;
/* The instruction in front of the IF has to be a comparison. */
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
case RC_OPCODE_SGT:
case RC_OPCODE_SLE:
case RC_OPCODE_SEQ:
case RC_OPCODE_SNE:
break;
default:
return 0;
}
loop->Cond = loop->If->Prev;
break;

case RC_OPCODE_ENDLOOP:
loop->EndLoop = ptr;
break;
}
}

if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
&& loop->Cond && loop->EndLoop) {
return 1;
}
return 0;
}
 
/**
 * This function prepares a loop to be unrolled by converting it into an if
 * statement.  Here is an outline of the conversion process:
 *
 *   BGNLOOP;                           -> BGNLOOP;
 *   <Additional conditional code>      -> <Additional conditional code>
 *   SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
 *   IF temp[0];                        -> IF temp[0];
 *   BRK;                               ->
 *   ENDIF;                             -> <Loop Body>
 *   <Loop Body>                        -> ENDIF;
 *   ENDLOOP;                           -> ENDLOOP
 *
 * A loop that try_unroll_loop() manages to unroll completely is not
 * converted.
 *
 * @param inst A pointer to a BGNLOOP instruction.
 * @return 1 for success, 0 for failure
 */
static int transform_loop(struct emulate_loop_state * s,
		struct rc_instruction * inst)
{
	struct loop_info * info;
	unsigned int inverted;

	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
			s->Loops, s->LoopCount, s->LoopReserved, 1);

	info = &s->Loops[s->LoopCount];
	s->LoopCount++;

	if (!build_loop_info(s->C, info, inst)) {
		rc_error(s->C, "Failed to build loop info\n");
		return 0;
	}

	if (try_unroll_loop(s->C, info))
		return 1;

	/* Reverse the conditional instruction */
	switch(info->Cond->U.I.Opcode){
	case RC_OPCODE_SGE: inverted = RC_OPCODE_SLT; break;
	case RC_OPCODE_SLT: inverted = RC_OPCODE_SGE; break;
	case RC_OPCODE_SLE: inverted = RC_OPCODE_SGT; break;
	case RC_OPCODE_SGT: inverted = RC_OPCODE_SLE; break;
	case RC_OPCODE_SEQ: inverted = RC_OPCODE_SNE; break;
	case RC_OPCODE_SNE: inverted = RC_OPCODE_SEQ; break;
	default:
		rc_error(s->C, "loop->Cond is not a conditional.\n");
		return 0;
	}
	info->Cond->U.I.Opcode = inverted;

	/* Prepare the loop to be emulated: drop the BRK and move the ENDIF
	 * behind the loop body. */
	rc_remove_instruction(info->Brk);
	rc_remove_instruction(info->EndIf);
	rc_insert_instruction(info->EndLoop->Prev, info->EndIf);
	return 1;
}
 
/**
 * Compiler pass: reset the loop state of \p c and run transform_loop() on
 * every BGNLOOP instruction of the program.  The pass stops at the first
 * loop that cannot be transformed.
 *
 * \param c    compiler context
 * \param user unused
 */
void rc_transform_loops(struct radeon_compiler *c, void *user)
{
	struct emulate_loop_state * state = &c->loop_state;
	struct rc_instruction * cur;

	memset(state, 0, sizeof(*state));
	state->C = c;

	cur = state->C->Program.Instructions.Next;
	while (cur != &state->C->Program.Instructions) {
		int is_loop_start = cur->Type == RC_INSTRUCTION_NORMAL &&
				cur->U.I.Opcode == RC_OPCODE_BGNLOOP;

		if (is_loop_start && !transform_loop(state, cur))
			return;

		/* Read the successor only now; transform_loop() may have
		 * changed the instruction list. */
		cur = cur->Next;
	}
}
 
void rc_unroll_loops(struct radeon_compiler *c, void *user)
{
struct rc_instruction * inst;
struct loop_info loop;
 
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
 
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
if (build_loop_info(c, &loop, inst)) {
try_unroll_loop(c, &loop);
}
}
}
}
 
/**
 * Compiler pass: unroll every loop recorded in the compiler's loop state.
 *
 * Each loop is unrolled by the number of iterations reported by
 * loop_max_possible_iterations(). Entries whose EndLoop pointer is NULL
 * are skipped.
 *
 * @param c The compiler whose recorded loops are unrolled.
 * @param user Unused.
 */
void rc_emulate_loops(struct radeon_compiler *c, void *user)
{
	struct emulate_loop_state * state = &c->loop_state;
	/* Walk the recorded loops from last to first, so that nested loops
	 * are unrolled before the loops that contain them. */
	int idx = state->LoopCount - 1;

	while (idx >= 0) {
		if (state->Loops[idx].EndLoop) {
			unsigned int count = loop_max_possible_iterations(
					state->C, &state->Loops[idx]);
			unroll_loop(state->C, &state->Loops[idx], count);
		}
		idx--;
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
0,0 → 1,57
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_EMULATE_LOOPS_H
#define RADEON_EMULATE_LOOPS_H
 
#define MAX_ITERATIONS 8
 
struct radeon_compiler;
 
/**
 * Pointers to the instructions that delimit one loop of the shape
 * BGNLOOP; <cond>; IF; BRK; ENDIF; <body>; ENDLOOP
 * as handled by the loop emulation pass.
 */
struct loop_info {
/* Presumably the BGNLOOP that opens the loop - filled in outside this view. */
struct rc_instruction * BeginLoop;
/* The comparison instruction (SGE/SLT/SLE/SGT/SEQ/SNE) whose opcode
 * transform_loop() reverses. */
struct rc_instruction * Cond;
/* Presumably the IF that tests the result of Cond - not referenced in the
 * code visible here. */
struct rc_instruction * If;
/* The BRK instruction; removed by transform_loop(). */
struct rc_instruction * Brk;
/* The ENDIF instruction; transform_loop() moves it in front of EndLoop. */
struct rc_instruction * EndIf;
/* The ENDLOOP instruction. rc_emulate_loops() skips entries where this
 * pointer is NULL. */
struct rc_instruction * EndLoop;
};
 
/**
 * State shared between rc_transform_loops() and rc_emulate_loops().
 * It is cleared and re-initialized by rc_transform_loops().
 */
struct emulate_loop_state {
/* The compiler this state belongs to. */
struct radeon_compiler * C;
/* Array of the loops recorded by transform_loop(). */
struct loop_info * Loops;
/* Number of entries of Loops that are in use. */
unsigned int LoopCount;
/* Bookkeeping value passed to memory_pool_array_reserve() together with
 * Loops and LoopCount - presumably the allocated capacity of Loops. */
unsigned int LoopReserved;
};
 
/* Resets c->loop_state and prepares every BGNLOOP in the program for
 * emulation; stops at the first loop that cannot be handled. */
void rc_transform_loops(struct radeon_compiler *c, void *user);
 
/* Tries to unroll every BGNLOOP in the program directly. */
void rc_unroll_loops(struct radeon_compiler * c, void *user);
 
/* Unrolls the loops recorded in c->loop_state, last recorded loop first. */
void rc_emulate_loops(struct radeon_compiler * c, void *user);
 
#endif /* RADEON_EMULATE_LOOPS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_inline_literals.c
0,0 → 1,164
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"
#include <stdio.h>
 
#define VERBOSE 0

#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)

/* IEEE-754:
 * 22:0 mantissa
 * 30:23 exponent
 * 31 sign
 *
 * R300:
 * 0:2 mantissa
 * 3:6 exponent (bias 7)
 */
/**
 * Try to encode a 32-bit float as a 7-bit R300 inline constant.
 *
 * The conversion only succeeds when it is exact: the unbiased exponent
 * must lie in [-7, 8] and only the three most significant mantissa bits
 * may be set. The sign is not stored in the 7-bit value; it is reported
 * through the return value instead.
 *
 * @param f The value to convert.
 * @param r300_float_out Receives the 7-bit encoding on success; it is not
 * written on failure.
 * @return 0 if f cannot be represented, 1 if it was encoded and is
 * non-negative, -1 if it was encoded and its sign bit is set.
 */
static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
{
	/* Read the bit pattern through a union: dereferencing a float
	 * through an unsigned pointer breaks the C aliasing rules and may
	 * be miscompiled when strict aliasing is enabled. */
	union {
		float f;
		unsigned u;
	} bits;
	unsigned float_bits;
	/* XXX: Handle big-endian */
	unsigned mantissa;
	unsigned biased_exponent;
	unsigned negate;
	int exponent;
	/* Every bit except the top three mantissa bits (22:20). */
	unsigned mantissa_mask = 0xff8fffff;
	unsigned r300_exponent, r300_mantissa;

	bits.f = f;
	float_bits = bits.u;

	mantissa = float_bits & 0x007fffff;
	biased_exponent = (float_bits & 0x7f800000) >> 23;
	negate = !!(float_bits & 0x80000000);
	/* Subtract in signed arithmetic so that small exponents become
	 * negative directly instead of relying on unsigned wrap-around. */
	exponent = (int)biased_exponent - 127;

	DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
	DBG("Raw exponent = %d\n", exponent);

	if (exponent < -7 || exponent > 8) {
		DBG("Failed exponent out of range\n\n");
		return 0;
	}

	if (mantissa & mantissa_mask) {
		DBG("Failed mantisa has too many bits:\n"
			"manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
			mantissa, mantissa_mask,
			mantissa & mantissa_mask);
		return 0;
	}

	r300_exponent = exponent + 7;
	r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
	*r300_float_out = r300_mantissa | (r300_exponent << 3);

	DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);

	if (negate)
		return -1;
	else
		return 1;
}
 
/**
 * Compiler pass: replace immediate constant sources by inline literals
 * where possible.
 *
 * A source register is rewritten to RC_FILE_INLINE when it reads an
 * immediate constant and every channel it uses selects a value that
 * ieee_754_to_r300_float() can encode, all with the same 7-bit encoding.
 * Differences in sign between the channels are folded into the source's
 * Negate mask. Sources that do not qualify are left unchanged.
 *
 * @param c The compiler whose program is processed.
 * @param user Unused.
 */
void rc_inline_literals(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * inst;

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {
		const struct rc_opcode_info * info =
					rc_get_opcode_info(inst->U.I.Opcode);

		unsigned src_idx;
		struct rc_constant * constant;
		float float_value;
		unsigned char r300_float = 0;
		int ret;

		/* XXX: Handle presub */

		/* We aren't using rc_for_all_reads_src here, because presub
		 * sources need to be handled differently. */
		for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
			unsigned new_swizzle;
			unsigned use_literal = 0;
			unsigned negate_mask = 0;
			unsigned swz, chan;
			struct rc_src_register * src_reg =
						&inst->U.I.SrcReg[src_idx];

			if (src_reg->File != RC_FILE_CONSTANT) {
				continue;
			}
			constant =
				&c->Program.Constants.Constants[src_reg->Index];
			if (constant->Type != RC_CONSTANT_IMMEDIATE) {
				continue;
			}
			new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
			for (chan = 0; chan < 4; chan++) {
				unsigned char r300_float_tmp;
				swz = GET_SWZ(src_reg->Swizzle, chan);
				if (swz == RC_SWIZZLE_UNUSED) {
					continue;
				}
				/* Only the X..W selectors index into
				 * Immediate[]. Any other selector (assumed to
				 * be the built-in constant swizzles, which
				 * sort after RC_SWIZZLE_W - confirm against
				 * the rc_swizzle enum) would read past the
				 * end of the array, so leave such a source
				 * alone. */
				if (swz > RC_SWIZZLE_W) {
					use_literal = 0;
					break;
				}
				float_value = constant->u.Immediate[swz];
				ret = ieee_754_to_r300_float(float_value,
								&r300_float_tmp);
				/* Give up if the value cannot be encoded or
				 * differs from the literal picked for an
				 * earlier channel. */
				if (!ret || (use_literal &&
						r300_float != r300_float_tmp)) {
					use_literal = 0;
					break;
				}

				/* A negative value would have to be expressed
				 * through Negate, which cannot reproduce the
				 * result of Abs on the original constant. */
				if (ret == -1 && src_reg->Abs) {
					use_literal = 0;
					break;
				}

				if (!use_literal) {
					r300_float = r300_float_tmp;
					use_literal = 1;
				}

				/* Use RC_SWIZZLE_W for the inline constant, so
				 * it will become one of the alpha sources. */
				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W);
				if (ret == -1) {
					negate_mask |= (1 << chan);
				}
			}

			if (!use_literal) {
				continue;
			}
			src_reg->File = RC_FILE_INLINE;
			src_reg->Index = r300_float;
			src_reg->Swizzle = new_swizzle;
			src_reg->Negate = src_reg->Negate ^ negate_mask;
		}
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_list.c
0,0 → 1,90
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_list.h"
 
#include <stdlib.h>
#include <stdio.h>
 
#include "memory_pool.h"
 
/**
 * Allocate a single, unlinked list node from the given memory pool.
 *
 * @param pool The pool the node is allocated from.
 * @param item The payload pointer stored in the node.
 * @return The new node, with both Prev and Next set to NULL.
 */
struct rc_list * rc_list(struct memory_pool * pool, void * item)
{
	struct rc_list * node = memory_pool_malloc(pool, sizeof(*node));

	node->Prev = NULL;
	node->Next = NULL;
	node->Item = item;

	return node;
}
 
/**
 * Append a node to the end of a list.
 *
 * @param list Address of the head pointer; when the list is empty the head
 * is set to new_value.
 * @param new_value The node to append.
 */
void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
{
	struct rc_list * tail = *list;

	if (!tail) {
		*list = new_value;
		return;
	}

	/* Find the last node of the list. */
	while (tail->Next) {
		tail = tail->Next;
	}

	tail->Next = new_value;
	new_value->Prev = tail;
}
 
/**
 * Unlink a node from a list.
 *
 * The removed node's own Prev/Next pointers are left untouched.
 *
 * @param list Address of the head pointer; it is updated when the head
 * itself is removed.
 * @param rm_value The node to remove. It must be a member of the list.
 */
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
{
	if (*list == rm_value) {
		*list = rm_value->Next;
		/* The new head must not keep pointing back at the node that
		 * was just removed. */
		if (*list) {
			(*list)->Prev = NULL;
		}
		return;
	}

	rm_value->Prev->Next = rm_value->Next;
	if (rm_value->Next) {
		rm_value->Next->Prev = rm_value->Prev;
	}
}
 
/**
 * Count the nodes of a list.
 *
 * @param list The first node to count, or NULL for an empty list.
 * @return The number of nodes reachable from list through Next.
 */
unsigned int rc_list_count(struct rc_list * list)
{
	unsigned int total;
	struct rc_list * node;

	total = 0;
	for (node = list; node != NULL; node = node->Next) {
		total += 1;
	}

	return total;
}
 
/**
 * Debugging aid: print the Item pointer of every node to stderr, each
 * followed by "->", and finish the line with a newline.
 *
 * @param list The first node to print, or NULL for an empty list.
 */
void rc_list_print(struct rc_list * list)
{
	struct rc_list * node;

	for (node = list; node != NULL; node = node->Next) {
		fprintf(stderr, "%p->", node->Item);
	}
	fprintf(stderr, "\n");
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_list.h
0,0 → 1,46
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_LIST_H
#define RADEON_LIST_H
 
struct memory_pool;
 
/**
 * Node of a doubly linked list of untyped items. A list is referred to
 * by a pointer to its first node; an empty list is a NULL pointer.
 */
struct rc_list {
/* Payload pointer supplied to rc_list(); never dereferenced by the list
 * functions. */
void * Item;
/* Previous node, set by rc_list_add(); NULL for a freshly created node. */
struct rc_list * Prev;
/* Next node, or NULL at the end of the list. */
struct rc_list * Next;
};
 
/* Allocates an unlinked node holding item from pool. */
struct rc_list * rc_list(struct memory_pool * pool, void * item);
/* Appends new_value to the end of *list (sets *list if the list is empty). */
void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
/* Unlinks rm_value from *list, updating *list if it was the first node. */
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
/* Returns the number of nodes reachable from list. */
unsigned int rc_list_count(struct rc_list * list);
/* Prints the Item pointer of every node to stderr. */
void rc_list_print(struct rc_list * list);
 
#endif /* RADEON_LIST_H */
 
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_opcodes.c
0,0 → 1,632
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_opcodes.h"
#include "radeon_program.h"
 
#include "radeon_program_constants.h"
 
/**
 * Static description of every opcode, indexed by rc_opcode value.
 *
 * The entries must appear in exactly the same order as the enumerators of
 * rc_opcode: rc_get_opcode_info() indexes this array with the opcode and
 * asserts that rc_opcodes[opcode].Opcode == opcode.
 * Fields that an entry does not mention are zero.
 */
struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_NOP,
.Name = "NOP"
},
{
.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
.Name = "ILLEGAL OPCODE"
},
{
.Opcode = RC_OPCODE_ABS,
.Name = "ABS",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ADD,
.Name = "ADD",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ARL,
.Name = "ARL",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_CEIL,
.Name = "CEIL",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_CLAMP,
.Name = "CLAMP",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_CND,
.Name = "CND",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_COS,
.Name = "COS",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_DDX,
.Name = "DDX",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DDY,
.Name = "DDY",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DP2,
.Name = "DP2",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DP3,
.Name = "DP3",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DP4,
.Name = "DP4",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DPH,
.Name = "DPH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DST,
.Name = "DST",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_EX2,
.Name = "EX2",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_EXP,
.Name = "EXP",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_FLR,
.Name = "FLR",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_FRC,
.Name = "FRC",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_KIL,
.Name = "KIL",
.NumSrcRegs = 1
},
{
.Opcode = RC_OPCODE_LG2,
.Name = "LG2",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_LIT,
.Name = "LIT",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_LOG,
.Name = "LOG",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_LRP,
.Name = "LRP",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAD,
.Name = "MAD",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAX,
.Name = "MAX",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MIN,
.Name = "MIN",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MOV,
.Name = "MOV",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MUL,
.Name = "MUL",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_POW,
.Name = "POW",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RCP,
.Name = "RCP",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_ROUND,
.Name = "ROUND",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SCS,
.Name = "SCS",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_SEQ,
.Name = "SEQ",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SFL,
.Name = "SFL",
.NumSrcRegs = 0,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGE,
.Name = "SGE",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGT,
.Name = "SGT",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SIN,
.Name = "SIN",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SLE,
.Name = "SLE",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SLT,
.Name = "SLT",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SNE,
.Name = "SNE",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SSG,
.Name = "SSG",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SWZ,
.Name = "SWZ",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_TRUNC,
.Name = "TRUNC",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_XPD,
.Name = "XPD",
.NumSrcRegs = 2,
.HasDstReg = 1
},
/* Texture instructions: the only entries with HasTexture set. */
{
.Opcode = RC_OPCODE_TEX,
.Name = "TEX",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXB,
.Name = "TXB",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXD,
.Name = "TXD",
.HasTexture = 1,
.NumSrcRegs = 3,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXL,
.Name = "TXL",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXP,
.Name = "TXP",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
/* Flow control instructions: none of them has a destination register. */
{
.Opcode = RC_OPCODE_IF,
.Name = "IF",
.IsFlowControl = 1,
.NumSrcRegs = 1
},
{
.Opcode = RC_OPCODE_ELSE,
.Name = "ELSE",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_ENDIF,
.Name = "ENDIF",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_BGNLOOP,
.Name = "BGNLOOP",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_BRK,
.Name = "BRK",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_ENDLOOP,
.Name = "ENDLOOP",
.IsFlowControl = 1,
.NumSrcRegs = 0,
},
{
.Opcode = RC_OPCODE_CONT,
.Name = "CONT",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_REPL_ALPHA,
.Name = "REPL_ALPHA",
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_BEGIN_TEX,
.Name = "BEGIN_TEX"
},
{
.Opcode = RC_OPCODE_KILP,
.Name = "KILP",
},
/* RC_ME_PRED_* opcodes: one source and a destination each. */
{
.Opcode = RC_ME_PRED_SEQ,
.Name = "ME_PRED_SEQ",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SGT,
.Name = "ME_PRED_SGT",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SGE,
.Name = "ME_PRED_SGE",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SNEQ,
.Name = "ME_PRED_SNEQ",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_CLR,
.Name = "ME_PRED_SET_CLEAR",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_INV,
.Name = "ME_PRED_SET_INV",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_POP,
.Name = "ME_PRED_SET_POP",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_ME_PRED_SET_RESTORE,
.Name = "ME_PRED_SET_RESTORE",
.NumSrcRegs = 1,
.HasDstReg = 1
},
/* RC_VE_PRED_*_PUSH opcodes: two sources and a destination each. */
{
.Opcode = RC_VE_PRED_SEQ_PUSH,
.Name = "VE_PRED_SEQ_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SGT_PUSH,
.Name = "VE_PRED_SGT_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SGE_PUSH,
.Name = "VE_PRED_SGE_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_VE_PRED_SNEQ_PUSH,
.Name = "VE_PRED_SNEQ_PUSH",
.NumSrcRegs = 2,
.HasDstReg = 1
}
};
 
/**
 * Compute, for each source operand of an instruction, which components
 * are read in order to produce the given destination components.
 *
 * @param inst The instruction to inspect.
 * @param writemask The destination components of interest.
 * @param srcmasks Output array of three masks, one per source operand.
 * All three entries are overwritten. KIL and IF report the components
 * they read even when writemask is zero.
 */
void rc_compute_sources_for_writemask(
		const struct rc_instruction *inst,
		unsigned int writemask,
		unsigned int *srcmasks)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
	/* Components of source 0 read as texture coordinates. */
	unsigned int coords = 0;
	/* Components read from all three TXD sources. */
	unsigned int grad = 0;
	/* Additional components TXD reads from source 0 only. */
	unsigned int extra = 0;
	unsigned int i;

	for (i = 0; i < 3; i++)
		srcmasks[i] = 0;

	/* These two read their source regardless of the write mask. */
	switch (info->Opcode) {
	case RC_OPCODE_KIL:
		srcmasks[0] |= RC_MASK_XYZW;
		break;
	case RC_OPCODE_IF:
		srcmasks[0] |= RC_MASK_X;
		break;
	default:
		break;
	}

	if (writemask == 0)
		return;

	/* Componentwise and standard scalar instructions are treated the
	 * same way: every source gets the write mask. */
	if (info->IsComponentwise || info->IsStandardScalar) {
		for (i = 0; i < info->NumSrcRegs; i++)
			srcmasks[i] |= writemask;
		return;
	}

	switch (info->Opcode) {
	case RC_OPCODE_ARL:
		srcmasks[0] |= RC_MASK_X;
		break;
	case RC_OPCODE_DP2:
		srcmasks[0] |= RC_MASK_XY;
		srcmasks[1] |= RC_MASK_XY;
		break;
	case RC_OPCODE_DP3:
	case RC_OPCODE_XPD:
		srcmasks[0] |= RC_MASK_XYZ;
		srcmasks[1] |= RC_MASK_XYZ;
		break;
	case RC_OPCODE_DP4:
		srcmasks[0] |= RC_MASK_XYZW;
		srcmasks[1] |= RC_MASK_XYZW;
		break;
	case RC_OPCODE_DPH:
		srcmasks[0] |= RC_MASK_XYZ;
		srcmasks[1] |= RC_MASK_XYZW;
		break;
	case RC_OPCODE_TEX:
	case RC_OPCODE_TXB:
	case RC_OPCODE_TXP:
	case RC_OPCODE_TXL:
		/* Everything but plain TEX also reads W. */
		if (info->Opcode != RC_OPCODE_TEX)
			coords |= RC_MASK_W;

		switch (inst->U.I.TexSrcTarget) {
		case RC_TEXTURE_1D:
			coords |= RC_MASK_X;
			break;
		case RC_TEXTURE_2D:
		case RC_TEXTURE_RECT:
		case RC_TEXTURE_1D_ARRAY:
			coords |= RC_MASK_XY;
			break;
		case RC_TEXTURE_3D:
		case RC_TEXTURE_CUBE:
		case RC_TEXTURE_2D_ARRAY:
			coords |= RC_MASK_XYZ;
			break;
		}
		srcmasks[0] |= coords;
		break;
	case RC_OPCODE_TXD:
		switch (inst->U.I.TexSrcTarget) {
		case RC_TEXTURE_1D_ARRAY:
			extra = RC_MASK_Y;
			grad = RC_MASK_X;
			break;
		case RC_TEXTURE_1D:
			grad = RC_MASK_X;
			break;
		case RC_TEXTURE_2D_ARRAY:
			extra = RC_MASK_Z;
			grad = RC_MASK_XY;
			break;
		case RC_TEXTURE_2D:
		case RC_TEXTURE_RECT:
			grad = RC_MASK_XY;
			break;
		case RC_TEXTURE_3D:
		case RC_TEXTURE_CUBE:
			grad = RC_MASK_XYZ;
			break;
		}
		srcmasks[0] |= grad | extra;
		srcmasks[1] |= grad;
		srcmasks[2] |= grad;
		break;
	case RC_OPCODE_DST:
		srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
		srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
		break;
	case RC_OPCODE_EXP:
	case RC_OPCODE_LOG:
		srcmasks[0] |= RC_MASK_XY;
		break;
	case RC_OPCODE_LIT:
		srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
		break;
	default:
		break;
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_opcodes.h
0,0 → 1,284
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_OPCODES_H
#define RADEON_OPCODES_H
 
#include <assert.h>
 
/**
* Opcodes understood by the Radeon compiler.
*/
/* NOTE: rc_get_opcode_info() uses these values as indices into the
 * rc_opcodes array, so the order of the enumerators and the order of the
 * entries in that array must stay in sync. */
typedef enum {
RC_OPCODE_NOP = 0,
RC_OPCODE_ILLEGAL_OPCODE,
 
/** vec4 instruction: dst.c = abs(src0.c); */
RC_OPCODE_ABS,
 
/** vec4 instruction: dst.c = src0.c + src1.c; */
RC_OPCODE_ADD,
 
/** special instruction: load address register
* dst.x = floor(src.x), where dst must be an address register */
RC_OPCODE_ARL,
 
/** vec4 instruction: dst.c = ceil(src0.c) */
RC_OPCODE_CEIL,
 
/** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
RC_OPCODE_CLAMP,
 
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
 
/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
RC_OPCODE_CND,
 
/** scalar instruction: dst = cos(src0.x) */
RC_OPCODE_COS,
 
/** special instruction: take vec4 partial derivative in X direction
* dst.c = d src0.c / dx */
RC_OPCODE_DDX,
 
/** special instruction: take vec4 partial derivative in Y direction
* dst.c = d src0.c / dy */
RC_OPCODE_DDY,
 
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
RC_OPCODE_DP2,
 
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
RC_OPCODE_DP3,
 
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
RC_OPCODE_DP4,
 
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
RC_OPCODE_DPH,
 
/** special instruction, see ARB_fragment_program */
RC_OPCODE_DST,
 
/** scalar instruction: dst = 2**src0.x */
RC_OPCODE_EX2,
 
/** special instruction, see ARB_vertex_program */
RC_OPCODE_EXP,
 
/** vec4 instruction: dst.c = floor(src0.c) */
RC_OPCODE_FLR,
 
/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
RC_OPCODE_FRC,
 
/** special instruction: stop execution if any component of src0 is negative */
RC_OPCODE_KIL,
 
/** scalar instruction: dst = log_2(src0.x) */
RC_OPCODE_LG2,
 
/** special instruction, see ARB_vertex_program */
RC_OPCODE_LIT,
 
/** special instruction, see ARB_vertex_program */
RC_OPCODE_LOG,
 
/** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
RC_OPCODE_LRP,
 
/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
RC_OPCODE_MAD,
 
/** vec4 instruction: dst.c = max(src0.c, src1.c) */
RC_OPCODE_MAX,
 
/** vec4 instruction: dst.c = min(src0.c, src1.c) */
RC_OPCODE_MIN,
 
/** vec4 instruction: dst.c = src0.c */
RC_OPCODE_MOV,
 
/** vec4 instruction: dst.c = src0.c*src1.c */
RC_OPCODE_MUL,
 
/** scalar instruction: dst = src0.x ** src1.x */
RC_OPCODE_POW,
 
/** scalar instruction: dst = 1 / src0.x */
RC_OPCODE_RCP,
 
/** vec4 instruction: dst.c = floor(src0.c + 0.5) */
RC_OPCODE_ROUND,
 
/** scalar instruction: dst = 1 / sqrt(src0.x) */
RC_OPCODE_RSQ,
 
/** special instruction, see ARB_fragment_program */
RC_OPCODE_SCS,
 
/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SEQ,
 
/** vec4 instruction: dst.c = 0.0 */
RC_OPCODE_SFL,
 
/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SGE,
 
/** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SGT,
 
/** scalar instruction: dst = sin(src0.x) */
RC_OPCODE_SIN,
 
/** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SLE,
 
/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SLT,
 
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SNE,
 
/** vec4 instruction: dst.c = (src0.c < 0) ? -1 : ((src0.c > 0) ? 1 : 0) */
RC_OPCODE_SSG,
 
/** vec4 instruction: dst.c = src0.c - src1.c */
RC_OPCODE_SUB,
 
/** vec4 instruction: dst.c = src0.c */
RC_OPCODE_SWZ,
 
/** vec4 instruction: dst.c = (abs(src0.c) - fract(abs(src0.c))) * sgn(src0.c) */
RC_OPCODE_TRUNC,
 
/** special instruction, see ARB_fragment_program */
RC_OPCODE_XPD,
 
/* Texture instructions: the rc_opcodes entries of these five opcodes are
* the ones that set HasTexture. */
RC_OPCODE_TEX,
RC_OPCODE_TXB,
RC_OPCODE_TXD,
RC_OPCODE_TXL,
RC_OPCODE_TXP,
 
/** branch instruction:
* If src0.x != 0.0, continue with the next instruction;
* otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
*/
RC_OPCODE_IF,
 
/** branch instruction: jump to matching RC_OPCODE_ENDIF */
RC_OPCODE_ELSE,
 
/** branch instruction: has no effect */
RC_OPCODE_ENDIF,
/* Loop instructions (BGNLOOP, BRK, ENDLOOP, CONT): flow control opcodes
* without source registers, see their rc_opcodes entries. */
RC_OPCODE_BGNLOOP,
 
RC_OPCODE_BRK,
 
RC_OPCODE_ENDLOOP,
 
RC_OPCODE_CONT,
 
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
* across all other channels */
RC_OPCODE_REPL_ALPHA,
 
/** special instruction, used in R300-R500 fragment programs
* to indicate the start of a block of texture instructions that
* can run simultaneously. */
RC_OPCODE_BEGIN_TEX,
 
/** Stop execution of the shader (GLSL discard) */
RC_OPCODE_KILP,
 
/* Vertex shader CF Instructions */
RC_ME_PRED_SEQ,
RC_ME_PRED_SGT,
RC_ME_PRED_SGE,
RC_ME_PRED_SNEQ,
RC_ME_PRED_SET_CLR,
RC_ME_PRED_SET_INV,
RC_ME_PRED_SET_POP,
RC_ME_PRED_SET_RESTORE,
 
RC_VE_PRED_SEQ_PUSH,
RC_VE_PRED_SGT_PUSH,
RC_VE_PRED_SGE_PUSH,
RC_VE_PRED_SNEQ_PUSH,
 
/* Number of opcodes; also the size of the rc_opcodes array. Must stay last. */
MAX_RC_OPCODE
} rc_opcode;
 
 
/**
 * Static properties of one opcode; one entry per rc_opcode value is kept
 * in the rc_opcodes array.
 */
struct rc_opcode_info {
/** the opcode described by this entry; rc_get_opcode_info() asserts that
* it equals the entry's index in rc_opcodes */
rc_opcode Opcode;
/** printable name of the opcode */
const char * Name;
 
/** true if the instruction reads from a texture.
*
* \note This is false for the KIL instruction, even though KIL is
* a texture instruction from a hardware point of view. */
unsigned int HasTexture:1;
 
/** number of source registers the instruction reads; a two-bit field,
* so at most 3 */
unsigned int NumSrcRegs:2;
/** true if the instruction has a destination register */
unsigned int HasDstReg:1;
 
/** true if this instruction affects control flow */
unsigned int IsFlowControl:1;
 
/** true if this is a vector instruction that operates on components in parallel
* without any cross-component interaction */
unsigned int IsComponentwise:1;
 
/** true if this instruction sources only its operands X components
* to compute one result which is smeared across all output channels */
unsigned int IsStandardScalar:1;
};
 
extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
 
/**
 * Look up the static description of an opcode.
 *
 * @param opcode The opcode to look up; must be smaller than MAX_RC_OPCODE.
 * @return Pointer to the matching entry of the rc_opcodes array.
 */
static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
{
	const unsigned int idx = (unsigned int)opcode;

	/* The opcode must be in range, and the table entry at that position
	 * must describe this very opcode. */
	assert(idx < MAX_RC_OPCODE);
	assert(rc_opcodes[idx].Opcode == opcode);

	return rc_opcodes + idx;
}
 
struct rc_instruction;
 
void rc_compute_sources_for_writemask(
const struct rc_instruction *inst,
unsigned int writemask,
unsigned int *srcmasks);
 
#endif /* RADEON_OPCODES_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_optimize.c
0,0 → 1,908
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_dataflow.h"
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_list.h"
#include "radeon_swizzle.h"
#include "radeon_variable.h"
 
/**
 * Data handed to src_clobbered_reads_cb(): describes one register write
 * and the reader data that is flagged when the write hits a source.
 */
struct src_clobbered_reads_cb_data {
/* Register file of the write being examined. */
rc_register_file File;
/* Register index of the write being examined. */
unsigned int Index;
/* Write mask of the write being examined. */
unsigned int Mask;
/* Reader data whose AbortOnRead is set when a source is clobbered. */
struct rc_reader_data * ReaderData;
};
 
/* Callback type taking two instructions and an unsigned int.
 * NOTE(review): its users lie outside this part of the file; judging by the
 * name it rewrites an instruction to use a presubtract source - confirm the
 * meaning of each argument at the call sites. */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
struct rc_instruction *,
unsigned int);
 
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
struct rc_src_register combine;
combine.File = inner.File;
combine.Index = inner.Index;
combine.RelAddr = inner.RelAddr;
if (outer.Abs) {
combine.Abs = 1;
combine.Negate = outer.Negate;
} else {
combine.Abs = inner.Abs;
combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
combine.Negate ^= outer.Negate;
}
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
return combine;
}
 
/**
 * Reader callback: set reader_data->Abort when the given read must not be
 * rewritten.
 *
 * Abort is set when
 *  - rc_inst_can_use_presub() rejects the read for the writer's presubtract
 *    operation,
 *  - the source is in the address register file, or
 *  - the reader is a texture or KIL instruction and the writer's first
 *    source is neither a temporary nor an input.
 *
 * @param data The struct rc_reader_data of the current scan.
 * @param inst The instruction performing the read.
 * @param src The source register being read.
 */
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
						struct rc_src_register * src)
{
	struct rc_reader_data * rd = data;
	const rc_register_file src_file = src->File;

	if (!rc_inst_can_use_presub(inst,
			rd->Writer->U.I.PreSub.Opcode,
			rc_swizzle_to_writemask(src->Swizzle),
			src,
			&rd->Writer->U.I.PreSub.SrcReg[0],
			&rd->Writer->U.I.PreSub.SrcReg[1])) {
		rd->Abort = 1;
		return;
	}

	/* XXX This could probably be handled better. */
	if (src_file == RC_FILE_ADDRESS) {
		rd->Abort = 1;
		return;
	}

	/* These instructions cannot read from the constants file.
	 * see radeonTransformTEX()
	 */
	switch (inst->U.I.Opcode) {
	case RC_OPCODE_TEX:
	case RC_OPCODE_TXB:
	case RC_OPCODE_TXP:
	case RC_OPCODE_TXD:
	case RC_OPCODE_TXL:
	case RC_OPCODE_KIL:
		if (rd->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
		    rd->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT) {
			rd->Abort = 1;
		}
		break;
	default:
		break;
	}
}
 
/*
 * Source callback run over the writer's operands for one later register
 * write (described by \p data). Raises AbortOnRead to all channels when the
 * write overlaps this source, or when the write targets the address register
 * file and this source uses relative addressing.
 */
static void src_clobbered_reads_cb(
	void * data,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct src_clobbered_reads_cb_data * write = data;

	if ((src->File == write->File
	     && src->Index == write->Index
	     && (rc_swizzle_to_writemask(src->Swizzle) & write->Mask))
	    || (src->RelAddr && write->File == RC_FILE_ADDRESS)) {
		write->ReaderData->AbortOnRead = RC_MASK_XYZW;
	}
}
 
/*
 * Write callback for rc_get_readers(): packages the write that was just seen
 * and checks it against every source operand of the writer instruction via
 * src_clobbered_reads_cb().
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * rd = data;
	struct src_clobbered_reads_cb_data write = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = rd,
	};

	rc_for_all_reads_src(rd->Writer, src_clobbered_reads_cb, &write);
}
 
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
struct rc_reader_data reader_data;
unsigned int i;
 
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.WriteALUResult)
return;
 
/* Get a list of all the readers of this MOV instruction. */
reader_data.ExitOnAbort = 1;
rc_get_readers(c, inst_mov, &reader_data,
copy_propagate_scan_read, NULL,
is_src_clobbered_scan_write);
 
if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
 
/* We can propagate SaturateMode if all the readers are MOV instructions
* without a presubtract operation, source negation and absolute.
* In that case, we just move SaturateMode to all readers. */
if (inst_mov->U.I.SaturateMode) {
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
 
if (inst->U.I.Opcode != RC_OPCODE_MOV ||
inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
inst->U.I.SrcReg[0].Abs ||
inst->U.I.SrcReg[0].Negate) {
return;
}
}
}
 
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
 
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = inst_mov->U.I.PreSub;
if (!inst->U.I.SaturateMode)
inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
}
 
/* Finally, remove the original MOV instruction */
rc_remove_instruction(inst_mov);
}
 
/**
 * Test whether every used channel of \p src selects the same constant
 * swizzle with the same negate bit.
 *
 * @return 1 if so; the shared swizzle and negate bit are stored in \p pswz
 * and \p pnegate. If no channel is used at all, 1 is returned and both
 * outputs are left untouched.
 * @return 0 if the source file is not RC_FILE_NONE, if any channel selects a
 * register component (swizzle value below 4), or if the used channels
 * disagree; \p pswz is set to 0 in all of these cases.
 */
static int is_src_uniform_constant(struct rc_src_register src,
		rc_swizzle * pswz, unsigned int * pnegate)
{
	unsigned int chan;
	int seen_channel = 0;

	if (src.File != RC_FILE_NONE)
		goto not_uniform;

	for (chan = 0; chan < 4; ++chan) {
		unsigned int swz = GET_SWZ(src.Swizzle, chan);

		if (swz < 4)
			goto not_uniform;
		if (swz == RC_SWIZZLE_UNUSED)
			continue;

		if (seen_channel) {
			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan))
				goto not_uniform;
		} else {
			/* First used channel: it defines the reference values. */
			*pswz = swz;
			*pnegate = GET_BIT(src.Negate, chan);
			seen_channel = 1;
		}
	}

	return 1;

not_uniform:
	*pswz = 0;
	return 0;
}
 
static void constant_folding_mad(struct rc_instruction * inst)
{
rc_swizzle swz = 0;
unsigned int negate= 0;
 
if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MUL;
return;
}
}
 
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_ADD;
if (negate)
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
return;
}
}
 
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_ADD;
if (negate)
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
return;
}
}
}
 
static void constant_folding_mul(struct rc_instruction * inst)
{
rc_swizzle swz = 0;
unsigned int negate = 0;
 
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
if (negate)
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
return;
}
}
 
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_MOV;
if (negate)
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
return;
}
}
}
 
static void constant_folding_add(struct rc_instruction * inst)
{
rc_swizzle swz = 0;
unsigned int negate = 0;
 
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
return;
}
}
 
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
return;
}
}
}
 
/**
 * Replace 0.0, 1.0 and 0.5 immediate constants by their
 * respective swizzles, then simplify MAD/MUL/ADD instructions whose
 * operands turned out to be constant (e.g. ADD dst, src, 0).
 *
 * \param c    compiler context; supplies the constant table, the
 *             has_half_swizzles flag and the SwizzleCaps->IsNative() check
 * \param inst instruction to rewrite in place
 */
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
{
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;

/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
struct rc_constant * constant;
struct rc_src_register newsrc;
int have_real_reference;
unsigned int chan;

/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
for (chan = 0; chan < 4; ++chan)
if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
break;
if (chan == 4) {
inst->U.I.SrcReg[src].File = RC_FILE_NONE;
continue;
}

/* Convert immediates to swizzles. Only directly addressed, in-range
 * entries of the constant file are considered. */
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
inst->U.I.SrcReg[src].RelAddr ||
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
continue;

constant =
&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];

if (constant->Type != RC_CONSTANT_IMMEDIATE)
continue;

/* Build the replacement operand in a copy so it can still be rejected. */
newsrc = inst->U.I.SrcReg[src];
have_real_reference = 0;
for (chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
unsigned int newswz;
float imm;
float baseimm;

/* Channel already selects a constant swizzle or is unused. */
if (swz >= 4)
continue;

/* baseimm is the magnitude of the immediate; the sign is handled
 * through the negate bit further down. */
imm = constant->u.Immediate[swz];
baseimm = imm;
if (imm < 0.0)
baseimm = -baseimm;

if (baseimm == 0.0) {
newswz = RC_SWIZZLE_ZERO;
} else if (baseimm == 1.0) {
newswz = RC_SWIZZLE_ONE;
} else if (baseimm == 0.5 && c->has_half_swizzles) {
newswz = RC_SWIZZLE_HALF;
} else {
/* This channel still needs the value stored in the constant file. */
have_real_reference = 1;
continue;
}

SET_SWZ(newsrc.Swizzle, chan, newswz);
/* A negative immediate flips the negate bit of this channel, unless
 * the operand takes an absolute value. */
if (imm < 0.0 && !newsrc.Abs)
newsrc.Negate ^= 1 << chan;
}

/* No channel reads the constant file any more: detach the operand from it. */
if (!have_real_reference) {
newsrc.File = RC_FILE_NONE;
newsrc.Index = 0;
}

/* don't make the swizzle worse: keep the original operand if it is
 * native and the replacement is not */
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
continue;

inst->U.I.SrcReg[src] = newsrc;
}

/* Simplify instructions based on constants */
if (inst->U.I.Opcode == RC_OPCODE_MAD)
constant_folding_mad(inst);

/* note: MAD can simplify to MUL or ADD */
if (inst->U.I.Opcode == RC_OPCODE_MUL)
constant_folding_mul(inst);
else if (inst->U.I.Opcode == RC_OPCODE_ADD)
constant_folding_add(inst);

/* In case this instruction has been converted, make sure all of the
 * registers that are no longer used are empty. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
for(i = opcode->NumSrcRegs; i < 3; i++) {
memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
}
}
 
/**
 * Report which components \p src reads from the register written by \p dst.
 *
 * @return the writemask derived from the source swizzle when both operands
 * name the same register (same file and index), zero otherwise.
 */
static unsigned int src_reads_dst_mask(struct rc_src_register src,
						struct rc_dst_register dst)
{
	if (dst.File == src.File && dst.Index == src.Index)
		return rc_swizzle_to_writemask(src.Swizzle);

	return 0;
}
 
/* Tell whether any of the four channels of a source register selects a
 * constant swizzle (0, 0.5 or 1.0). Returns 1 if so, 0 otherwise. */
static int src_has_const_swz(struct rc_src_register src) {
	int chan = 0;

	while (chan < 4) {
		const unsigned int swz = GET_SWZ(src.Swizzle, chan);

		if (swz == RC_SWIZZLE_ZERO)
			return 1;
		if (swz == RC_SWIZZLE_HALF)
			return 1;
		if (swz == RC_SWIZZLE_ONE)
			return 1;
		chan++;
	}
	return 0;
}
 
static void presub_scan_read(
void * data,
struct rc_instruction * inst,
struct rc_src_register * src)
{
struct rc_reader_data * reader_data = data;
rc_presubtract_op * presub_opcode = reader_data->CbData;
 
if (!rc_inst_can_use_presub(inst, *presub_opcode,
reader_data->Writer->U.I.DstReg.WriteMask,
src,
&reader_data->Writer->U.I.SrcReg[0],
&reader_data->Writer->U.I.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
}
 
/*
 * Common driver of the presubtract peepholes. Gathers all readers of
 * \p inst_add and, if every one of them can use \p presub_opcode, calls
 * \p presub_replace for each source operand that reads the ADD result.
 *
 * @return 1 if the readers were rewritten, 0 if nothing was changed (the
 * scan aborted or there are no readers).
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data rd;
	rc_presubtract_op cb_op = presub_opcode;
	unsigned int r;

	rd.CbData = &cb_op;
	rd.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &rd, presub_scan_read, NULL,
		       is_src_clobbered_scan_write);

	if (rd.ReaderCount == 0 || rd.Abort)
		return 0;

	for (r = 0; r < rd.ReaderCount; r++) {
		struct rc_reader reader = rd.Readers[r];
		struct rc_instruction * user = reader.Inst;
		const struct rc_opcode_info * info =
			rc_get_opcode_info(user->U.I.Opcode);
		unsigned int s = 0;

		/* Locate the operand slot that the reader entry points at. */
		while (s < info->NumSrcRegs) {
			if (reader.U.I.Src == &user->U.I.SrcReg[s])
				presub_replace(inst_add, user, s);
			s++;
		}
	}
	return 1;
}
 
/* This function assumes that inst_add->U.I.SrcReg[0] and
* inst_add->U.I.SrcReg[1] aren't both negative. */
static void presub_replace_add(
struct rc_instruction * inst_add,
struct rc_instruction * inst_reader,
unsigned int src_index)
{
rc_presubtract_op presub_opcode;
if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
presub_opcode = RC_PRESUB_SUB;
else
presub_opcode = RC_PRESUB_ADD;
 
if (inst_add->U.I.SrcReg[1].Negate) {
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
} else {
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
}
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
inst_reader->U.I.PreSub.Opcode = presub_opcode;
inst_reader->U.I.SrcReg[src_index] =
chain_srcregs(inst_reader->U.I.SrcReg[src_index],
inst_reader->U.I.PreSub.SrcReg[0]);
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}
 
static int is_presub_candidate(
struct radeon_compiler * c,
struct rc_instruction * inst)
{
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
unsigned int is_constant[2] = {0, 0};
 
assert(inst->U.I.Opcode == RC_OPCODE_ADD);
 
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
|| inst->U.I.SaturateMode
|| inst->U.I.WriteALUResult
|| inst->U.I.Omod) {
return 0;
}
 
/* If both sources use a constant swizzle, then we can't convert it to
* a presubtract operation. In fact for the ADD and SUB presubtract
* operations neither source can contain a constant swizzle. This
* specific case is checked in peephole_add_presub_add() when
* we make sure the swizzles for both sources are equal, so we
* don't need to worry about it here. */
for (i = 0; i < 2; i++) {
int chan;
for (chan = 0; chan < 4; chan++) {
rc_swizzle swz =
get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
if (swz == RC_SWIZZLE_ONE
|| swz == RC_SWIZZLE_ZERO
|| swz == RC_SWIZZLE_HALF) {
is_constant[i] = 1;
}
}
}
if (is_constant[0] && is_constant[1])
return 0;
 
for(i = 0; i < info->NumSrcRegs; i++) {
struct rc_src_register src = inst->U.I.SrcReg[i];
if (src_reads_dst_mask(src, inst->U.I.DstReg))
return 0;
 
src.File = RC_FILE_PRESUB;
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
return 0;
}
return 1;
}
 
static int peephole_add_presub_add(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
 
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
return 0;
 
/* src0 and src1 can't have absolute values */
if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
return 0;
 
/* presub_replace_add() assumes only one is negative */
if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
return 0;
 
/* if src0 is negative, at least all bits of dstmask have to be set */
if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
return 0;
 
/* if src1 is negative, at least all bits of dstmask have to be set */
if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
return 0;
 
if (!is_presub_candidate(c, inst_add))
return 0;
 
if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
 
static void presub_replace_inv(
struct rc_instruction * inst_add,
struct rc_instruction * inst_reader,
unsigned int src_index)
{
/* We must be careful not to modify inst_add, since it
* is possible it will remain part of the program.*/
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
inst_reader->U.I.PreSub.SrcReg[0]);
 
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
}
 
/**
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
* of the add instruction must have the constatnt 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
* @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
static int peephole_add_presub_inv(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
unsigned int i, swz;
 
if (!is_presub_candidate(c, inst_add))
return 0;
 
/* Check if src0 is 1. */
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
for(i = 0; i < 4; i++ ) {
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
&& swz != RC_SWIZZLE_ONE) {
return 0;
}
}
 
/* Check src1. */
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
inst_add->U.I.DstReg.WriteMask
|| inst_add->U.I.SrcReg[1].Abs
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
 
return 0;
}
 
if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
 
/*
 * State shared by omod_filter_reader_cb() and omod_filter_writer_cb() while
 * peephole_mul_omod() scans the instructions preceding a MUL.
 */
struct peephole_mul_cb_data {
struct rc_dst_register * Writer; /* destination register of the MUL */
unsigned int Clobbered; /* set to 1 once a scanned instruction reads or writes it */
};
 
/*
 * Read callback: marks the MUL destination as clobbered when the register
 * read reported here overlaps it, as decided by rc_src_reads_dst_mask().
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (!rc_src_reads_dst_mask(file, mask, index,
				   dst->File, dst->Index, dst->WriteMask))
		return;

	cb->Clobbered = 1;
}
 
/*
 * Write callback: marks the MUL destination as clobbered when the register
 * write reported here targets the same register and shares at least one
 * channel with it.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (file != dst->File || index != dst->Index)
		return;
	if (!(mask & dst->WriteMask))
		return;

	cb->Clobbered = 1;
}
 
/*
 * Try to fold a MUL of a temporary by an immediate 2, 4, 8, 1/2, 1/4 or 1/8
 * into the output modifier (Omod) of the instruction(s) that write that
 * temporary. On success those writers are redirected to the MUL's
 * destination register, inherit its saturate mode, and the MUL is removed.
 *
 * \param c        compiler context
 * \param inst_mul the MUL instruction to examine
 * \param var_list variable list used to look up the writers of the
 *                 temporary source
 * \return 1 if inst_mul was removed from the program, 0 if it was left alone.
 */
static int peephole_mul_omod(
struct radeon_compiler * c,
struct rc_instruction * inst_mul,
struct rc_list * var_list)
{
unsigned int chan = 0, swz, i;
int const_index = -1;
int temp_index = -1;
float const_value;
rc_omod_op omod_op = RC_OMOD_DISABLE;
struct rc_list * writer_list;
struct rc_variable * var;
struct peephole_mul_cb_data cb_data;
unsigned writemask_sum;

/* Classify the two sources: exactly one must be a temporary and the
 * other a constant, otherwise the function bails out. */
for (i = 0; i < 2; i++) {
unsigned int j;
if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
return 0;
}
if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
if (temp_index != -1) {
/* The instruction has two temp sources */
return 0;
} else {
temp_index = i;
continue;
}
}
/* If we get this far Src[i] must be a constant src */
if (inst_mul->U.I.SrcReg[i].Negate) {
return 0;
}
/* The constant src needs to read from the same swizzle on every used
 * channel; chan remembers the first used channel. */
swz = RC_SWIZZLE_UNUSED;
chan = 0;
for (j = 0; j < 4; j++) {
unsigned int j_swz =
GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
if (j_swz == RC_SWIZZLE_UNUSED) {
continue;
}
if (swz == RC_SWIZZLE_UNUSED) {
swz = j_swz;
chan = j;
} else if (j_swz != swz) {
return 0;
}
}

if (const_index != -1) {
/* The instruction has two constant sources */
return 0;
} else {
const_index = i;
}
}

/* Only immediates have a value that can be inspected here. */
if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
inst_mul->U.I.SrcReg[const_index].Index)) {
return 0;
}
const_value = rc_get_constant_value(c,
inst_mul->U.I.SrcReg[const_index].Index,
inst_mul->U.I.SrcReg[const_index].Swizzle,
inst_mul->U.I.SrcReg[const_index].Negate,
chan);

/* Map the factor to an output modifier; any other factor is rejected. */
if (const_value == 2.0f) {
omod_op = RC_OMOD_MUL_2;
} else if (const_value == 4.0f) {
omod_op = RC_OMOD_MUL_4;
} else if (const_value == 8.0f) {
omod_op = RC_OMOD_MUL_8;
} else if (const_value == (1.0f / 2.0f)) {
omod_op = RC_OMOD_DIV_2;
} else if (const_value == (1.0f / 4.0f)) {
omod_op = RC_OMOD_DIV_4;
} else if (const_value == (1.0f / 8.0f)) {
omod_op = RC_OMOD_DIV_8;
} else {
return 0;
}

/* NOTE(review): judging by its name, this returns the writers of the
 * temporary only when the MUL is their single reader; confirm against
 * radeon_variable.c. */
writer_list = rc_variable_list_get_writers_one_reader(var_list,
RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);

if (!writer_list) {
return 0;
}

/* Check every writer: it must not be a texture instruction, must not
 * saturate, and nothing between it and the MUL may read or write the
 * MUL's destination register. */
cb_data.Clobbered = 0;
cb_data.Writer = &inst_mul->U.I.DstReg;
for (var = writer_list->Item; var; var = var->Friend) {
struct rc_instruction * inst;
const struct rc_opcode_info * info = rc_get_opcode_info(
var->Inst->U.I.Opcode);
if (info->HasTexture) {
return 0;
}
if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
return 0;
}
/* Walk backwards from the MUL to this writer. */
for (inst = inst_mul->Prev; inst != var->Inst;
inst = inst->Prev) {
rc_for_all_reads_mask(inst, omod_filter_reader_cb,
&cb_data);
rc_for_all_writes_mask(inst, omod_filter_writer_cb,
&cb_data);
if (cb_data.Clobbered) {
break;
}
}
}

if (cb_data.Clobbered) {
return 0;
}

/* Rewrite the instructions: each writer now produces the MUL's result
 * directly, scaled by the output modifier. */
writemask_sum = rc_variable_writemask_sum(writer_list->Item);
for (var = writer_list->Item; var; var = var->Friend) {
struct rc_variable * writer = var;
unsigned conversion_swizzle = rc_make_conversion_swizzle(
writemask_sum,
inst_mul->U.I.DstReg.WriteMask);
writer->Inst->U.I.Omod = omod_op;
writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
}

rc_remove_instruction(inst_mul);

return 1;
}
 
/**
 * Run the peephole optimizations that apply to \p inst. Only ADD is handled,
 * and only when the compiler reports presubtract support.
 * @return
 * 0 if inst is still part of the program.
 * 1 if inst is no longer part of the program.
 */
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
{
	if (inst->U.I.Opcode != RC_OPCODE_ADD || !c->has_presub)
		return 0;

	/* The INV form is tried first, the generic ADD/SUB form second. */
	if (peephole_add_presub_inv(c, inst))
		return 1;
	if (peephole_add_presub_add(c, inst))
		return 1;

	return 0;
}
 
/*
 * Optimization pass entry point.
 *
 * First sweep: constant folding, the presubtract peepholes and MOV copy
 * propagation on every instruction. Second sweep (only when the compiler
 * reports output-modifier support): fold eligible MULs into output
 * modifiers. \p user is unused.
 */
void rc_optimize(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * const end = &c->Program.Instructions;
	struct rc_instruction * cur;
	struct rc_instruction * next;
	struct rc_list * var_list;

	/* The successor is fetched up front because cur may be removed. */
	for (cur = end->Next; cur != end; cur = next) {
		next = cur->Next;

		constant_folding(c, cur);

		/* copy_propagate() may remove cur as well. */
		if (!peephole(c, cur) && cur->U.I.Opcode == RC_OPCODE_MOV)
			copy_propagate(c, cur);
	}

	if (!c->has_omod) {
		return;
	}

	for (cur = end->Next; cur != end; cur = next) {
		next = cur->Next;

		if (cur->U.I.Opcode != RC_OPCODE_MUL)
			continue;

		/* The variable list is rebuilt for every MUL. */
		var_list = rc_get_variables(c);
		peephole_mul_omod(c, cur, var_list);
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
0,0 → 1,88
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_opcodes.h"
#include "radeon_program_pair.h"
 
/*
 * If the presubtract source slot of \p sub is in use, mark the first N
 * source slots as used too, where N is the register count reported by
 * rc_presubtract_src_reg_count() for the presubtract operation.
 */
static void mark_used_presub(struct rc_pair_sub_instruction * sub)
{
	unsigned int reg_count;
	unsigned int slot;

	if (!sub->Src[RC_PAIR_PRESUB_SRC].Used)
		return;

	reg_count = rc_presubtract_src_reg_count(
				sub->Src[RC_PAIR_PRESUB_SRC].Index);

	for (slot = 0; slot < reg_count; slot++)
		sub->Src[slot].Used = 1;
}
 
/*
 * For each argument of the sub-instruction \p sub, mark the RGB and/or Alpha
 * source slot it refers to as used, according to the source type that
 * rc_source_type_swz() derives from the argument swizzle.
 */
static void mark_used(
	struct rc_instruction * inst,
	struct rc_pair_sub_instruction * sub)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
	unsigned int arg = 0;

	while (arg < info->NumSrcRegs) {
		const unsigned int type = rc_source_type_swz(sub->Arg[arg].Swizzle);

		if (type & RC_SOURCE_ALPHA)
			inst->U.P.Alpha.Src[sub->Arg[arg].Source].Used = 1;
		if (type & RC_SOURCE_RGB)
			inst->U.P.RGB.Src[sub->Arg[arg].Source].Used = 1;

		arg++;
	}
}
 
/**
 * Recompute the Used flag of every RGB and Alpha source slot: all four
 * slots are cleared, then only the ones referenced by the instruction's
 * arguments or its presubtract operation are marked again. Instructions of
 * type RC_INSTRUCTION_NORMAL are skipped. \p user is unused.
 */
void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * const end = &c->Program.Instructions;
	struct rc_instruction * inst = end->Next;

	while (inst != end) {
		if (inst->Type != RC_INSTRUCTION_NORMAL) {
			unsigned int slot;

			/* Start from a clean slate. */
			for (slot = 0; slot < 4; slot++) {
				inst->U.P.RGB.Src[slot].Used = 0;
				inst->U.P.Alpha.Src[slot].Used = 0;
			}

			mark_used(inst, &inst->U.P.RGB);
			mark_used(inst, &inst->U.P.Alpha);

			mark_used_presub(&inst->U.P.RGB);
			mark_used_presub(&inst->U.P.Alpha);
		}
		inst = inst->Next;
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
0,0 → 1,789
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_program_pair.h"
 
#include <stdio.h>
 
#include "main/glheader.h"
#include "program/register_allocate.h"
#include "util/u_memory.h"
#include "ralloc.h"
 
#include "r300_fragprog_swizzle.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_list.h"
#include "radeon_regalloc.h"
#include "radeon_variable.h"
 
/* Set to a non-zero value to enable the DBG() trace output of this file. */
#define VERBOSE 0

/* printf-style trace to stderr; emits nothing while VERBOSE is 0. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
 
 
 
/* Allocation bookkeeping for one input or temporary register. */
struct register_info {
/* One live interval per channel (indexed 0..3). */
struct live_intervals Live[4];

unsigned int Used:1; /* set once the register has been seen being read */
unsigned int Allocated:1; /* File/Index below hold a valid assignment */
unsigned int File:3; /* register file assigned to this register */
unsigned int Index:RC_REGISTER_INDEX_BITS; /* index that remap_register() substitutes */
unsigned int Writemask; /* NOTE(review): not referenced in the visible code */
};
 
/* State shared by the register allocation callbacks in this file. */
struct regalloc_state {
struct radeon_compiler * C;

/* Per-input bookkeeping; NumInputs is the bound checked by
 * alloc_input_simple(). */
struct register_info * Input;
unsigned int NumInputs;

/* Per-temporary bookkeeping, NumTemporaries entries. */
struct register_info * Temporary;
unsigned int NumTemporaries;

/* When non-zero, remap_register() also remaps temporaries through the
 * Temporary table. */
unsigned int Simple;
/* Lower bound for the End of input live intervals, see
 * scan_read_callback(). */
int LoopEnd;
};
 
/* Description of one register class: an id plus the writemasks it covers. */
struct rc_class {
enum rc_reg_class ID;

/* Number of meaningful entries in Writemasks. */
unsigned int WritemaskCount;

/** List of writemasks that belong to this class */
unsigned int Writemasks[3];


};
 
/*
 * Table of all register classes. Each entry lists the class id, the number
 * of writemasks that belong to the class, and the writemasks themselves;
 * unused writemask slots are filled with RC_MASK_NONE.
 */
static const struct rc_class rc_class_list [] = {
{RC_REG_CLASS_SINGLE, 3,
{RC_MASK_X,
RC_MASK_Y,
RC_MASK_Z}},
{RC_REG_CLASS_DOUBLE, 3,
{RC_MASK_X | RC_MASK_Y,
RC_MASK_X | RC_MASK_Z,
RC_MASK_Y | RC_MASK_Z}},
{RC_REG_CLASS_TRIPLE, 1,
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_ALPHA, 1,
{RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
{RC_MASK_X | RC_MASK_W,
RC_MASK_Y | RC_MASK_W,
RC_MASK_Z | RC_MASK_W}},
{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
RC_MASK_X | RC_MASK_Z | RC_MASK_W,
RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_X, 1,
{RC_MASK_X,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_Y, 1,
{RC_MASK_Y,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_Z, 1,
{RC_MASK_Z,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_XY, 1,
{RC_MASK_X | RC_MASK_Y,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_YZ, 1,
{RC_MASK_Y | RC_MASK_Z,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_XZ, 1,
{RC_MASK_X | RC_MASK_Z,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_XW, 1,
{RC_MASK_X | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_YW, 1,
{RC_MASK_Y | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_ZW, 1,
{RC_MASK_Z | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_XYW, 1,
{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_YZW, 1,
{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}},
{RC_REG_CLASS_XZW, 1,
{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
RC_MASK_NONE,
RC_MASK_NONE}}
};
 
/*
 * Debug helper: trace a live interval as "(start,end)", or "(null)" when the
 * pointer is NULL or the interval is not in use.
 */
static void print_live_intervals(struct live_intervals * src)
{
	if (src && src->Used) {
		DBG("(%i,%i)", src->Start, src->End);
	} else {
		DBG("(null)");
	}
}
 
/*
 * Decide whether two live intervals overlap.
 *
 * Unused intervals never overlap. If the intervals start at different
 * points, they overlap when the later start lies strictly before the end of
 * the other interval. If they start at the same point, they overlap unless
 * one of them is empty (Start == End).
 *
 * @return 1 on overlap, 0 otherwise.
 */
static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
{
	int overlaps;

	if (VERBOSE) {
		DBG("overlap_live_intervals: ");
		print_live_intervals(a);
		DBG(" to ");
		print_live_intervals(b);
		DBG("\n");
	}

	if (!a->Used || !b->Used) {
		DBG(" unused interval\n");
		return 0;
	}

	if (a->Start == b->Start)
		overlaps = (a->Start != a->End && b->Start != b->End);
	else if (a->Start > b->Start)
		overlaps = (a->Start < b->End);
	else
		overlaps = (b->Start < a->End);

	if (overlaps) {
		DBG(" overlap\n");
		return 1;
	}

	DBG(" no overlap\n");
	return 0;
}
 
/*
 * Read callback: records reads of input registers. The input is marked as
 * used and, for every channel in \p mask, its live interval is set to start
 * at 0 and end at the larger of the loop end and the reading instruction's
 * IP. Reads from any other register file are ignored.
 */
static void scan_read_callback(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state * s = data;
	struct register_info * reg;
	unsigned int chan;

	if (file != RC_FILE_INPUT)
		return;

	reg = &s->Input[index];
	reg->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		struct live_intervals * live = &reg->Live[chan];

		if (((mask >> chan) & 0x1) == 0)
			continue;

		live->Used = 1;
		live->Start = 0;
		live->End =
			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
	}
}
 
/*
 * Remap callback: replaces *index with the allocated index of an input
 * register, or of a temporary when the allocator runs in Simple mode.
 * Registers without an allocation, and all other files, are left unchanged.
 * *file is never modified.
 */
static void remap_register(void * data, struct rc_instruction * inst,
		rc_register_file * file, unsigned int * index)
{
	struct regalloc_state * s = data;
	const struct register_info * table;

	if (*file == RC_FILE_INPUT)
		table = s->Input;
	else if (*file == RC_FILE_TEMPORARY && s->Simple)
		table = s->Temporary;
	else
		return;

	if (!table[*index].Allocated)
		return;

	*index = table[*index].Index;
}
 
static void alloc_input_simple(void * data, unsigned int input,
unsigned int hwreg)
{
struct regalloc_state * s = data;
 
if (input >= s->NumInputs)
return;
 
s->Input[input].Allocated = 1;
s->Input[input].File = RC_FILE_TEMPORARY;
s->Input[input].Index = hwreg;
}
 
/* This functions offsets the temporary register indices by the number
* of input registers, because input registers are actually temporaries and
* should not occupy the same space.
*
* This pass is supposed to be used to maintain correct allocation of inputs
* if the standard register allocation is disabled. */
static void do_regalloc_inputs_only(struct regalloc_state * s)
{
for (unsigned i = 0; i < s->NumTemporaries; i++) {
s->Temporary[i].Allocated = 1;
s->Temporary[i].File = RC_FILE_TEMPORARY;
s->Temporary[i].Index = i + s->NumInputs;
}
}
 
/* Returns 1 for the derivative opcodes DDX and DDY, 0 for everything else. */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
 
/*
 * Find the first register class that contains \p writemask among its three
 * writemask slots and has at most \p max_writemask_count writemasks.
 *
 * @return the index of the class within \p classes, or -1 if none matches.
 */
static int find_class(
	const struct rc_class * classes,
	unsigned int writemask,
	unsigned int max_writemask_count)
{
	unsigned int cls;

	for (cls = 0; cls < RC_REG_CLASS_COUNT; cls++) {
		const struct rc_class * candidate = &classes[cls];

		if (candidate->WritemaskCount > max_writemask_count)
			continue;

		/* All three slots are compared, whatever WritemaskCount says. */
		if (candidate->Writemasks[0] == writemask
		    || candidate->Writemasks[1] == writemask
		    || candidate->Writemasks[2] == writemask)
			return cls;
	}
	return -1;
}
 
/* Context for variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
unsigned int * can_change_writemask; /* cleared by the callback when a swizzle would become non-native */
unsigned int conversion_swizzle; /* swizzle conversion applied to each argument swizzle before the check */
};
 
/*
 * Pair-instruction read callback: applies the conversion swizzle to the
 * argument's swizzle and clears *can_change_writemask when the result is
 * not accepted by r300_swizzle_is_native_basic().
 */
static void variable_get_class_read_cb(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	struct rc_pair_instruction_source * src)
{
	struct variable_get_class_cb_data * cb = userdata;
	unsigned int converted =
		rc_adjust_channels(arg->Swizzle, cb->conversion_swizzle);

	if (r300_swizzle_is_native_basic(converted))
		return;

	*cb->can_change_writemask = 0;
}
 
/* Pick the register class for a variable and its chain of Friends.
 *
 * The search starts from the summed writemask of the variable. The function
 * then decides whether that writemask may be moved to other channels
 * (can_change_writemask). The final lookup via find_class() accepts classes
 * with up to 3 writemasks when the mask may change, and classes with at most
 * 1 writemask otherwise.
 *
 * Returns the ID of the matching entry in classes. If no class matches, the
 * error is reported through rc_error() and 0 is returned. */
static enum rc_reg_class variable_get_class(
struct rc_variable * variable,
const struct rc_class * classes)
{
unsigned int i;
unsigned int can_change_writemask= 1;
unsigned int writemask = rc_variable_writemask_sum(variable);
struct rc_list * readers = rc_variable_readers_union(variable);
int class_index;

if (!variable->C->is_r500) {
struct rc_class c;
struct rc_variable * var_ptr;
/* The assumption here is that if an instruction has type
* RC_INSTRUCTION_NORMAL then it is a TEX instruction.
* r300 and r400 can't swizzle the result of a TEX lookup. */
for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
writemask = RC_MASK_XYZW;
}
}

/* Check if it is possible to do swizzle packing for r300/r400
* without creating non-native swizzles. */
class_index = find_class(classes, writemask, 3);
if (class_index < 0) {
goto error;
}
c = classes[class_index];
/* A class with a single writemask leaves nothing to choose from,
* so the class found above is final. */
if (c.WritemaskCount == 1) {
goto done;
}
/* Try every writemask of the class against every friend; the first
* non-native swizzle clears can_change_writemask and ends the search. */
for (i = 0; i < c.WritemaskCount; i++) {
struct rc_variable * var_ptr;
for (var_ptr = variable; var_ptr;
var_ptr = var_ptr->Friend) {
int j;
unsigned int conversion_swizzle =
rc_make_conversion_swizzle(
writemask, c.Writemasks[i]);
struct variable_get_class_cb_data d;
d.can_change_writemask = &can_change_writemask;
d.conversion_swizzle = conversion_swizzle;
/* If we get this far var_ptr->Inst has to
* be a pair instruction. If variable or any
* of its friends are normal instructions,
* then the writemask will be set to RC_MASK_XYZW
* and the function will return before it gets
* here. */
rc_pair_for_all_reads_arg(var_ptr->Inst,
variable_get_class_read_cb, &d);

for (j = 0; j < var_ptr->ReaderCount; j++) {
unsigned int old_swizzle;
unsigned int new_swizzle;
struct rc_reader r = var_ptr->Readers[j];
if (r.Inst->Type ==
RC_INSTRUCTION_PAIR ) {
old_swizzle = r.U.P.Arg->Swizzle;
} else {
/* Source operands of TEX
* instructions can't be
* swizzled on r300/r400 GPUs.
*/
/* NOTE(review): this test repeats the enclosing
* !is_r500 condition, so it looks always true
* here and the assignment below unreachable. */
if (!variable->C->is_r500) {
can_change_writemask = 0;
break;
}
old_swizzle = r.U.I.Src->Swizzle;
}
new_swizzle = rc_adjust_channels(
old_swizzle, conversion_swizzle);
if (!r300_swizzle_is_native_basic(
new_swizzle)) {
can_change_writemask = 0;
break;
}
}
if (!can_change_writemask) {
break;
}
}
if (!can_change_writemask) {
break;
}
}
}

if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
/* DDX/DDY seem to always fail when their writemasks are
* changed.*/
if (is_derivative(variable->Inst->U.P.RGB.Opcode)
|| is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
can_change_writemask = 0;
}
}
/* Readers that use the presubtract source or are DDX/DDY also pin the
* writemask. */
for ( ; readers; readers = readers->Next) {
struct rc_reader * r = readers->Item;
if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
can_change_writemask = 0;
break;
}
/* DDX/DDY also fail when their swizzles are changed. */
if (is_derivative(r->Inst->U.P.RGB.Opcode)
|| is_derivative(r->Inst->U.P.Alpha.Opcode)) {
can_change_writemask = 0;
break;
}
}
}

class_index = find_class(classes, writemask,
can_change_writemask ? 3 : 1);
done:
if (class_index > -1) {
return classes[class_index].ID;
} else {
/* The early "goto error" above jumps straight into this branch. */
error:
rc_error(variable->C,
"Could not find class for index=%u mask=%u\n",
variable->Dst.Index, writemask);
return 0;
}
}
 
/**
 * Test two 4-channel live interval arrays against each other.
 *
 * \return 1 if any channel of \p a overlaps any channel of \p b, else 0.
 */
static unsigned int overlap_live_intervals_array(
	struct live_intervals * a,
	struct live_intervals * b)
{
	unsigned int pair;

	/* Visit all 16 (a channel, b channel) combinations, a-major. */
	for (pair = 0; pair < 4 * 4; pair++) {
		if (overlap_live_intervals(&a[pair / 4], &b[pair % 4]))
			return 1;
	}
	return 0;
}
 
/* Inverse of get_reg_id(): recovers the register index from a register id. */
static unsigned int reg_get_index(int reg)
{
	unsigned int index = reg / RC_MASK_XYZW;

	return index;
}
 
/* Inverse of get_reg_id(): recovers the writemask (1..RC_MASK_XYZW) from a
 * register id. */
static unsigned int reg_get_writemask(int reg)
{
	unsigned int writemask = (reg % RC_MASK_XYZW) + 1;

	return writemask;
}
 
/**
 * Encode a (register index, writemask) pair as a single register id:
 * each index owns RC_MASK_XYZW consecutive ids, one per non-empty writemask.
 *
 * An empty writemask is a caller error: it asserts in debug builds and
 * yields id 0 otherwise.
 */
static int get_reg_id(unsigned int index, unsigned int writemask)
{
	assert(writemask);
	return writemask ? (index * RC_MASK_XYZW) + (writemask - 1) : 0;
}
 
#if VERBOSE
/* Debug helper: writes the register id to stderr as "Temp[index].xyzw",
 * with '_' in place of channels missing from the writemask. */
static void print_reg(int reg)
{
	unsigned int index = reg_get_index(reg);
	unsigned int mask = reg_get_writemask(reg);
	char x = (mask & RC_MASK_X) ? 'x' : '_';
	char y = (mask & RC_MASK_Y) ? 'y' : '_';
	char z = (mask & RC_MASK_Z) ? 'z' : '_';
	char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
 
/**
 * Register conflicts between every pair of register ids that belong to the
 * same temporary index and whose writemasks share at least one channel.
 *
 * \param regs           register set receiving the conflicts
 * \param max_temp_regs  number of temporary indices to cover
 */
static void add_register_conflicts(
	struct ra_regs * regs,
	unsigned int max_temp_regs)
{
	unsigned int temp;

	for (temp = 0; temp < max_temp_regs; temp++) {
		unsigned int lo;

		for (lo = 1; lo <= RC_MASK_XYZW; lo++) {
			unsigned int hi;

			/* Only unordered pairs are visited: hi > lo. */
			for (hi = lo + 1; hi <= RC_MASK_XYZW; hi++) {
				if (!(lo & hi))
					continue;
				ra_add_reg_conflict(regs,
						get_reg_id(temp, lo),
						get_reg_id(temp, hi));
			}
		}
	}
}
 
/**
 * Full register allocation using the interference-graph allocator.
 *
 * Steps:
 *  1. collect the program variables, compute their live intervals and pick a
 *     register class for each;
 *  2. compute live intervals and writemasks of the input registers;
 *  3. build the interference graph (variable/variable and variable/input),
 *     pinning every used input to its pre-assigned register;
 *  4. run the allocator and rewrite every variable's destination.
 *
 * On allocation failure an error is reported through rc_error() and the
 * program is left unmodified. The interference graph is released on both the
 * success and the failure path.
 */
static void do_advanced_regalloc(struct regalloc_state * s)
{
	unsigned int i, input_node, node_count, node_index;
	unsigned int * node_classes;
	struct rc_instruction * inst;
	struct rc_list * var_ptr;
	struct rc_list * variables;
	struct ra_graph * graph;
	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;

	/* Get list of program variables */
	variables = rc_get_variables(s->C);
	node_count = rc_list_count(variables);
	node_classes = memory_pool_malloc(&s->C->Pool,
			node_count * sizeof(unsigned int));

	for (var_ptr = variables, node_index = 0; var_ptr;
					var_ptr = var_ptr->Next, node_index++) {
		unsigned int class_index;
		/* Compute the live intervals */
		rc_variable_compute_live_intervals(var_ptr->Item);

		class_index = variable_get_class(var_ptr->Item, rc_class_list);
		node_classes[node_index] = ra_state->class_ids[class_index];
	}


	/* Calculate live intervals for input registers */
	for (inst = s->C->Program.Instructions.Next;
					inst != &s->C->Program.Instructions;
					inst = inst->Next) {
		rc_opcode op = rc_get_flow_control_inst(inst);
		if (op == RC_OPCODE_BGNLOOP) {
			struct rc_instruction * endloop =
							rc_match_bgnloop(inst);
			/* Track the furthest loop end seen so far. */
			if (endloop->IP > s->LoopEnd) {
				s->LoopEnd = endloop->IP;
			}
		}
		rc_for_all_reads_mask(inst, scan_read_callback, s);
	}

	/* Compute the writemask for inputs. */
	for (i = 0; i < s->NumInputs; i++) {
		unsigned int chan, writemask = 0;
		for (chan = 0; chan < 4; chan++) {
			if (s->Input[i].Live[chan].Used) {
				writemask |= (1 << chan);
			}
		}
		s->Input[i].Writemask = writemask;
	}

	/* Variables occupy nodes [0, node_count); used inputs follow. */
	graph = ra_alloc_interference_graph(ra_state->regs,
						node_count + s->NumInputs);

	/* Build the interference graph */
	for (var_ptr = variables, node_index = 0; var_ptr;
					var_ptr = var_ptr->Next,node_index++) {
		struct rc_list * a, * b;
		unsigned int b_index;

		ra_set_node_class(graph, node_index, node_classes[node_index]);

		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
						b; b = b->Next, b_index++) {
			struct rc_variable * var_a = a->Item;
			/* Every friend of a is compared with every friend
			 * of b. */
			while (var_a) {
				struct rc_variable * var_b = b->Item;
				while (var_b) {
					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
						ra_add_node_interference(graph,
							node_index, b_index);
					}
					var_b = var_b->Friend;
				}
				var_a = var_a->Friend;
			}
		}
	}

	/* Add input registers to the interference graph */
	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
		/* Inputs that are never read get no node. */
		if (!s->Input[i].Writemask) {
			continue;
		}
		for (var_ptr = variables, node_index = 0;
				var_ptr; var_ptr = var_ptr->Next, node_index++) {
			struct rc_variable * var = var_ptr->Item;
			if (overlap_live_intervals_array(s->Input[i].Live,
								var->Live)) {
				ra_add_node_interference(graph, node_index,
						node_count + input_node);
			}
		}
		/* Manually allocate a register for this input */
		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
				s->Input[i].Index, s->Input[i].Writemask));
		input_node++;
	}

	if (!ra_allocate_no_spills(graph)) {
		rc_error(s->C, "Ran out of hardware temporaries\n");
		/* Do not leak the graph on the failure path. */
		ralloc_free(graph);
		return;
	}

	/* Rewrite the registers */
	for (var_ptr = variables, node_index = 0; var_ptr;
				var_ptr = var_ptr->Next, node_index++) {
		int reg = ra_get_node_reg(graph, node_index);
		unsigned int writemask = reg_get_writemask(reg);
		unsigned int index = reg_get_index(reg);
		struct rc_variable * var = var_ptr->Item;

		/* On non-r500 a NORMAL instruction keeps its original
		 * writemask; only the index is taken from the allocator. */
		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
			writemask = rc_variable_writemask_sum(var);
		}

		if (var->Dst.File == RC_FILE_INPUT) {
			continue;
		}
		rc_variable_change_dst(var, index, writemask);
	}

	ralloc_free(graph);
}
 
/**
 * Build the register set shared by all r300 register allocation runs.
 *
 * Creates one ra register class per entry of rc_class_list, adds every
 * (temporary index, writemask) register id of that class, registers the
 * conflicts between overlapping writemasks and finalizes the set with
 * pre-computed q values.
 *
 * \param s  state to initialize; s->regs and s->class_ids are written.
 */
void rc_init_regalloc_state(struct rc_regalloc_state *s)
{
	unsigned i, j, index;
	unsigned **ra_q_values;

	/* Pre-computed q values.  This array describes the maximum number of
	 * a class's [row] registers that are in conflict with a single
	 * register from another class [column].
	 *
	 * For example:
	 * q_values[0][2] is 3, because a register from class 2
	 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
	 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
	 * and T0.z.
	 *
	 * q_values[2][0] is 1, because a register from class 0
	 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
	 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
	 *
	 * The q values for each register class [row] will never be greater
	 * than the maximum number of writemask combinations for that class.
	 *
	 * For example:
	 *
	 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
	 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
	 * than 1.
	 */
	const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
	};

	/* Allocate the main ra data structure */
	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW);

	/* Create the register classes */
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
		const struct rc_class *class = &rc_class_list[i];
		s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);

		/* Assign registers to the classes */
		for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
			for (j = 0; j < class->WritemaskCount; j++) {
				int reg_id = get_reg_id(index,
						class->Writemasks[j]);
				ra_class_add_reg(s->regs,
					s->class_ids[class->ID], reg_id);
			}
		}
	}

	/* Set the q values.  The q_values array is indexed based on
	 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
	 * different than the ID assigned to that class by ra.
	 * This is why we need to manually construct this list.
	 */
	ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));

	/* Allocate every row before filling any of them: the fill below
	 * addresses rows by ra class id, which need not equal the loop
	 * counter, so a row must never be written before it exists. */
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
		ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
	}

	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
		for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
			ra_q_values[s->class_ids[i]][s->class_ids[j]] =
							q_values[i][j];
		}
	}

	/* Add register conflicts */
	add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);

	ra_set_finalize(s->regs, ra_q_values);

	/* The temporary q value table is released right after finalizing. */
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
		FREE(ra_q_values[i]);
	}
	FREE(ra_q_values);
}
 
/* Releases the register set created by rc_init_regalloc_state(). */
void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
{
	struct ra_regs * regs = s->regs;

	ralloc_free(regs);
}
 
/**
 * Register allocation pass for pair-form fragment programs.
 *
 * @param cc The compiler; it is also accessed as a
 * struct r300_fragment_program_compiler.
 * @param user This parameter should be a pointer to an integer value. If this
 * integer value is zero, then a simple register allocator will be used that
 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
 * the integer is non-zero, then the regular register allocator will be used
 * (\sa do_advanced_regalloc).
 */
void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
{
	struct r300_fragment_program_compiler *c =
				(struct r300_fragment_program_compiler*)cc;
	struct regalloc_state s;
	struct rc_instruction *inst;
	int * do_full_regalloc = (int*)user;

	memset(&s, 0, sizeof(s));
	s.C = cc;

	/* One zero-initialized register_info per input register. */
	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
	s.Input = memory_pool_malloc(&cc->Pool,
			s.NumInputs * sizeof(struct register_info));
	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));

	/* Same for the temporaries. */
	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
	s.Temporary = memory_pool_malloc(&cc->Pool,
			s.NumTemporaries * sizeof(struct register_info));
	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));

	rc_recompute_ips(s.C);

	c->AllocateHwInputs(c, &alloc_input_simple, &s);
	if (!*do_full_regalloc) {
		s.Simple = 1;
		do_regalloc_inputs_only(&s);
	} else {
		do_advanced_regalloc(&s);
	}

	/* Rewrite inputs and if we are doing the simple allocation, rewrite
	 * temporaries too. */
	inst = s.C->Program.Instructions.Next;
	while (inst != &s.C->Program.Instructions) {
		rc_remap_registers(inst, &remap_register, &s);
		inst = inst->Next;
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
0,0 → 1,1359
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_program_pair.h"
 
#include <stdio.h>
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_list.h"
#include "radeon_variable.h"
 
#include "util/u_debug.h"
 
#define VERBOSE 0
 
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
 
/** Scheduler bookkeeping attached to one rc_instruction. */
struct schedule_instruction {
/** The program instruction this entry describes. */
struct rc_instruction * Instruction;

/** Next instruction in the linked list of ready instructions. */
struct schedule_instruction *NextReady;

/** Values that this instruction reads and writes */
struct reg_value * WriteValues[4];
struct reg_value * ReadValues[12];
/** Number of valid entries in WriteValues / ReadValues. */
unsigned int NumWriteValues:3;
unsigned int NumReadValues:4;

/**
* Number of (read and write) dependencies that must be resolved before
* this instruction can be scheduled.
*/
unsigned int NumDependencies:5;

/** List of all readers (see rc_get_readers() for the definition of
* "all readers"), even those outside the basic block this instruction
* lives in. */
struct rc_reader_data GlobalReaders;

/** If the scheduler has paired an RGB and an Alpha instruction together,
* PairedInst references the alpha instruction's dependency information.
*/
struct schedule_instruction * PairedInst;

/** This scheduler uses the value of Score to determine which
* instruction to schedule. Instructions with a higher value of Score
* will be scheduled first. */
int Score;

/** The number of components that read from a TEX instruction. */
unsigned TexReadCount;

/** For TEX instructions a list of readers */
struct rc_list * TexReaders;
};
 
 
/**
* Used to keep track of which instructions read a value.
* One node of a singly linked list hanging off reg_value::Readers.
*/
struct reg_value_reader {
/** Instruction that reads the value. */
struct schedule_instruction *Reader;
/** Next reader of the same value, or NULL at the end of the list. */
struct reg_value_reader *Next;
};
 
/**
* Used to keep track which values are stored in each component of a
* RC_FILE_TEMPORARY.
*/
struct reg_value {
/** Instruction that writes this value. */
struct schedule_instruction * Writer;

/**
* Unordered linked list of instructions that read from this value.
* When this value becomes available (its writer is committed), the
* dependency count of every reader is decreased.
*/
struct reg_value_reader *Readers;

/**
* Number of readers of this value. This is decremented each time
* a reader of the value is committed.
* When the reader count reaches zero, the dependency count
* of the instruction writing \ref Next is decremented.
*/
unsigned int NumReaders;

struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
 
/** Per-register scheduler state: one reg_value pointer per channel
* (indexed by channel in get_reg_valuep()). */
struct register_state {
struct reg_value * Values[4];
};
 
/**
 * One entry of a linked list of register remappings: the (index, swizzle)
 * pair to replace and the (index, swizzle) pair replacing it.
 */
struct remap_reg {
	/* Was spelled "struct rc_instruciont", which silently declared an
	 * unrelated incomplete type instead of naming rc_instruction. */
	struct rc_instruction * Inst;
	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int OldSwizzle:3;
	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int NewSwizzle:3;
	unsigned int OnlyTexReads:1;
	/* Next remapping in the list. */
	struct remap_reg * Next;
};
 
/** State of the pair instruction scheduler. */
struct schedule_state {
struct radeon_compiler * C;
struct schedule_instruction * Current;
/** Array of the previous writers of Current's destination register
* indexed by channel. */
struct schedule_instruction * PrevWriter[4];

/** Per-temporary value tracking, indexed by register index. */
struct register_state Temporary[RC_REGISTER_MAX_INDEX];

/**
* Linked lists of instructions that can be scheduled right now,
* based on which ALU/TEX resources they require.
*/
/*@{*/
struct schedule_instruction *ReadyFullALU;
struct schedule_instruction *ReadyRGB;
struct schedule_instruction *ReadyAlpha;
struct schedule_instruction *ReadyTEX;
/*@}*/
/** TEX instructions already emitted by emit_all_tex(); the list is
* consumed and reset by notify_sem_wait(). */
struct rc_list *PendingTEX;

/** Scoring heuristic applied to a schedule_instruction. */
void (*CalcScore)(struct schedule_instruction *);
long max_tex_group;
unsigned PrevBlockHasTex:1;
unsigned TEXCount;
unsigned Opt:1;
};
 
/**
 * Return the address of the value slot tracked for one channel of a
 * temporary register.
 *
 * \param s      scheduler state
 * \param file   register file; only RC_FILE_TEMPORARY is tracked
 * \param index  register index, must be below RC_REGISTER_MAX_INDEX
 * \param chan   channel of the register
 * \return pointer to the slot, or NULL when the file is not tracked or the
 *         index is out of range (the latter also reports an rc_error()).
 */
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	if (file != RC_FILE_TEMPORARY)
		return NULL;

	if (index >= RC_REGISTER_MAX_INDEX) {
		/* index is unsigned: %u keeps large values from being
		 * printed as negative numbers. */
		rc_error(s->C, "%s: index %u out of bounds\n", __FUNCTION__, index);
		return NULL;
	}

	return &s->Temporary[index].Values[chan];
}
 
static unsigned get_tex_read_count(struct schedule_instruction * sinst)
{
unsigned tex_read_count = sinst->TexReadCount;
if (sinst->PairedInst) {
tex_read_count += sinst->PairedInst->TexReadCount;
}
return tex_read_count;
}
 
#if VERBOSE
/* Debug helper: dumps a ready list to stderr as "IP (score) [tex reads],"
 * entries followed by a newline. */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;
	for (ptr = sinst; ptr; ptr=ptr->NextReady) {
		unsigned tex_read_count = get_tex_read_count(ptr);
		/* Score of the entry being printed, not of the list head;
		 * kept signed to match both the field and the %d below. */
		int score = ptr->Score;
		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
 
/**
 * Unlink \p inst from the ready list \p list if it is present, and clear its
 * NextReady pointer. Does nothing when \p inst is not in the list.
 */
static void remove_inst_from_list(struct schedule_instruction ** list,
					struct schedule_instruction * inst)
{
	/* link always points at the pointer that refers to the current
	 * node: first the list head, then each node's NextReady. */
	struct schedule_instruction ** link;

	for (link = list; *link; link = &(*link)->NextReady) {
		if (*link != inst)
			continue;
		*link = inst->NextReady;
		inst->NextReady = NULL;
		return;
	}
}
 
/* Pushes inst onto the front of the ready list. */
static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
{
	struct schedule_instruction * old_head = *list;

	inst->NextReady = old_head;
	*list = inst;
}
 
/**
 * Insert \p inst into a ready list that is kept sorted by descending Score.
 *
 * The instruction is placed after every entry whose Score is greater than
 * or equal to its own, so entries with equal Score stay in insertion order.
 * inst->NextReady is always overwritten, including when the list was empty,
 * so a stale link from an earlier list membership can never become the tail
 * of this list.
 */
static void add_inst_to_list_score(struct schedule_instruction ** list,
					struct schedule_instruction * inst)
{
	/* link points at the pointer that will refer to inst. */
	struct schedule_instruction ** link = list;

	while (*link && inst->Score <= (*link)->Score)
		link = &(*link)->NextReady;

	inst->NextReady = *link;
	*link = inst;
}
 
/**
 * Queue an instruction whose dependencies are all resolved on the ready list
 * matching the resources it needs: TEX for NORMAL instructions, RGB when the
 * alpha half is a NOP, Alpha when the RGB half is a NOP, and FullALU
 * otherwise. Insertion is ordered by Score.
 */
static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
{
	struct schedule_instruction ** queue;

	DBG("%i is now ready\n", sinst->Instruction->IP);

	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
		queue = &s->ReadyTEX;
	} else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) {
		queue = &s->ReadyRGB;
	} else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) {
		queue = &s->ReadyAlpha;
	} else {
		queue = &s->ReadyFullALU;
	}

	add_inst_to_list_score(queue, sinst);
}
 
/* Resolves one dependency of sinst; once none are left the instruction is
 * moved to a ready list. */
static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
{
	assert(sinst->NumDependencies > 0);

	if (--sinst->NumDependencies == 0)
		instruction_ready(s, sinst);
}
 
/* These functions provide different heuristics for scheduling instructions.
* The default is calc_score_readers. */
 
/* Alternative scoring heuristics, currently compiled out. */
#if 0

/* Gives every instruction the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
sinst->Score = 0;
}

/* Scores an instruction by the readers of the values it writes: each reader
 * adds its remaining dependency count, plus 100 when this is its last
 * outstanding dependency. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
int i;
sinst->Score = 0;
for (i = 0; i < sinst->NumWriteValues; i++) {
struct reg_value * v = sinst->WriteValues[i];
if (v->NumReaders) {
struct reg_value_reader * r;
for (r = v->Readers; r; r = r->Next) {
if (r->Reader->NumDependencies == 1) {
sinst->Score += 100;
}
sinst->Score += r->Reader->NumDependencies;
}
}
}
}

#endif
 
#define NO_OUTPUT_SCORE (1 << 24)
 
static void score_no_output(struct schedule_instruction * sinst)
{
assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
if (sinst->PairedInst) {
if (!sinst->PairedInst->Instruction->U.P.
RGB.OutputWriteMask
&& !sinst->PairedInst->Instruction->U.P.
Alpha.OutputWriteMask) {
sinst->Score |= NO_OUTPUT_SCORE;
}
 
} else {
sinst->Score |= NO_OUTPUT_SCORE;
}
}
}
 
#define PAIRED_SCORE (1 << 16)
 
/**
 * Scoring heuristic: NORMAL instructions score 0. Pair instructions get the
 * no-output bonus from score_no_output(), then either the PAIRED_SCORE bit
 * (when already paired) or one point per used RGB/Alpha source slot.
 */
static void calc_score_r300(struct schedule_instruction * sinst)
{
	unsigned slot;

	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
		sinst->Score = 0;
		return;
	}

	score_no_output(sinst);

	if (sinst->PairedInst) {
		sinst->Score |= PAIRED_SCORE;
	} else {
		for (slot = 0; slot < 4; slot++) {
			sinst->Score +=
				sinst->Instruction->U.P.RGB.Src[slot].Used +
				sinst->Instruction->U.P.Alpha.Src[slot].Used;
		}
	}
}
 
#define NO_READ_TEX_SCORE (1 << 16)
 
/**
 * Scoring heuristic: NORMAL instructions score 0. Pair instructions score
 * the number of values they (and their paired instruction) read, get the
 * NO_READ_TEX_SCORE bit when they read no TEX result, and finally the
 * no-output bonus from score_no_output().
 */
static void calc_score_readers(struct schedule_instruction * sinst)
{
	int score;

	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
		sinst->Score = 0;
		return;
	}

	score = sinst->NumReadValues;
	if (sinst->PairedInst)
		score += sinst->PairedInst->NumReadValues;
	if (get_tex_read_count(sinst) == 0)
		score |= NO_READ_TEX_SCORE;

	/* Score must be stored before score_no_output() ORs into it. */
	sinst->Score = score;
	score_no_output(sinst);
}
 
/**
 * Account for the reads of a committed instruction (and of the instruction
 * paired with it): every value it reads loses one reader, and when a value
 * has no readers left, the next writer of that register has one dependency
 * resolved.
 */
static void commit_update_reads(struct schedule_state * s,
					struct schedule_instruction * sinst){
	struct schedule_instruction * cur = sinst;

	/* Walk sinst and then its chain of paired instructions. */
	do {
		unsigned int i;

		for (i = 0; i < cur->NumReadValues; ++i) {
			struct reg_value * v = cur->ReadValues[i];

			assert(v->NumReaders > 0);
			v->NumReaders--;
			if (v->NumReaders == 0 && v->Next)
				decrease_dependencies(s, v->Next->Writer);
		}
		cur = cur->PairedInst;
	} while (cur);
}
 
/**
 * Account for the writes of a committed instruction (and of the instruction
 * paired with it): every reader of a written value has one dependency
 * resolved. A value without readers instead releases the next writer of the
 * same register.
 */
static void commit_update_writes(struct schedule_state * s,
					struct schedule_instruction * sinst){
	struct schedule_instruction * cur = sinst;

	/* Walk sinst and then its chain of paired instructions. */
	do {
		unsigned int i;

		for (i = 0; i < cur->NumWriteValues; ++i) {
			struct reg_value * v = cur->WriteValues[i];
			struct reg_value_reader * r;

			if (!v->NumReaders) {
				/* This happens in instruction sequences of
				 * the type
				 *    OP r.x, ...;
				 *    OP r.x, r.x, ...;
				 * See also the subtlety in how instructions
				 * that both read and write the same register
				 * are scanned.
				 */
				if (v->Next)
					decrease_dependencies(s, v->Next->Writer);
				continue;
			}

			for (r = v->Readers; r; r = r->Next)
				decrease_dependencies(s, r->Reader);
		}
		cur = cur->PairedInst;
	} while (cur);
}
 
/**
 * Decrement TexReadCount of every reader of every pending TEX instruction,
 * then empty the PendingTEX list.
 */
static void notify_sem_wait(struct schedule_state *s)
{
	struct rc_list * pending_node = s->PendingTEX;

	while (pending_node) {
		struct schedule_instruction * tex = pending_node->Item;
		struct rc_list * reader_node = tex->TexReaders;

		while (reader_node) {
			struct schedule_instruction * reader = reader_node->Item;

			reader->TexReadCount--;
			reader_node = reader_node->Next;
		}
		pending_node = pending_node->Next;
	}
	s->PendingTEX = NULL;
}
 
/**
 * Commit an ALU instruction: update read and write dependencies, and if the
 * instruction (or its pair) still reads a TEX result, set its SemWait flag
 * and notify the readers of all pending TEX instructions.
 */
static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
{
	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);

	commit_update_reads(s, sinst);
	commit_update_writes(s, sinst);

	if (get_tex_read_count(sinst) == 0)
		return;

	sinst->Instruction->U.P.SemWait = 1;
	notify_sem_wait(s);
}
 
/**
 * Emit all ready texture instructions in a single block.
 *
 * Emit as a single block to (hopefully) sample many textures in parallel,
 * and to avoid hardware indirections on R300.
 *
 * The block is inserted in front of \p before and starts with a
 * RC_OPCODE_BEGIN_TEX marker. The ReadyTEX list must not be empty.
 */
static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
{
	struct schedule_instruction * tex;
	struct schedule_instruction * block_head;
	struct rc_instruction * marker;

	assert(s->ReadyTEX);
	notify_sem_wait(s);

	/* Node marker for R300 */
	marker = rc_insert_new_instruction(s->C, before->Prev);
	marker->U.I.Opcode = RC_OPCODE_BEGIN_TEX;

	/* Link texture instructions back in */
	for (tex = s->ReadyTEX; tex; tex = tex->NextReady) {
		rc_insert_instruction(before->Prev, tex->Instruction);
		DBG("%i: commit TEX reads\n", tex->Instruction->IP);

		/* All of the TEX instructions in the same TEX block have
		 * their source registers read from before any of the
		 * instructions in that block write to their destination
		 * registers.  This means that when we commit a TEX
		 * instruction, any other TEX instruction that wants to write
		 * to one of the committed instruction's source register can be
		 * marked as ready and should be emitted in the same TEX
		 * block. This prevents the following sequence from being
		 * emitted in two different TEX blocks:
		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
		 */
		commit_update_reads(s, tex);
	}

	/* Detach the list before committing writes, so instructions that
	 * become ready from here on start a fresh ReadyTEX list. */
	block_head = s->ReadyTEX;
	s->ReadyTEX = 0;
	for (tex = block_head; tex; tex = tex->NextReady) {
		DBG("%i: commit TEX writes\n", tex->Instruction->IP);
		commit_update_writes(s, tex);
		/* Set semaphore bits for last TEX instruction in the block */
		if (!tex->NextReady) {
			tex->Instruction->U.I.TexSemAcquire = 1;
			tex->Instruction->U.I.TexSemWait = 1;
		}
		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, tex));
	}
}
 
/* This is a helper function for destructive_merge_instructions(). It helps
* merge presubtract sources from two instructions and makes sure the
* presubtract sources end up in the correct spot. This function assumes that
* dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
* but no scalar instruction (alpha).
* @param dst_full instruction receiving the presubtract sources; modified
* in place, also when the merge fails part-way.
* @param src sub-instruction (passed by value) whose presubtract sources are
* merged in.
* @param type RC_SOURCE_RGB or RC_SOURCE_ALPHA, selecting which half of
* dst_full receives the sources.
* @return 0 if merging the presubtract sources fails.
* @return 1 if merging the presubtract sources succeeds.
*/
static int merge_presub_sources(
struct rc_pair_instruction * dst_full,
struct rc_pair_sub_instruction src,
unsigned int type)
{
unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
struct rc_pair_sub_instruction * dst_sub;
const struct rc_opcode_info * info;

assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

switch(type) {
case RC_SOURCE_RGB:
is_rgb = 1;
is_alpha = 0;
dst_sub = &dst_full->RGB;
break;
case RC_SOURCE_ALPHA:
is_rgb = 0;
is_alpha = 1;
dst_sub = &dst_full->Alpha;
break;
default:
assert(0);
return 0;
}

info = rc_get_opcode_info(dst_full->RGB.Opcode);

/* The destination already uses its presubtract slot: cannot merge. */
if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
return 0;

/* Number of source registers the presubtract operation reads. */
srcp_regs = rc_presubtract_src_reg_count(
src.Src[RC_PAIR_PRESUB_SRC].Index);
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
unsigned int one_way = 0;
struct rc_pair_instruction_source srcp = src.Src[srcp_src];
struct rc_pair_instruction_source temp;

free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
srcp.File, srcp.Index);

/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;

/* Move the newly allocated source into slot srcp_src, remembering
* what that slot held before. */
temp = dst_sub->Src[srcp_src];
dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

/* srcp needs src0 and src1 to be the same */
if (free_source < srcp_src) {
if (!temp.Used)
continue;
/* Find a new home for the displaced source. */
free_source = rc_pair_alloc_source(dst_full, is_rgb,
is_alpha, temp.File, temp.Index);
if (free_source < 0)
return 0;
one_way = 1;
} else {
dst_sub->Src[free_source] = temp;
}

/* If free_source == srcp_src, then the presubtract
* source is already in the correct place. */
if (free_source == srcp_src)
continue;

/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for(arg = 0; arg < info->NumSrcRegs; arg++) {
/*If this arg does not read from an rgb source,
* do nothing. */
if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
& type)) {
continue;
}

if (dst_full->RGB.Arg[arg].Source == srcp_src)
dst_full->RGB.Arg[arg].Source = free_source;
/* We need to do this just in case register
* is one of the sources already, but in the
* wrong spot. */
else if(dst_full->RGB.Arg[arg].Source == free_source
&& !one_way) {
dst_full->RGB.Arg[arg].Source = srcp_src;
}
}
}
return 1;
}
 
 
/* Merge the alpha instruction into the rgb instruction, modifying rgb in
* place.
*
* This function assumes that rgb.Alpha and alpha.RGB are unused.
*
* Returns 1 on success. Returns 0 on failure, in which case rgb may already
* have been partially modified; merge_instructions() restores it from a
* backup copy. */
static int destructive_merge_instructions(
struct rc_pair_instruction * rgb,
struct rc_pair_instruction * alpha)
{
const struct rc_opcode_info * opcode;

assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);

/* Presubtract registers need to be merged first so that registers
* needed by the presubtract operation can be placed in src0 and/or
* src1. */

/* Merge the rgb presubtract registers. */
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
return 0;
}
}
/* Merge the alpha presubtract registers */
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
return 0;
}
}

/* Copy alpha args into rgb */
opcode = rc_get_opcode_info(alpha->Alpha.Opcode);

for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
rc_register_file file = 0;
unsigned int index = 0;
int source;

/* Swizzle values 0..2 select an rgb source, 3 selects an alpha
* source; for any other value both flags stay 0 and file/index
* stay 0. */
if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
srcrgb = 1;
file = alpha->RGB.Src[oldsrc].File;
index = alpha->RGB.Src[oldsrc].Index;
} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
srcalpha = 1;
file = alpha->Alpha.Src[oldsrc].File;
index = alpha->Alpha.Src[oldsrc].Index;
}

source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
if (source < 0)
return 0;

rgb->Alpha.Arg[arg].Source = source;
rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
}

/* Copy alpha opcode into rgb */
rgb->Alpha.Opcode = alpha->Alpha.Opcode;
rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
rgb->Alpha.Saturate = alpha->Alpha.Saturate;
rgb->Alpha.Omod = alpha->Alpha.Omod;

/* Merge ALU result writing */
if (alpha->WriteALUResult) {
/* Both halves want to write the ALU result: cannot merge. */
if (rgb->WriteALUResult)
return 0;

rgb->WriteALUResult = alpha->WriteALUResult;
rgb->ALUResultCompare = alpha->ALUResultCompare;
}

/* Copy SemWait */
rgb->SemWait |= alpha->SemWait;

return 1;
}
 
/**
* Try to merge the given instructions into the rgb instructions.
*
* Return true on success; on failure, return false, and keep
* the instructions untouched.
*/
static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
{
struct rc_pair_instruction backup;
 
/*Instructions can't write output registers and ALU result at the
* same time. */
if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
return 0;
}
 
/* Writing output registers in the middle of shaders is slow, so
* we don't want to pair output writes with temp writes. */
if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
return 0;
}
 
memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
 
if (destructive_merge_instructions(rgb, alpha))
return 1;
 
memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
return 0;
}
 
/**
 * Set the Nop flag on the instruction preceding 'emitted' when 'emitted'
 * uses a presubtract operation whose inputs include a temporary register
 * that the preceding pair instruction writes.
 *
 * Only a preceding RC_INSTRUCTION_PAIR is considered. A destination index of
 * -1 stands for "this half of the previous instruction writes nothing", so
 * that it can never match a source register index.
 *
 * \param emitted The instruction that was just emitted; emitted->Prev is
 * read and possibly flagged.
 */
static void presub_nop(struct rc_instruction * emitted) {
	int prev_rgb_index, prev_alpha_index, i, num_src;

	/* We don't need a nop if the previous instruction is a TEX. */
	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
		return;
	}
	if (emitted->Prev->U.P.RGB.WriteMask)
		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
	else
		prev_rgb_index = -1;
	if (emitted->Prev->U.P.Alpha.WriteMask)
		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
	else
		/* Use the same "no destination" marker as the RGB half. A
		 * fallback of 1 would make temporary 1 look like it was
		 * written by the previous instruction. */
		prev_alpha_index = -1;

	/* Check the sources feeding the RGB presubtract operation. */
	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
		num_src = rc_presubtract_src_reg_count(
			emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
		for (i = 0; i < num_src; i++) {
			unsigned int index = emitted->U.P.RGB.Src[i].Index;
			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
			    && (index == prev_rgb_index
				|| index == prev_alpha_index)) {
				emitted->Prev->U.P.Nop = 1;
				return;
			}
		}
	}

	/* Check the sources feeding the Alpha presubtract operation. */
	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
		return;

	num_src = rc_presubtract_src_reg_count(
		emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
	for (i = 0; i < num_src; i++) {
		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
		   && (index == prev_rgb_index || index == prev_alpha_index)) {
			emitted->Prev->U.P.Nop = 1;
			return;
		}
	}
}
 
/**
 * Redirect one reader argument from an RGB channel to the alpha channel.
 *
 * Every one of the first three swizzle channels of 'arg' that currently
 * selects 'old_swz' is switched to W, and an alpha source slot for
 * (old_file, new_index) is allocated in 'inst' and stored in arg->Source.
 */
static void rgb_to_alpha_remap (
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	rc_register_file old_file,
	rc_swizzle old_swz,
	unsigned int new_index)
{
	unsigned int chan;
	int alpha_src;

	for (chan = 0; chan < 3; chan++) {
		if (get_swz(arg->Swizzle, chan) != old_swz)
			continue;
		SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
	}

	alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
					 old_file, new_index);
	if (alpha_src >= 0) {
		arg->Source = alpha_src;
		return;
	}

	/* This conversion is not possible, so is_rgb_to_alpha_possible
	 * must have let through a case it should have rejected. */
	assert(0);
}
 
/**
 * \return 0 for the derivative opcodes DDX and DDY, 1 for every other
 * opcode.
 */
static int can_remap(unsigned int opcode)
{
	return opcode != RC_OPCODE_DDX && opcode != RC_OPCODE_DDY;
}
 
/**
 * \return 0 for the derivative (DDX, DDY) and dot product (DP2, DP3, DP4,
 * DPH) opcodes, 1 for every other opcode.
 */
static int can_convert_opcode_to_alpha(unsigned int opcode)
{
	if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY)
		return 0;

	if (opcode == RC_OPCODE_DP2 || opcode == RC_OPCODE_DP3
	    || opcode == RC_OPCODE_DP4 || opcode == RC_OPCODE_DPH)
		return 0;

	return 1;
}
 
/**
 * Reader callback for pair instructions: sets Abort in the rc_reader_data
 * passed as 'userdata' when the reading instruction 'inst' would prevent
 * the value it reads from being moved from an RGB channel into alpha.
 */
static void is_rgb_to_alpha_possible(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	struct rc_pair_instruction_source * src)
{
	struct rc_reader_data * reader_data = userdata;
	unsigned int seen_chan = RC_SWIZZLE_UNUSED;
	unsigned int used_alpha_srcs = 0;
	unsigned int chan;

	if (!can_remap(inst->U.P.RGB.Opcode)
	    || !can_remap(inst->U.P.Alpha.Opcode))
		goto not_possible;

	if (!src)
		return;

	/* XXX There are some cases where we can still do the conversion if
	 * a reader reads from a presubtract source, but for now we'll prevent
	 * it. */
	if (arg->Source == RC_PAIR_PRESUB_SRC)
		goto not_possible;

	/* Make sure the source only reads the register component that we
	 * are going to be converting from. It is OK if the instruction uses
	 * this component more than once.
	 * XXX If the index we will be converting to is the same as the
	 * current index, then it is OK to read from more than one component.
	 */
	for (chan = 0; chan < 3; chan++) {
		rc_swizzle swz = get_swz(arg->Swizzle, chan);

		if (swz != RC_SWIZZLE_X && swz != RC_SWIZZLE_Y
		    && swz != RC_SWIZZLE_Z && swz != RC_SWIZZLE_W)
			continue;

		if (seen_chan == RC_SWIZZLE_UNUSED)
			seen_chan = swz;
		else if (seen_chan != swz)
			goto not_possible;
	}

	/* Make sure there are enough alpha sources.
	 * XXX If we know what register all the readers are going
	 * to be remapped to, then in some situations we can still do
	 * the substitution, even if all 3 alpha sources are being used. */
	for (chan = 0; chan < 3; chan++) {
		if (inst->U.P.Alpha.Src[chan].Used)
			used_alpha_srcs++;
	}
	if (used_alpha_srcs > 2)
		goto not_possible;

	return;

not_possible:
	reader_data->Abort = 1;
}
 
/**
 * Try to turn the RGB half of a scheduled pair instruction into an Alpha
 * instruction that writes the W channel of a temporary register.
 *
 * On success the RGB half is reset to a NOP, the Alpha half carries the
 * former RGB operation, the register value tracking in 's' is moved to the
 * new W slot, and every reader recorded in sched_inst->GlobalReaders is
 * remapped to read the new location.
 *
 * \return 1 if the instruction was converted, 0 if it was left unconverted.
 */
static int convert_rgb_to_alpha(
struct schedule_state * s,
struct schedule_instruction * sched_inst)
{
struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
unsigned int old_mask = pair_inst->RGB.WriteMask;
unsigned int old_swz = rc_mask_to_swizzle(old_mask);
const struct rc_opcode_info * info =
rc_get_opcode_info(pair_inst->RGB.Opcode);
int new_index = -1;
unsigned int i;

/* The reader scan flagged at least one reader that cannot be remapped. */
if (sched_inst->GlobalReaders.Abort)
return 0;

/* Nothing is written to RGB, so there is nothing to move. */
if (!pair_inst->RGB.WriteMask)
return 0;

if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
|| !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
return 0;
}

assert(sched_inst->NumWriteValues == 1);

if (!sched_inst->WriteValues[0]) {
assert(0);
return 0;
}

/* We start at the old index, because if we can reuse the same
 * register and just change the swizzle then it is more likely we
 * will be able to convert all the readers. */
for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
/* Channel 3 is the W slot of temporary i. */
struct reg_value ** new_regvalp = get_reg_valuep(
s, RC_FILE_TEMPORARY, i, 3);
if (!*new_regvalp) {
struct reg_value ** old_regvalp =
get_reg_valuep(s,
RC_FILE_TEMPORARY,
pair_inst->RGB.DestIndex,
rc_mask_to_swizzle(old_mask));
new_index = i;
/* Move the tracked value from the old RGB slot to the free
 * W slot. */
*new_regvalp = *old_regvalp;
*old_regvalp = NULL;
/* NOTE(review): the pointer re-fetched here is not read again
 * before the break below. */
new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
break;
}
}
/* No temporary from DestIndex upwards has a free W slot. */
if (new_index < 0) {
return 0;
}

/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
 * as the RGB opcode, then the Alpha instruction will already contain
 * the correct opcode and instruction args, so we do not want to
 * overwrite them.
 */
if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
sizeof(pair_inst->Alpha.Arg));
}
pair_inst->Alpha.DestIndex = new_index;
pair_inst->Alpha.WriteMask = RC_MASK_W;
pair_inst->Alpha.Target = pair_inst->RGB.Target;
pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
/* Move the swizzles into the first chan: for each source argument the
 * first of the three channels that is not RC_SWIZZLE_UNUSED becomes
 * the argument's single-channel swizzle. */
for (i = 0; i < info->NumSrcRegs; i++) {
unsigned int j;
for (j = 0; j < 3; j++) {
unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
if (swz != RC_SWIZZLE_UNUSED) {
pair_inst->Alpha.Arg[i].Swizzle =
rc_init_swizzle(swz, 1);
break;
}
}
}
/* Clear the RGB half now that its work lives in the Alpha half. */
pair_inst->RGB.Opcode = RC_OPCODE_NOP;
pair_inst->RGB.DestIndex = 0;
pair_inst->RGB.WriteMask = 0;
pair_inst->RGB.Target = 0;
pair_inst->RGB.OutputWriteMask = 0;
pair_inst->RGB.DepthWriteMask = 0;
pair_inst->RGB.Saturate = 0;
memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));

/* Point every recorded reader at the new W channel location. */
for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
RC_FILE_TEMPORARY, old_swz, new_index);
}
return 1;
}
 
/**
 * Walk a ready list, convert eligible instructions from RGB to Alpha and
 * try to pair each converted instruction with an entry of s->ReadyRGB.
 *
 * A converted instruction is moved from *inst_list to s->ReadyAlpha; if it
 * can then be merged into a ReadyRGB instruction, both are removed from
 * their lists and the RGB instruction is added to s->ReadyFullALU with the
 * converted instruction recorded as its PairedInst.
 */
static void try_convert_and_pair(
struct schedule_state *s,
struct schedule_instruction ** inst_list)
{
struct schedule_instruction * list_ptr = *inst_list;
/* The loop only runs while the list still has at least two entries. */
while (list_ptr && *inst_list && (*inst_list)->NextReady) {
int paired = 0;
/* Skip instructions whose Alpha half is already occupied, unless
 * the RGB half is RC_OPCODE_REPL_ALPHA. */
if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
&& list_ptr->Instruction->U.P.RGB.Opcode
!= RC_OPCODE_REPL_ALPHA) {
goto next;
}
if (list_ptr->NumWriteValues == 1
&& convert_rgb_to_alpha(s, list_ptr)) {

struct schedule_instruction * pair_ptr;
/* The instruction is Alpha-only now, so it changes lists. */
remove_inst_from_list(inst_list, list_ptr);
add_inst_to_list_score(&s->ReadyAlpha, list_ptr);

for (pair_ptr = s->ReadyRGB; pair_ptr;
pair_ptr = pair_ptr->NextReady) {
if (merge_instructions(&pair_ptr->Instruction->U.P,
&list_ptr->Instruction->U.P)) {
remove_inst_from_list(&s->ReadyAlpha, list_ptr);
remove_inst_from_list(&s->ReadyRGB, pair_ptr);
pair_ptr->PairedInst = list_ptr;

add_inst_to_list(&s->ReadyFullALU, pair_ptr);
/* The lists were modified, so restart from the head. */
list_ptr = *inst_list;
paired = 1;
break;
}

}
}
/* Advance unless a pairing just reset list_ptr to the list head. */
if (!paired) {
next:
list_ptr = list_ptr->NextReady;
}
}
}
 
/**
 * Merge ready RGB and Alpha instructions into full ALU instructions and,
 * when optimizations are enabled (s->Opt), additionally try to convert
 * instructions to Alpha so that more of them can be paired.
 */
static void pair_instructions(struct schedule_state * s)
{
	struct schedule_instruction *rgb_ptr;
	struct schedule_instruction *rgb_next;

	/* Some pairings might fail because they require too
	 * many source slots; try all possible pairings if necessary */
	for (rgb_ptr = s->ReadyRGB; rgb_ptr; rgb_ptr = rgb_next) {
		struct schedule_instruction *alpha_ptr;
		struct schedule_instruction *alpha_next;

		/* Fetch the successor first: rgb_ptr may leave the list. */
		rgb_next = rgb_ptr->NextReady;

		for (alpha_ptr = s->ReadyAlpha; alpha_ptr; alpha_ptr = alpha_next) {
			alpha_next = alpha_ptr->NextReady;

			if (!merge_instructions(&rgb_ptr->Instruction->U.P,
						&alpha_ptr->Instruction->U.P))
				continue;

			/* Both halves leave their ready lists and the merged
			 * instruction becomes a full ALU candidate. */
			remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
			remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
			rgb_ptr->PairedInst = alpha_ptr;
			add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
			break;
		}
	}

	if (s->Opt) {
		/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
		 * slot can be converted into Alpha instructions. */
		try_convert_and_pair(s, &s->ReadyFullALU);

		/* Try to convert some of the RGB instructions to Alpha and
		 * try to pair it with another RGB. */
		try_convert_and_pair(s, &s->ReadyRGB);
	}
}
 
/**
 * Score every instruction of one ready list with s->CalcScore and keep
 * track of the best candidate seen so far.
 *
 * The outputs are updated whenever no candidate has been recorded yet
 * (*max_inst_out is NULL) or an instruction scores strictly higher than
 * *max_score; *list_out then receives 'list'.
 */
static void update_max_score(
	struct schedule_state * s,
	struct schedule_instruction ** list,
	int * max_score,
	struct schedule_instruction ** max_inst_out,
	struct schedule_instruction *** list_out)
{
	struct schedule_instruction * cand = *list;

	while (cand) {
		int score;

		s->CalcScore(cand);
		score = cand->Score;

		if (*max_inst_out == NULL || score > *max_score) {
			*list_out = list;
			*max_inst_out = cand;
			*max_score = score;
		}

		cand = cand->NextReady;
	}
}
 
/**
 * Emit the next piece of the schedule in front of 'before': either all
 * ready TEX instructions, or the single ALU instruction with the highest
 * score among the full, RGB and Alpha ready lists.
 */
static void emit_instruction(
struct schedule_state * s,
struct rc_instruction * before)
{
int max_score = -1;
struct schedule_instruction * max_inst = NULL;
struct schedule_instruction ** max_list = NULL;
unsigned tex_count = 0;
struct schedule_instruction * tex_ptr;

/* Pair up what can be paired before choosing a candidate. */
pair_instructions(s);
#if VERBOSE
fprintf(stderr, "Full:\n");
print_list(s->ReadyFullALU);
fprintf(stderr, "RGB:\n");
print_list(s->ReadyRGB);
fprintf(stderr, "Alpha:\n");
print_list(s->ReadyAlpha);
fprintf(stderr, "TEX:\n");
print_list(s->ReadyTEX);
#endif

/* Count the ready TEX instructions; a ready KIL causes all ready TEX
 * instructions to be emitted right away. */
for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
emit_all_tex(s, before);
return;
}
tex_count++;
}
update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);

/* Emit the TEX group when enough TEX instructions are ready, when
 * max_score is still -1, or when every TEX of the block is ready.
 * NOTE(review): the last clause also requires max_score == -1, which
 * the second clause already covers on its own. */
if (tex_count >= s->max_tex_group || max_score == -1
|| (s->TEXCount > 0 && tex_count == s->TEXCount)
|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
emit_all_tex(s, before);
} else {


/* Move the winning ALU instruction from its ready list into the
 * program, directly in front of 'before'. */
remove_inst_from_list(max_list, max_inst);
rc_insert_instruction(before->Prev, max_inst->Instruction);
commit_alu_instruction(s, max_inst);

/* before->Prev is now the instruction that was just inserted. */
presub_nop(before->Prev);
}
}
 
/**
 * Record 'reader' as a consumer of 'writer', but only when 'writer' exists
 * and is an RC_INSTRUCTION_NORMAL instruction (i.e. not an ALU pair
 * instruction): the reader's TexReadCount is incremented and it is appended
 * to the writer's TexReaders list.
 */
static void add_tex_reader(
	struct schedule_state * s,
	struct schedule_instruction * writer,
	struct schedule_instruction * reader)
{
	if (writer && writer->Instruction->Type == RC_INSTRUCTION_NORMAL) {
		reader->TexReadCount++;
		rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
	}
	/* Otherwise the writer is not a TEX instruction; nothing to do. */
}
 
/**
 * Per-channel read callback used while scanning a block for data
 * dependencies.
 *
 * Registers s->Current as a reader of the value currently tracked for
 * (file, index, chan), creating a writer-less value if the register has not
 * been seen in this block yet, and bumps s->Current's dependency count when
 * the value has a writer in this block.
 *
 * \param data The schedule_state.
 * \param inst Unused.
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	/* No value slot is available for this register. */
	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	reader->Reader = s->Current;
	/* Terminate the list explicitly: when this reader becomes the first
	 * entry of a fresh value below, nothing else assigns Next, and pool
	 * allocations in this file are otherwise cleared by hand. */
	reader->Next = NULL;
	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		/* Push the new reader onto the front of the reader list. */
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	/* At most 12 read values are recorded per instruction. */
	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
 
/**
 * Per-channel write callback used while scanning a block for data
 * dependencies.
 *
 * Creates a new value written by s->Current for (file, index, chan), chains
 * it behind the value previously tracked for that channel (which adds one
 * dependency and records the previous writer in s->PrevWriter[chan]), and
 * records it in s->Current's WriteValues.
 *
 * \param data The schedule_state.
 * \param inst Unused.
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
	struct reg_value * prev;
	struct reg_value * fresh;

	if (slot == NULL)
		return;

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	fresh = memory_pool_malloc(&s->C->Pool, sizeof(*fresh));
	memset(fresh, 0, sizeof(*fresh));
	fresh->Writer = s->Current;

	prev = *slot;
	if (prev != NULL) {
		prev->Next = fresh;
		s->Current->NumDependencies++;
		/* Keep track of the previous writer to s->Current's destination
		 * register */
		s->PrevWriter[chan] = prev->Writer;
	}

	*slot = fresh;

	/* At most 4 write values are recorded per instruction. */
	if (s->Current->NumWriteValues < 4) {
		s->Current->WriteValues[s->Current->NumWriteValues++] = fresh;
	} else {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
	}
}
 
/**
 * Reader callback for normal (non-pair) instructions: any such reader
 * unconditionally sets Abort in the rc_reader_data passed as 'userdata'.
 */
static void is_rgb_to_alpha_possible_normal(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	((struct rc_reader_data *)userdata)->Abort = 1;
}
 
/**
 * Schedule the instructions in the range [begin, end).
 *
 * First every instruction is scanned to build its schedule_instruction
 * (dependencies, read/write values, global readers); then the range is
 * unlinked from the program and the instructions are re-emitted one step at
 * a time in front of 'end' until all ready lists are empty or a compiler
 * error has been flagged.
 */
static void schedule_block(struct schedule_state * s,
struct rc_instruction * begin, struct rc_instruction * end)
{
unsigned int ip;

/* Scan instructions for data dependencies */
ip = 0;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
memset(s->Current, 0, sizeof(struct schedule_instruction));

/* Count the texture instructions of this block. */
if (inst->Type == RC_INSTRUCTION_NORMAL) {
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);
if (info->HasTexture) {
s->TEXCount++;
}
}

/* XXX: This causes SemWait to be set for all instructions in
 * a block if the previous block contained a TEX instruction.
 * We can do better here, but it will take a lot of work. */
if (s->PrevBlockHasTex) {
s->Current->TexReadCount = 1;
}

s->Current->Instruction = inst;
/* IPs are renumbered from 0 within the block. */
inst->IP = ip++;

DBG("%i: Scanning\n", inst->IP);

/* The order of things here is subtle and maybe slightly
 * counter-intuitive, to account for the case where an
 * instruction writes to the same register as it reads
 * from. */
rc_for_all_writes_chan(inst, &scan_write, s);
rc_for_all_reads_chan(inst, &scan_read, s);

DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);

/* Instructions without dependencies can be scheduled right away. */
if (!s->Current->NumDependencies) {
instruction_ready(s, s->Current);
}

/* Get global readers for possible RGB->Alpha conversion. */
s->Current->GlobalReaders.ExitOnAbort = 1;
rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
is_rgb_to_alpha_possible_normal,
is_rgb_to_alpha_possible, NULL);
}

/* Temporarily unlink all instructions */
begin->Prev->Next = end;
end->Prev = begin->Prev;

/* Schedule instructions back */
while(!s->C->Error &&
(s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
emit_instruction(s, end);
}
}
 
/**
 * \return the IsFlowControl flag of the instruction's opcode for normal
 * instructions, 0 for every other instruction type.
 */
static int is_controlflow(struct rc_instruction * inst)
{
	if (inst->Type != RC_INSTRUCTION_NORMAL)
		return 0;

	return rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl;
}
 
/**
 * Compiler pass: schedule every run of consecutive non-flow-control
 * instructions as one block.
 *
 * \param cc   The compiler, which must be an r300_fragment_program_compiler.
 * \param user Pointer to an unsigned int that is copied into the
 *             scheduler's Opt flag.
 */
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
{
	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
	struct rc_instruction * const end = &c->Base.Program.Instructions;
	struct rc_instruction * inst;
	struct schedule_state s;

	memset(&s, 0, sizeof(s));
	s.Opt = *(unsigned int *)user;
	s.C = &c->Base;
	s.CalcScore = s.C->is_r500 ? calc_score_readers : calc_score_r300;
	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);

	for (inst = end->Next; inst != end; ) {
		struct rc_instruction * first = inst;

		/* Flow control instructions stay where they are. */
		if (is_controlflow(inst)) {
			inst = inst->Next;
			continue;
		}

		/* Advance to the end of the block: the next flow control
		 * instruction or the end of the program. */
		while (inst != end && !is_controlflow(inst))
			inst = inst->Next;

		DBG("Schedule one block\n");
		/* Register tracking and the TEX count are per block. */
		memset(s.Temporary, 0, sizeof(s.Temporary));
		s.TEXCount = 0;
		schedule_block(&s, first, inst);
		if (s.PendingTEX)
			s.PrevBlockHasTex = 1;
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
0,0 → 1,380
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_program_pair.h"
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
 
 
/**
* Finally rewrite ADD, MOV, MUL as the appropriate native instruction
* and reverse the order of arguments for CMP.
*/
static void final_rewrite(struct rc_sub_instruction *inst)
{
struct rc_src_register tmp;
 
switch(inst->Opcode) {
case RC_OPCODE_ADD:
inst->SrcReg[2] = inst->SrcReg[1];
inst->SrcReg[1].File = RC_FILE_NONE;
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->SrcReg[1].Negate = RC_MASK_NONE;
inst->Opcode = RC_OPCODE_MAD;
break;
case RC_OPCODE_CMP:
tmp = inst->SrcReg[2];
inst->SrcReg[2] = inst->SrcReg[0];
inst->SrcReg[0] = tmp;
break;
case RC_OPCODE_MOV:
/* AMD say we should use CMP.
* However, when we transform
* KIL -r0;
* into
* CMP tmp, -r0, -r0, 0;
* KIL tmp;
* we get incorrect behaviour on R500 when r0 == 0.0.
* It appears that the R500 KIL hardware treats -0.0 as less
* than zero.
*/
inst->SrcReg[1].File = RC_FILE_NONE;
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->SrcReg[2].File = RC_FILE_NONE;
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
inst->Opcode = RC_OPCODE_MAD;
break;
case RC_OPCODE_MUL:
inst->SrcReg[2].File = RC_FILE_NONE;
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
inst->Opcode = RC_OPCODE_MAD;
break;
default:
/* nothing to do */
break;
}
}
 
 
/**
 * Work out which ALU halves an instruction needs.
 *
 * \param needrgb        Set to 1 if the RGB half is needed, else 0.
 * \param needalpha      Set to 1 if the Alpha half is needed, else 0.
 * \param istranscendent Set to 1 for COS, EX2, LG2, RCP, RSQ and SIN,
 *                       else 0.
 */
static void classify_instruction(struct rc_sub_instruction * inst,
	int * needrgb, int * needalpha, int * istranscendent)
{
	int rgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) != 0;
	int alpha = (inst->DstReg.WriteMask & RC_MASK_W) != 0;
	int transcendent = 0;

	/* Writing the ALU result needs the half that produces it. */
	if (inst->WriteALUResult == RC_ALURESULT_X)
		rgb = 1;
	else if (inst->WriteALUResult == RC_ALURESULT_W)
		alpha = 1;

	switch(inst->Opcode) {
	case RC_OPCODE_COS:
	case RC_OPCODE_EX2:
	case RC_OPCODE_LG2:
	case RC_OPCODE_RCP:
	case RC_OPCODE_RSQ:
	case RC_OPCODE_SIN:
		/* Transcendent operations always use the alpha half. */
		transcendent = 1;
		alpha = 1;
		break;
	case RC_OPCODE_DP4:
		/* DP4 needs both halves regardless of the write mask. */
		alpha = 1;
		rgb = 1;
		break;
	case RC_OPCODE_DP3:
		rgb = 1;
		break;
	default:
		/* The write mask alone decides for everything else. */
		break;
	}

	*needrgb = rgb;
	*needalpha = alpha;
	*istranscendent = transcendent;
}
 
/**
 * Inspect the four swizzle channels of 'src': *rgb is set to 1 if any of
 * them has a swizzle value of 0, 1 or 2, and *alpha is set to 1 if any of
 * them has the value 3. Neither output is ever cleared here.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
		unsigned int * alpha)
{
	int chan = 0;

	while (chan < 4) {
		unsigned int swz = GET_SWZ(src.Swizzle, chan);

		switch (swz) {
		case 0:
		case 1:
		case 2:
			*rgb = 1;
			break;
		case 3:
			*alpha = 1;
			break;
		default:
			break;
		}
		++chan;
	}
}
 
/**
 * Fill the given ALU instruction's opcodes and source operands into the given pair,
 * if possible.
 *
 * \param c    Compiler; used for error reporting and for the output
 *             register indices (OutputDepth, OutputColor).
 * \param pair Pair instruction to fill; it is zeroed first.
 * \param inst The ALU instruction to translate.
 *
 * If a source cannot be allocated, an error is reported through rc_error
 * and the function returns with 'pair' only partially filled in.
 */
static void set_pair_instruction(struct r300_fragment_program_compiler *c,
struct rc_pair_instruction * pair,
struct rc_sub_instruction * inst)
{
int needrgb, needalpha, istranscendent;
const struct rc_opcode_info * opcode;
int i;

memset(pair, 0, sizeof(struct rc_pair_instruction));

classify_instruction(inst, &needrgb, &needalpha, &istranscendent);

if (needrgb) {
/* For transcendent operations the RGB half gets
 * RC_OPCODE_REPL_ALPHA instead of the operation itself. */
if (istranscendent)
pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
else
pair->RGB.Opcode = inst->Opcode;
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
pair->RGB.Saturate = 1;
}
if (needalpha) {
pair->Alpha.Opcode = inst->Opcode;
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
pair->Alpha.Saturate = 1;
}

opcode = rc_get_opcode_info(inst->Opcode);

/* Presubtract handling:
 * We need to make sure that the values used by the presubtract
 * operation end up in src0 or src1. */
if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
/* rc_pair_alloc_source() will fill in data for
 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
int j;
for(j = 0; j < 3; j++) {
int src_regs;
/* Only sources that read the presubtract result matter here. */
if(inst->SrcReg[j].File != RC_FILE_PRESUB)
continue;

src_regs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
/* Place presubtract input i into source slot i of whichever
 * halves read the presubtract result. */
for(i = 0; i < src_regs; i++) {
unsigned int rgb = 0;
unsigned int alpha = 0;
src_uses(inst->SrcReg[j], &rgb, &alpha);
if(rgb) {
pair->RGB.Src[i].File =
inst->PreSub.SrcReg[i].File;
pair->RGB.Src[i].Index =
inst->PreSub.SrcReg[i].Index;
pair->RGB.Src[i].Used = 1;
}
if(alpha) {
pair->Alpha.Src[i].File =
inst->PreSub.SrcReg[i].File;
pair->Alpha.Src[i].Index =
inst->PreSub.SrcReg[i].Index;
pair->Alpha.Src[i].Used = 1;
}
}
}
}

/* Allocate a source slot for every source operand and fill in the
 * matching RGB and/or Alpha argument. */
for(i = 0; i < opcode->NumSrcRegs; ++i) {
int source;
if (needrgb && !istranscendent) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
unsigned int srcmask = 0;
int j;
/* We don't care about the alpha channel here. We only
 * want the part of the swizzle that writes to rgb,
 * since we are creating an rgb instruction. */
for(j = 0; j < 3; ++j) {
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);

if (swz < RC_SWIZZLE_W)
srcrgb = 1;
else if (swz == RC_SWIZZLE_W)
srcalpha = 1;

/* srcmask collects the channels whose swizzle value is
 * below RC_SWIZZLE_UNUSED. */
if (swz < RC_SWIZZLE_UNUSED)
srcmask |= 1 << j;
}
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
if (source < 0) {
rc_error(&c->Base, "Failed to translate "
"rgb instruction.\n");
return;
}
pair->RGB.Arg[i].Source = source;
pair->RGB.Arg[i].Swizzle =
rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
/* Negate is a single flag per argument: it is set when any
 * channel in srcmask is negated in X, Y or Z. */
pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
}
if (needalpha) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
unsigned int swz;
/* Transcendent operations take their swizzle from
 * rc_get_scalar_src_swz(); everything else reads the W
 * channel of the swizzle. */
if (istranscendent) {
swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
} else {
swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
}

if (swz < 3)
srcrgb = 1;
else if (swz < 4)
srcalpha = 1;
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
if (source < 0) {
rc_error(&c->Base, "Failed to translate "
"alpha instruction.\n");
return;
}
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;

/* For transcendent operations the negate flag is taken from
 * the channels that are written; otherwise from W only. */
if (istranscendent) {
pair->Alpha.Arg[i].Negate =
!!(inst->SrcReg[i].Negate &
inst->DstReg.WriteMask);
} else {
pair->Alpha.Arg[i].Negate =
!!(inst->SrcReg[i].Negate & RC_MASK_W);
}
}
}

/* Destination handling */
if (inst->DstReg.File == RC_FILE_OUTPUT) {
if (inst->DstReg.Index == c->OutputDepth) {
/* Depth output: controlled by the W bit of the write mask. */
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
} else {
/* Color output: find which of the four color targets the
 * destination index corresponds to. */
for (i = 0; i < 4; i++) {
if (inst->DstReg.Index == c->OutputColor[i]) {
pair->RGB.Target = i;
pair->Alpha.Target = i;
pair->RGB.OutputWriteMask |=
inst->DstReg.WriteMask & RC_MASK_XYZ;
pair->Alpha.OutputWriteMask |=
GET_BIT(inst->DstReg.WriteMask, 3);
break;
}
}
}
} else {
if (needrgb) {
pair->RGB.DestIndex = inst->DstReg.Index;
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
}

if (needalpha) {
pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
/* The alpha destination index is only set when W is
 * actually written. */
if (pair->Alpha.WriteMask) {
pair->Alpha.DestIndex = inst->DstReg.Index;
}
}
}

if (needrgb) {
pair->RGB.Omod = inst->Omod;
}
if (needalpha) {
pair->Alpha.Omod = inst->Omod;
}

if (inst->WriteALUResult) {
pair->WriteALUResult = inst->WriteALUResult;
pair->ALUResultCompare = inst->ALUResultCompare;
}
}
 
 
/**
 * Report a compiler error for instruction features that fragment programs
 * cannot express: signed saturation on an instruction with a destination
 * register, and relative addressing of a source operand. At most one error
 * is reported per call.
 */
static void check_opcode_support(struct r300_fragment_program_compiler *c,
				 struct rc_sub_instruction *inst)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->Opcode);
	unsigned src;

	if (info->HasDstReg
	    && inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
		rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
		return;
	}

	for (src = 0; src < info->NumSrcRegs; src++) {
		if (!inst->SrcReg[src].RelAddr)
			continue;

		rc_error(&c->Base, "Fragment program does not support relative addressing "
			 " of source operands.\n");
		return;
	}
}
 
 
/**
* Translate all ALU instructions into corresponding pair instructions,
* performing no other changes.
*/
void rc_pair_translate(struct radeon_compiler *cc, void *user)
{
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
 
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
inst != &c->Base.Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * opcode;
struct rc_sub_instruction copy;
 
if (inst->Type != RC_INSTRUCTION_NORMAL)
continue;
 
opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
continue;
 
copy = inst->U.I;
 
check_opcode_support(c, &copy);
 
final_rewrite(&copy);
inst->Type = RC_INSTRUCTION_PAIR;
set_pair_instruction(c, &inst->U.P, &copy);
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program.c
0,0 → 1,225
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_program.h"
 
#include <stdio.h>
 
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
 
 
/**
* Transform the given clause in the following way:
* 1. Replace it with an empty clause
* 2. For every instruction in the original clause, try the given
* transformations in order.
* 3. If one of the transformations returns GL_TRUE, assume that it
* has emitted the appropriate instruction(s) into the new clause;
* otherwise, copy the instruction verbatim.
*
* \note The transformation is currently not recursive; in other words,
* instructions emitted by transformations are not transformed.
*
* \note The transform is called 'local' because it can only look at
* one instruction at a time.
*/
void rc_local_transform(
struct radeon_compiler * c,
void *user)
{
struct radeon_program_transformation *transformations =
(struct radeon_program_transformation*)user;
struct rc_instruction * inst = c->Program.Instructions.Next;
 
while(inst != &c->Program.Instructions) {
struct rc_instruction * current = inst;
int i;
 
inst = inst->Next;
 
for(i = 0; transformations[i].function; ++i) {
struct radeon_program_transformation* t = transformations + i;
 
if (t->function(c, current, t->userData))
break;
}
}
}
 
/**
 * State handed to get_used_temporaries_cb through its userdata pointer.
 */
struct get_used_temporaries_data {
/* Per-temporary mask of used components, indexed by register index. */
unsigned char * Used;
/* Number of entries in Used; larger register indices are ignored. */
unsigned int UsedLength;
};
 
/**
 * Read/write callback: ORs 'mask' into the used-component entry of a
 * temporary register. Accesses to other register files and indices beyond
 * the end of the array are ignored.
 */
static void get_used_temporaries_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct get_used_temporaries_data * d = userdata;

	if (file == RC_FILE_TEMPORARY && index < d->UsedLength)
		d->Used[index] |= mask;
}
 
/**
* This function fills in the parameter 'used' with a writemask that
* represent which components of each temporary register are used by the
* program. This is meant to be combined with rc_find_free_temporary_list as a
* more efficient version of rc_find_free_temporary.
* @param used The function does not initialize this parameter.
*/
void rc_get_used_temporaries(
struct radeon_compiler * c,
unsigned char * used,
unsigned int used_length)
{
struct rc_instruction * inst;
struct get_used_temporaries_data d;
d.Used = used;
d.UsedLength = used_length;
 
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
 
rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
}
}
 
/* Look for the first temporary whose requested components are all free.
 * \sa rc_get_used_temporaries
 * @note When a free temporary is found, the requested components are marked
 * as used in 'used' before returning.
 * @param c unused
 * @param used per-temporary masks of used components
 * @param used_length number of items in param 'used'
 * @param mask which components must be free in the temporary index that is
 * returned.
 * @return -1 if no temporary has all components of 'mask' free, otherwise
 * the lowest index of a temporary register for which they are.
 */
int rc_find_free_temporary_list(
	struct radeon_compiler * c,
	unsigned char * used,
	unsigned int used_length,
	unsigned int mask)
{
	int index = 0;

	while (index < used_length) {
		/* Free means none of the requested bits is set yet. */
		if ((used[index] & mask) == 0) {
			used[index] |= mask;
			return index;
		}
		index++;
	}

	return -1;
}
 
/**
 * Find a temporary register that is not used at all by the program.
 *
 * \return the index of a completely unused temporary; if there is none, an
 * error is reported on the compiler and 0 is returned.
 */
unsigned int rc_find_free_temporary(struct radeon_compiler * c)
{
	unsigned char used[RC_REGISTER_MAX_INDEX] = { 0 };
	int index;

	rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);

	/* All four components must be free. */
	index = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
					    RC_MASK_XYZW);
	if (index >= 0)
		return index;

	rc_error(c, "Ran out of temporary registers\n");
	return 0;
}
 
 
/**
 * Allocate an instruction from the compiler's memory pool. The instruction
 * is zeroed, then given an illegal opcode, a full XYZW destination write
 * mask and identity (XYZW) swizzles on its three source registers. It is
 * not linked into the program.
 */
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
{
	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(*inst));
	unsigned int src;

	memset(inst, 0, sizeof(*inst));

	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	for (src = 0; src < 3; src++)
		inst->U.I.SrcReg[src].Swizzle = RC_SWIZZLE_XYZW;

	return inst;
}
 
/* Link 'inst' into the doubly linked list directly behind 'after'. */
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
{
	struct rc_instruction * const follower = after->Next;

	/* Point the new node at its two neighbours ... */
	inst->Prev = after;
	inst->Next = follower;

	/* ... then make the neighbours point back at it. */
	after->Next = inst;
	follower->Prev = inst;
}
 
/*
 * Allocate a default-initialised instruction (see rc_alloc_instruction)
 * and link it directly behind 'after'. Returns the new instruction.
 */
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
{
	struct rc_instruction * fresh = rc_alloc_instruction(c);

	rc_insert_instruction(after, fresh);
	return fresh;
}
 
/*
 * Unlink 'inst' from its list by joining its neighbours. The Prev/Next
 * pointers stored in 'inst' itself are left untouched.
 */
void rc_remove_instruction(struct rc_instruction * inst)
{
	struct rc_instruction * const before = inst->Prev;
	struct rc_instruction * const behind = inst->Next;

	before->Next = behind;
	behind->Prev = before;
}
 
/**
 * Renumber the IP field of every instruction in program order, starting
 * at 0, and return the number of instructions in the program.
 */
unsigned int rc_recompute_ips(struct radeon_compiler * c)
{
	struct rc_instruction * const head = &c->Program.Instructions;
	struct rc_instruction * cur;
	unsigned int count = 0;

	for (cur = head->Next; cur != head; cur = cur->Next) {
		cur->IP = count;
		count++;
	}

	/* The list head is not a real instruction; give it a marker value. */
	head->IP = 0xcafedead;

	return count;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program.h
0,0 → 1,213
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __RADEON_PROGRAM_H_
#define __RADEON_PROGRAM_H_
 
#include <stdint.h>
#include <string.h>
 
#include "radeon_opcodes.h"
#include "radeon_code.h"
#include "radeon_program_constants.h"
#include "radeon_program_pair.h"
 
struct radeon_compiler;
 
/**
 * Description of one source operand of an instruction.
 */
struct rc_src_register {
/** Register file the operand is read from (RC_FILE_* values). */
unsigned int File:4;

/** Negative values may be used for relative addressing. */
signed int Index:(RC_REGISTER_INDEX_BITS+1);
/* NOTE(review): presumably set when Index is address-register relative;
 * confirm against the users of this field. */
unsigned int RelAddr:1;

/** Four 3-bit component selectors (x in the low bits, w in the high bits). */
unsigned int Swizzle:12;

/** Take the component-wise absolute value */
unsigned int Abs:1;

/** Post-Abs negation. */
/* One bit per component, combined with RC_MASK_* values. */
unsigned int Negate:4;
};
 
/**
 * Description of the destination operand of an instruction.
 */
struct rc_dst_register {
/** Register file that is written (RC_FILE_* values). */
unsigned int File:3;
/** Register index within that file. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
/** One bit per component that is written (RC_MASK_* values). */
unsigned int WriteMask:4;
/* NOTE(review): presumably a predication selector; its values are not
 * visible in this file. */
unsigned int Pred:2;
};
 
/**
 * A presubtract operation attached to an instruction: an opcode plus the
 * two source operands it works on.
 */
struct rc_presub_instruction {
rc_presubtract_op Opcode;
struct rc_src_register SrcReg[2];
};
 
/**
 * Instructions are maintained by the compiler in a doubly linked list
 * of these structures.
 *
 * This instruction format is intended to be expanded for hardware-specific
 * trickery. At different stages of compilation, a different set of
 * instruction types may be valid.
 *
 * Note: the list links themselves (Prev/Next) live in struct rc_instruction,
 * which embeds this structure as U.I.
 */
struct rc_sub_instruction {
/** Up to three source operands; how many are used depends on the opcode. */
struct rc_src_register SrcReg[3];
/** Destination operand. */
struct rc_dst_register DstReg;

/**
* Opcode of this instruction, according to \ref rc_opcode enums.
*/
unsigned int Opcode:8;

/**
* Saturate each value of the result to the range [0,1] or [-1,1],
* according to \ref rc_saturate_mode enums.
*/
unsigned int SaturateMode:2;

/**
* Writing to the special register RC_SPECIAL_ALU_RESULT
*/
/*@{*/
unsigned int WriteALUResult:2;
unsigned int ALUResultCompare:3;
/*@}*/

/**
* \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
*/
/*@{*/
/** Source texture unit. */
unsigned int TexSrcUnit:5;

/** Source texture target, one of the \ref rc_texture_target enums */
unsigned int TexSrcTarget:3;

/** True if tex instruction should do shadow comparison */
unsigned int TexShadow:1;

/* NOTE(review): semaphore wait/acquire flags for texture instructions;
 * exact semantics are not visible in this file. */
/**/
unsigned int TexSemWait:1;
unsigned int TexSemAcquire:1;

/**R500 Only. How to swizzle the result of a TEX lookup*/
unsigned int TexSwizzle:12;
/*@}*/

/** This holds information about the presubtract operation used by
* this instruction. */
struct rc_presub_instruction PreSub;

/* NOTE(review): presumably the output modifier applied to the result;
 * see the rc_omod_op definition. */
rc_omod_op Omod;
};
 
/**
 * Selects which member of the union rc_instruction::U is meaningful.
 * NOTE(review): NORMAL presumably pairs with U.I and PAIR with U.P; confirm.
 */
typedef enum {
RC_INSTRUCTION_NORMAL = 0,
RC_INSTRUCTION_PAIR
} rc_instruction_type;
 
/**
 * Node of the doubly linked instruction list of a program.
 */
struct rc_instruction {
/* List links; maintained by rc_insert_instruction / rc_remove_instruction. */
struct rc_instruction * Prev;
struct rc_instruction * Next;

/* Discriminator for the union below. */
rc_instruction_type Type;
union {
struct rc_sub_instruction I;
struct rc_pair_instruction P;
} U;

/**
* Warning: IPs are not stable. If you want to use them,
* you need to recompute them at the beginning of each pass
* using \ref rc_recompute_ips
*/
unsigned int IP;
};
 
/**
 * A program: the instruction list head, summary masks and constants.
 */
struct rc_program {
/**
* Instructions.Next points to the first instruction,
* Instructions.Prev points to the last instruction.
*/
/* This embedded node is the list sentinel: iteration stops when it is
 * reached again (see rc_recompute_ips). */
struct rc_instruction Instructions;

/* Long term, we should probably remove InputsRead & OutputsWritten,
* since updating dependent state can be fragile, and they aren't
* actually used very often. */
uint32_t InputsRead;
uint32_t OutputsWritten;
uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */

/* Constants referenced by the program; immediates are added through
 * rc_constants_add_immediate_scalar. */
struct rc_constant_list Constants;
};
 
/**
* A transformation that can be passed to \ref rc_local_transform.
*
* The function will be called once for each instruction.
* It has to either emit the appropriate transformed code for the instruction
* and return true, or return false if it doesn't understand the
* instruction.
*
* The function gets passed the userData as last parameter.
*/
struct radeon_program_transformation {
/* Callback: (compiler, instruction, userData) -> non-zero when handled. */
int (*function)(
struct radeon_compiler*,
struct rc_instruction*,
void*);
/* Opaque pointer forwarded unchanged to 'function'. */
void *userData;
};
 
/* Apply local transformations to the program.
 * NOTE(review): 'user' presumably points at an array of
 * struct radeon_program_transformation; confirm at the definition. */
void rc_local_transform(
struct radeon_compiler *c,
void *user);

/* Accumulate into 'used' (one component mask per temporary) which
 * temporaries the program reads or writes. 'used' is not cleared first. */
void rc_get_used_temporaries(
struct radeon_compiler * c,
unsigned char * used,
unsigned int used_length);

/* Find and claim the first entry of 'used' in which all components of
 * 'mask' are free; returns its index or -1. */
int rc_find_free_temporary_list(
struct radeon_compiler * c,
unsigned char * used,
unsigned int used_length,
unsigned int mask);

/* Return the index of a completely unused temporary register; reports an
 * error and returns 0 if none is left. */
unsigned int rc_find_free_temporary(struct radeon_compiler * c);

/* Instruction list management. */
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
void rc_remove_instruction(struct rc_instruction * inst);

/* Renumber all instruction IPs and return the instruction count. */
unsigned int rc_recompute_ips(struct radeon_compiler * c);

/* Defined elsewhere; by its name prints 'prog' for debugging. */
void rc_print_program(const struct rc_program *prog);

/* Defined elsewhere; converts a component mask into a swizzle. */
rc_swizzle rc_mask_to_swizzle(unsigned int mask);
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_alu.c
0,0 → 1,1313
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* @file
*
* Shareable transformations that transform "special" ALU instructions
* into ALU instructions that are supported by hardware.
*
*/
 
#include "radeon_program_alu.h"
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
 
 
/*
 * Insert a new one-source instruction behind 'after' and return it.
 * When 'base' is non-NULL the new instruction starts as a copy of it, so
 * fields such as the saturate mode carry over; opcode, destination and
 * source 0 are then overwritten with the given values.
 */
static struct rc_instruction *emit1(
	struct radeon_compiler * c, struct rc_instruction * after,
	rc_opcode Opcode, struct rc_sub_instruction * base,
	struct rc_dst_register DstReg, struct rc_src_register SrcReg)
{
	struct rc_instruction *emitted = rc_insert_new_instruction(c, after);
	struct rc_sub_instruction *sub = &emitted->U.I;

	if (base)
		memcpy(sub, base, sizeof(*sub));

	sub->Opcode = Opcode;
	sub->DstReg = DstReg;
	sub->SrcReg[0] = SrcReg;

	return emitted;
}
 
/*
 * Two-source variant of emit1: insert a new instruction behind 'after',
 * optionally seeded from 'base', and set opcode, destination and
 * sources 0 and 1.
 */
static struct rc_instruction *emit2(
	struct radeon_compiler * c, struct rc_instruction * after,
	rc_opcode Opcode, struct rc_sub_instruction * base,
	struct rc_dst_register DstReg,
	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
{
	struct rc_instruction *emitted = rc_insert_new_instruction(c, after);
	struct rc_sub_instruction *sub = &emitted->U.I;

	if (base)
		memcpy(sub, base, sizeof(*sub));

	sub->Opcode = Opcode;
	sub->DstReg = DstReg;
	sub->SrcReg[0] = SrcReg0;
	sub->SrcReg[1] = SrcReg1;

	return emitted;
}
 
/*
 * Three-source variant of emit1: insert a new instruction behind 'after',
 * optionally seeded from 'base', and set opcode, destination and all
 * three sources.
 */
static struct rc_instruction *emit3(
	struct radeon_compiler * c, struct rc_instruction * after,
	rc_opcode Opcode, struct rc_sub_instruction * base,
	struct rc_dst_register DstReg,
	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
	struct rc_src_register SrcReg2)
{
	struct rc_instruction *emitted = rc_insert_new_instruction(c, after);
	struct rc_sub_instruction *sub = &emitted->U.I;

	if (base)
		memcpy(sub, base, sizeof(*sub));

	sub->Opcode = Opcode;
	sub->DstReg = DstReg;
	sub->SrcReg[0] = SrcReg0;
	sub->SrcReg[1] = SrcReg1;
	sub->SrcReg[2] = SrcReg2;

	return emitted;
}
 
static struct rc_dst_register dstregtmpmask(int index, int mask)
{
struct rc_dst_register dst = {0, 0, 0};
dst.File = RC_FILE_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
return dst;
}
 
/* Source operand that reads constant 0 in every component; it uses
 * RC_FILE_NONE, i.e. no register is referenced. */
static const struct rc_src_register builtin_zero = {
.File = RC_FILE_NONE,
.Index = 0,
.Swizzle = RC_SWIZZLE_0000
};
/* Source operand that reads constant 1 in every component. */
static const struct rc_src_register builtin_one = {
.File = RC_FILE_NONE,
.Index = 0,
.Swizzle = RC_SWIZZLE_1111
};

/* Source operand that reads constant 0.5 in every component. */
static const struct rc_src_register builtin_half = {
.File = RC_FILE_NONE,
.Index = 0,
.Swizzle = RC_SWIZZLE_HHHH
};

/* Template for srcreg()/srcregswz(): identity swizzle, no abs, no negate;
 * callers fill in File and Index. */
static const struct rc_src_register srcreg_undefined = {
.File = RC_FILE_NONE,
.Index = 0,
.Swizzle = RC_SWIZZLE_XYZW
};
 
static struct rc_src_register srcreg(int file, int index)
{
struct rc_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
return src;
}
 
static struct rc_src_register srcregswz(int file, int index, int swz)
{
struct rc_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
src.Swizzle = swz;
return src;
}
 
/*
 * Return a copy of 'reg' that reads the absolute value: Abs is set and
 * any negation is dropped.
 */
static struct rc_src_register absolute(struct rc_src_register reg)
{
	/* 'reg' is passed by value, so it can be edited in place. */
	reg.Negate = RC_MASK_NONE;
	reg.Abs = 1;
	return reg;
}
 
/*
 * Return a copy of 'reg' with the negate bit of all four components
 * flipped (so negating twice yields the original operand).
 */
static struct rc_src_register negate(struct rc_src_register reg)
{
	reg.Negate ^= RC_MASK_XYZW;
	return reg;
}
 
/*
 * Return a copy of 'reg' whose swizzle is the existing swizzle combined
 * with the selectors (x, y, z, w) through combine_swizzles4.
 */
static struct rc_src_register swizzle(struct rc_src_register reg,
	rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
{
	reg.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
	return reg;
}
 
/* Return a copy of 'reg' in which every component selects 'x'. */
static struct rc_src_register swizzle_smear(struct rc_src_register reg,
	rc_swizzle x)
{
	struct rc_src_register smeared = reg;

	smeared.Swizzle = combine_swizzles4(reg.Swizzle, x, x, x, x);
	return smeared;
}
 
static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
{
return swizzle_smear(reg, RC_SWIZZLE_X);
}
 
static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
{
return swizzle_smear(reg, RC_SWIZZLE_Y);
}
 
static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
{
return swizzle_smear(reg, RC_SWIZZLE_Z);
}
 
static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
{
return swizzle_smear(reg, RC_SWIZZLE_W);
}
 
static int is_dst_safe_to_reuse(struct rc_instruction *inst)
{
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
 
assert(info->HasDstReg);
 
if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
return 0;
 
for (i = 0; i < info->NumSrcRegs; i++) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
return 0;
}
 
return 1;
}
 
/*
 * Pick a scratch temporary for expanding 'inst': the instruction's own
 * destination register when that is safe, otherwise a free temporary.
 * The returned operand carries the write mask of the original destination.
 */
static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
	struct rc_instruction *inst)
{
	unsigned index = is_dst_safe_to_reuse(inst)
		? inst->U.I.DstReg.Index
		: rc_find_free_temporary(c);

	return dstregtmpmask(index, inst->U.I.DstReg.WriteMask);
}
 
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src = inst->U.I.SrcReg[0];
src.Abs = 1;
src.Negate = RC_MASK_NONE;
emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src);
rc_remove_instruction(inst);
}
 
static void transform_CEIL(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Assuming:
* ceil(x) = -floor(-x)
*
* After inlining floor:
* ceil(x) = -(-x-frac(-x))
*
* After simplification:
* ceil(x) = x+frac(-x)
*/
 
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
rc_remove_instruction(inst);
}
 
static void transform_CLAMP(struct radeon_compiler *c,
struct rc_instruction *inst)
{
/* CLAMP dst, src, min, max
* into:
* MIN tmp, src, max
* MAX dst, tmp, min
*/
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
 
static void transform_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
src0.Swizzle &= ~(63 << (3 * 2));
src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
src1.Swizzle &= ~(63 << (3 * 2));
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
 
static void transform_DPH(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
 
/**
* [1, src0.y*src1.y, src0.z, src1.w]
* So basically MUL with lotsa swizzling.
*/
static void transform_DST(struct radeon_compiler* c,
struct rc_instruction* inst)
{
emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
rc_remove_instruction(inst);
}
 
static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
 
static void transform_TRUNC(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Definition of trunc:
* trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
*
* The multiplication by sgn(x) can be simplified using CMP:
* y * sgn(x) = (x < 0 ? -y : y)
*/
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]),
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
rc_remove_instruction(inst);
}
 
/**
* Definition of LIT (from ARB_fragment_program):
*
* tmp = VectorLoad(op0);
* if (tmp.x < 0) tmp.x = 0;
* if (tmp.y < 0) tmp.y = 0;
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
* result.x = 1.0;
* result.y = tmp.x;
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
* result.w = 1.0;
*
* The longest path of computation is the one leading to result.z,
* consisting of 5 operations. This implementation of LIT takes
* 5 slots, if the subsequent optimization passes are clever enough
* to pair instructions correctly.
*
* The expansion works in place in one full temporary register; when the
* original destination is not a fully written temporary, the result is
* computed in a fresh temporary and copied to the real destination by a
* trailing MOV.
*/
static void transform_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
unsigned int constant;
unsigned int constant_swizzle;
unsigned int temp;
struct rc_src_register srctemp;

/* Immediate -(128 - eps); its negation is used as the upper clamp bound. */
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);

if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
struct rc_instruction * inst_mov;

/* Inserted AFTER inst: copies the scratch temporary to the original
 * destination (with the original write mask). */
inst_mov = emit1(c, inst,
RC_OPCODE_MOV, 0, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));

/* Redirect the LIT itself to write the whole scratch temporary. */
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}

temp = inst->U.I.DstReg.Index;
srctemp = srcreg(RC_FILE_TEMPORARY, temp);

/* tmp.x = max(0.0, Src.x); */
/* tmp.y = max(0.0, Src.y); */
/* tmp.w = max(Src.w, -128+eps); */
/* NOTE(review): 'constant_swizzle&3' presumably selects the component of
 * the constant that holds the scalar; confirm against
 * rc_constants_add_immediate_scalar. */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
dstregtmpmask(temp, RC_MASK_XYW),
inst->U.I.SrcReg[0],
swizzle(srcreg(RC_FILE_CONSTANT, constant),
RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
/* tmp.z = min(tmp.w, 128-eps), i.e. tmp.z = clamp(Src.w, -128+eps, 128-eps) */
emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
dstregtmpmask(temp, RC_MASK_Z),
swizzle_wwww(srctemp),
negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));

/* tmp.w = Pow(tmp.y, tmp.z), computed as EX2(LG2(tmp.y) * tmp.z) */
emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
dstregtmpmask(temp, RC_MASK_W),
swizzle_yyyy(srctemp));
emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
dstregtmpmask(temp, RC_MASK_W),
swizzle_wwww(srctemp),
swizzle_zzzz(srctemp));
emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
dstregtmpmask(temp, RC_MASK_W),
swizzle_wwww(srctemp));

/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
/* CMP selects its second source when the first is negative, hence -tmp.x. */
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I,
dstregtmpmask(temp, RC_MASK_Z),
negate(swizzle_xxxx(srctemp)),
swizzle_wwww(srctemp),
builtin_zero);

/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I,
dstregtmpmask(temp, RC_MASK_XYW),
swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));

rc_remove_instruction(inst);
}
 
static void transform_LRP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
dst,
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I,
inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
 
rc_remove_instruction(inst);
}
 
static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
tempdst.WriteMask = RC_MASK_W;
tempsrc.Swizzle = RC_SWIZZLE_WWWW;
 
emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc);
 
rc_remove_instruction(inst);
}
 
/* dst = ROUND(src) :
* add = src + .5
* frac = FRC(add)
* dst = add - frac
*
* According to the GLSL spec, the implementor can decide which way to round
* when the fraction is .5. We round down for .5.
*
*/
static void transform_ROUND(struct radeon_compiler* c,
struct rc_instruction* inst)
{
unsigned int mask = inst->U.I.DstReg.WriteMask;
unsigned int frac_index, add_index;
struct rc_dst_register frac_dst, add_dst;
struct rc_src_register frac_src, add_src;
 
/* add = src + .5 */
add_index = rc_find_free_temporary(c);
add_dst = dstregtmpmask(add_index, mask);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
builtin_half);
add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);
 
 
/* frac = FRC(add) */
frac_index = rc_find_free_temporary(c);
frac_dst = dstregtmpmask(frac_index, mask);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
 
/* dst = add - frac */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg,
add_src, negate(frac_src));
rc_remove_instruction(inst);
}
 
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
}
 
static void transform_SEQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
 
rc_remove_instruction(inst);
}
 
static void transform_SFL(struct radeon_compiler* c,
struct rc_instruction* inst)
{
emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero);
rc_remove_instruction(inst);
}
 
static void transform_SGE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
 
rc_remove_instruction(inst);
}
 
static void transform_SGT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
 
rc_remove_instruction(inst);
}
 
static void transform_SLE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
 
rc_remove_instruction(inst);
}
 
static void transform_SLT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
 
rc_remove_instruction(inst);
}
 
static void transform_SNE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
 
rc_remove_instruction(inst);
}
 
static void transform_SSG(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* result = sign(x)
*
* CMP tmp0, -x, 1, 0
* CMP tmp1, x, 1, 0
* ADD result, tmp0, -tmp1;
*/
struct rc_dst_register dst0;
unsigned tmp1;
 
/* 0 < x */
dst0 = try_to_reuse_dst(c, inst);
emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
dst0,
negate(inst->U.I.SrcReg[0]),
builtin_one,
builtin_zero);
 
/* x < 0 */
tmp1 = rc_find_free_temporary(c);
emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
inst->U.I.SrcReg[0],
builtin_one,
builtin_zero);
 
/* Either both are zero, or one of them is one and the other is zero. */
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
 
rc_remove_instruction(inst);
}
 
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
inst->U.I.Opcode = RC_OPCODE_ADD;
inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
}
 
static void transform_SWZ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
inst->U.I.Opcode = RC_OPCODE_MOV;
}
 
static void transform_XPD(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
 
rc_remove_instruction(inst);
}
 
 
/**
 * Per-instruction transformation callback (its signature matches
 * struct radeon_program_transformation::function); no userData necessary.
 *
 * Rewrites the following ALU instructions:
 *  ABS, CEIL, CLAMP, DP2, DPH, DST, FLR, LIT, LRP, POW, ROUND, SEQ, SFL,
 *  SGE, SGT, SLE, SLT, SNE, SSG, SUB, SWZ, TRUNC, XPD
 * using:
 *  MOV, ADD, MUL, MAD, FRC, MIN, MAX, DP3, DP4, LG2, EX2, CMP
 *
 * Transforms RSQ to Radeon's native RSQ by explicitly setting
 * absolute value.
 *
 * @return 1 if the instruction was handled, 0 if its opcode is not one of
 * the above.
 *
 * @note should be applicable to R300 and R500 fragment programs.
 */
int radeonTransformALU(
	struct radeon_compiler * c,
	struct rc_instruction* inst,
	void* unused)
{
	switch (inst->U.I.Opcode) {
	case RC_OPCODE_ABS:   transform_ABS(c, inst);   break;
	case RC_OPCODE_CEIL:  transform_CEIL(c, inst);  break;
	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); break;
	case RC_OPCODE_DP2:   transform_DP2(c, inst);   break;
	case RC_OPCODE_DPH:   transform_DPH(c, inst);   break;
	case RC_OPCODE_DST:   transform_DST(c, inst);   break;
	case RC_OPCODE_FLR:   transform_FLR(c, inst);   break;
	case RC_OPCODE_LIT:   transform_LIT(c, inst);   break;
	case RC_OPCODE_LRP:   transform_LRP(c, inst);   break;
	case RC_OPCODE_POW:   transform_POW(c, inst);   break;
	case RC_OPCODE_ROUND: transform_ROUND(c, inst); break;
	case RC_OPCODE_RSQ:   transform_RSQ(c, inst);   break;
	case RC_OPCODE_SEQ:   transform_SEQ(c, inst);   break;
	case RC_OPCODE_SFL:   transform_SFL(c, inst);   break;
	case RC_OPCODE_SGE:   transform_SGE(c, inst);   break;
	case RC_OPCODE_SGT:   transform_SGT(c, inst);   break;
	case RC_OPCODE_SLE:   transform_SLE(c, inst);   break;
	case RC_OPCODE_SLT:   transform_SLT(c, inst);   break;
	case RC_OPCODE_SNE:   transform_SNE(c, inst);   break;
	case RC_OPCODE_SSG:   transform_SSG(c, inst);   break;
	case RC_OPCODE_SUB:   transform_SUB(c, inst);   break;
	case RC_OPCODE_SWZ:   transform_SWZ(c, inst);   break;
	case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); break;
	case RC_OPCODE_XPD:   transform_XPD(c, inst);   break;
	default:
		/* Not an opcode this pass knows how to rewrite. */
		return 0;
	}

	return 1;
}
 
 
static void transform_r300_vertex_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Note: r500 can take absolute values, but r300 cannot. */
inst->U.I.Opcode = RC_OPCODE_MAX;
inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
 
static void transform_r300_vertex_CMP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* There is no decent CMP available, so let's rig one up.
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
* The following sequence consumes zero to two temps and two extra slots
* (the second temp and the second slot is consumed by transform_LRP),
* but should be equivalent:
*
* SLT tmp0, src0, 0.0
* LRP dst, tmp0, src1, src2
*
* Yes, I know, I'm a mad scientist. ~ C. & M. */
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
dst,
inst->U.I.SrcReg[0], builtin_zero);
 
/* LRP dst, tmp0, src1, src2 */
transform_LRP(c,
emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
 
rc_remove_instruction(inst);
}
 
/*
 * Lower DP2 like the generic transform_DP2, then turn the resulting DP3
 * into DP4. transform_DP2 already zeroed the Z and W selectors of both
 * sources, so the extra DP4 terms contribute nothing.
 */
static void transform_r300_vertex_DP2(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	/* 'inst' gets unlinked by transform_DP2; its successor is the stable
	 * handle for locating the replacement, which is inserted right
	 * before it. */
	struct rc_instruction *successor = inst->Next;
	struct rc_instruction *replacement;

	transform_DP2(c, inst);

	replacement = successor->Prev;
	replacement->U.I.Opcode = RC_OPCODE_DP4;
}
 
static void transform_r300_vertex_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
 
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
unsigned constant_swizzle;
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
0.0000000000000000001,
&constant_swizzle);
 
/* MOV dst, src */
dst.WriteMask = RC_MASK_XYZW;
emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
dst,
inst->U.I.SrcReg[0]);
 
/* MAX dst.y, src, 0.00...001 */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
dstregtmpmask(dst.Index, RC_MASK_Y),
srcreg(RC_FILE_TEMPORARY, dst.Index),
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
 
inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
}
 
static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
struct rc_instruction *inst)
{
/* x = y <==> x >= y && y >= x */
int tmp = rc_find_free_temporary(c);
 
/* x <= y */
emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
inst->U.I.SrcReg[0],
inst->U.I.SrcReg[1]);
 
/* y <= x */
emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
inst->U.I.DstReg,
inst->U.I.SrcReg[1],
inst->U.I.SrcReg[0]);
 
/* x && y = x * y */
emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, tmp),
srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
 
rc_remove_instruction(inst);
}
 
static void transform_r300_vertex_SNE(struct radeon_compiler *c,
struct rc_instruction *inst)
{
/* x != y <==> x < y || y < x */
int tmp = rc_find_free_temporary(c);
 
/* x < y */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
inst->U.I.SrcReg[0],
inst->U.I.SrcReg[1]);
 
/* y < x */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
inst->U.I.DstReg,
inst->U.I.SrcReg[1],
inst->U.I.SrcReg[0]);
 
/* x || y = max(x, y) */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, tmp),
srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
 
rc_remove_instruction(inst);
}
 
static void transform_r300_vertex_SGT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* x > y <==> -x < -y */
inst->U.I.Opcode = RC_OPCODE_SLT;
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
 
/**
 * Rewrite SLE in place as SGE with both operands negated:
 * x <= y  <==>  -x >= -y.
 *
 * The negate mask of each source is toggled on all four components, so an
 * operand that was already negated becomes positive.
 */
static void transform_r300_vertex_SLE(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	unsigned int operand;

	inst->U.I.Opcode = RC_OPCODE_SGE;
	for (operand = 0; operand < 2; ++operand)
		inst->U.I.SrcReg[operand].Negate ^= RC_MASK_XYZW;
}
 
/**
 * Lower SSG (sign of x) for the vertex engine.
 *
 *   SLT tmp0, 0, x;          tmp0 = (0 < x)
 *   SLT tmp1, x, 0;          tmp1 = (x < 0)
 *   ADD result, tmp0, -tmp1;
 *
 * Either both comparison results are zero, or exactly one of them is one,
 * so the difference is the sign of x.
 *
 * The replacement instructions are inserted in front of inst, and inst is
 * removed afterwards.
 */
static void transform_r300_vertex_SSG(struct radeon_compiler* c,
	struct rc_instruction* inst)
{
	struct rc_dst_register dst0;
	unsigned tmp1;

	/* 0 < x
	 *
	 * The destination is requested exactly once; the original code
	 * called try_to_reuse_dst() in the initialiser and again here and
	 * threw the first result away. */
	dst0 = try_to_reuse_dst(c, inst);
	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
	      dst0,
	      builtin_zero,
	      inst->U.I.SrcReg[0]);

	/* x < 0
	 *
	 * The temporary is looked up only after the first SLT has been
	 * emitted, exactly as before, so the lookup runs against a program
	 * that already contains the write to dst0. */
	tmp1 = rc_find_free_temporary(c);
	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
	      inst->U.I.SrcReg[0],
	      builtin_zero);

	/* result = tmp0 - tmp1 */
	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
	      inst->U.I.DstReg,
	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));

	rc_remove_instruction(inst);
}
 
/**
 * Lower TRUNC for the vertex engine by running the generic TRUNC lowering
 * and then the vertex-specific CMP lowering on the instruction that ends up
 * in the same list position.
 */
static void transform_vertex_TRUNC(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Remember the successor: it is the stable anchor used to find
 * whichever instruction currently occupies inst's position. */
struct rc_instruction *next = inst->Next;

/* next->Prev is removed after each transformation and replaced
* by a new instruction. */
transform_TRUNC(c, next->Prev);
/* NOTE(review): relies on transform_TRUNC leaving a CMP directly in
 * front of next - confirm against transform_TRUNC. */
transform_r300_vertex_CMP(c, next->Prev);
}
 
/**
* For use with rc_local_transform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
*/
int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
case RC_OPCODE_SEQ:
if (!c->is_r500) {
transform_r300_vertex_SEQ(c, inst);
return 1;
}
return 0;
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
case RC_OPCODE_SNE:
if (!c->is_r500) {
transform_r300_vertex_SNE(c, inst);
return 1;
}
return 0;
case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
}
}
 
/**
 * Register the two immediate vec4 constants used by the polynomial
 * sine approximation and store their constant indices.
 *
 * \param constants receives two indices: constants[0] for
 *        { 4/PI, -4/(PI*PI), PI, weight } and constants[1] for
 *        { 0.75, 0.5, 1/(2*PI), 2*PI }.
 */
static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
{
	static const float table[2][4] = {
		{
			1.273239545, /* 4/PI */
			-0.405284735, /* -4/(PI*PI) */
			3.141592654, /* PI */
			0.2225 /* weight */
		},
		{
			0.75,
			0.5,
			0.159154943, /* 1/(2*PI) */
			6.283185307 /* 2*PI */
		}
	};

	/* Same order as the table rows: row 0 first, then row 1. */
	constants[0] = rc_constants_add_immediate_vec4(&c->Program.Constants, table[0]);
	constants[1] = rc_constants_add_immediate_vec4(&c->Program.Constants, table[1]);
}
 
/**
* Approximate sin(x), where x is clamped to (-pi/2, pi/2).
*
* MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
* MAD tmp.x, tmp.y, |src|, tmp.x
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
static void sin_approx(
struct radeon_compiler* c, struct rc_instruction * inst,
struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
{
unsigned int tempreg = rc_find_free_temporary(c);
 
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
swizzle_xxxx(src),
srcreg(RC_FILE_CONSTANT, constants[0]));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
absolute(swizzle_xxxx(src)),
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
}
 
/**
* Translate the trigonometric functions COS, SIN, and SCS
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
int r300_transform_trig_simple(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
unsigned int constants[2];
unsigned int tempreg;
 
if (inst->U.I.Opcode != RC_OPCODE_COS &&
inst->U.I.Opcode != RC_OPCODE_SIN &&
inst->U.I.Opcode != RC_OPCODE_SCS)
return 0;
 
tempreg = rc_find_free_temporary(c);
 
sincos_constants(c, constants);
 
if (inst->U.I.Opcode == RC_OPCODE_COS) {
/* MAD tmp.x, src, 1/(2*PI), 0.75 */
/* FRC tmp.x, tmp.x */
/* MAD tmp.z, tmp.x, 2*PI, -PI */
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle_xxxx(inst->U.I.SrcReg[0]),
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
 
sin_approx(c, inst, inst->U.I.DstReg,
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle_xxxx(inst->U.I.SrcReg[0]),
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
 
sin_approx(c, inst, inst->U.I.DstReg,
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
} else {
struct rc_dst_register dst;
 
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
swizzle_xxxx(inst->U.I.SrcReg[0]),
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
srcreg(RC_FILE_TEMPORARY, tempreg));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
srcreg(RC_FILE_TEMPORARY, tempreg),
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
 
dst = inst->U.I.DstReg;
 
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
sin_approx(c, inst, dst,
swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
 
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
sin_approx(c, inst, dst,
swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
}
 
rc_remove_instruction(inst);
 
return 1;
}
 
static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
struct rc_instruction *inst,
unsigned srctmp)
{
if (inst->U.I.Opcode == RC_OPCODE_COS) {
emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I,
inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
struct rc_dst_register moddst = inst->U.I.DstReg;
 
if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
moddst.WriteMask = RC_MASK_X;
emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
}
if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
moddst.WriteMask = RC_MASK_Y;
emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
}
}
 
rc_remove_instruction(inst);
}
 
 
/**
 * Pre-scale the argument of COS, SIN and SCS by 1/(2*PI) and keep only
 * the fractional part, so that the hardware COS/SIN always receive an
 * input in [0,1). SCS is replaced by one COS and one SIN instruction.
 *
 *   MUL tmp.w, src.x, 1/(2*PI)
 *   FRC tmp.w, tmp
 *   COS/SIN dst, tmp.w
 *
 * @warning This transformation implicitly changes the semantics of SIN and COS!
 *
 * \return 1 if inst was COS, SIN or SCS and has been replaced, 0 otherwise.
 */
int radeonTransformTrigScale(struct radeon_compiler* c,
	struct rc_instruction* inst,
	void* unused)
{
	static const float RCP_2PI = 0.15915494309189535;
	unsigned int scaled;
	unsigned int rcp_index;
	unsigned int rcp_swizzle;

	switch (inst->U.I.Opcode) {
	case RC_OPCODE_COS:
	case RC_OPCODE_SIN:
	case RC_OPCODE_SCS:
		break;
	default:
		return 0;
	}

	scaled = rc_find_free_temporary(c);
	rcp_index = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &rcp_swizzle);

	/* tmp.w = src.x / (2*PI) */
	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(scaled, RC_MASK_W),
		swizzle_xxxx(inst->U.I.SrcReg[0]),
		srcregswz(RC_FILE_CONSTANT, rcp_index, rcp_swizzle));

	/* tmp.w = frac(tmp.w) */
	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(scaled, RC_MASK_W),
		srcreg(RC_FILE_TEMPORARY, scaled));

	r300_transform_SIN_COS_SCS(c, inst, scaled);
	return 1;
}
 
/**
 * Wrap the argument of COS, SIN and SCS into [-PI, PI] before the
 * instruction runs. SCS is replaced by one COS and one SIN instruction.
 *
 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
 *
 * \return 1 if inst was COS, SIN or SCS and has been replaced, 0 otherwise.
 */
int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
	struct rc_instruction *inst,
	void *unused)
{
	/* { 1/(2*PI), 0.5, 2*PI, -PI } */
	static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
	unsigned int wrapped;
	unsigned int k;

	switch (inst->U.I.Opcode) {
	case RC_OPCODE_COS:
	case RC_OPCODE_SIN:
	case RC_OPCODE_SCS:
		break;
	default:
		return 0;
	}

	wrapped = rc_find_free_temporary(c);
	k = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);

	/* tmp.w = src.x * 1/(2*PI) + 0.5 */
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(wrapped, RC_MASK_W),
		swizzle_xxxx(inst->U.I.SrcReg[0]),
		srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_XXXX),
		srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_YYYY));

	/* tmp.w = frac(tmp.w) */
	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(wrapped, RC_MASK_W),
		srcreg(RC_FILE_TEMPORARY, wrapped));

	/* tmp.w = tmp.w * 2*PI - PI */
	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(wrapped, RC_MASK_W),
		srcreg(RC_FILE_TEMPORARY, wrapped),
		srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_ZZZZ),
		srcregswz(RC_FILE_CONSTANT, k, RC_SWIZZLE_WWWW));

	r300_transform_SIN_COS_SCS(c, inst, wrapped);
	return 1;
}
 
/**
 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
 *
 * The second source is therefore forced to the constant one on every
 * component, negated on every component.
 *
 * @warning This explicitly changes the form of DDX and DDY!
 *
 * \return 1 if inst was DDX or DDY and has been rewritten, 0 otherwise.
 */
int radeonTransformDeriv(struct radeon_compiler* c,
	struct rc_instruction* inst,
	void* unused)
{
	switch (inst->U.I.Opcode) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		break;
	default:
		return 0;
	}

	/* B = -1.0 in all four components */
	inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;

	return 1;
}
 
/**
 * Replace every unconditional KILP with a conditional KIL.
 *
 * IF Temp[0].x -> IF Temp[0].x
 * ...          -> ...
 * KILL         -> KIL -abs(Temp[0].x)
 * ...          -> ...
 * ENDIF        -> ENDIF
 *
 * === OR ===
 *
 * IF Temp[0].x -\
 * KILL         - > KIL -abs(Temp[0].x)
 * ENDIF        -/
 *
 * === OR ===
 *
 * IF Temp[0].x -> IF Temp[0].x
 * ...          -> ...
 * ELSE         -> ELSE
 * ...          -> ...
 * KILL         -> KIL -abs(Temp[0].x)
 * ...          -> ...
 * ENDIF        -> ENDIF
 *
 * === OR ===
 *
 * KILL         -> KIL -none.1111
 *
 * This needs to be done in its own pass, because it might modify the
 * instructions before and after KILL.
 *
 * \param c    compiler whose program is rewritten in place
 * \param user unused
 */
void rc_transform_KILL(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * inst;
	for (inst = c->Program.Instructions.Next;
			inst != &c->Program.Instructions; inst = inst->Next) {
		struct rc_instruction * if_inst;
		unsigned in_if = 0;

		if (inst->U.I.Opcode != RC_OPCODE_KILP)
			continue;

		/* Walk backwards to the nearest IF.
		 * NOTE(review): ENDIFs met on the way are not counted, so a
		 * KILP that follows an already closed IF/ENDIF block is
		 * still treated as being inside that IF - confirm whether
		 * such programs can reach this pass. */
		for (if_inst = inst->Prev; if_inst != &c->Program.Instructions;
						if_inst = if_inst->Prev) {

			if (if_inst->U.I.Opcode == RC_OPCODE_IF) {
				in_if = 1;
				break;
			}
		}

		inst->U.I.Opcode = RC_OPCODE_KIL;

		if (!in_if) {
			/* Unconditional kill: the operand is always -1. */
			inst->U.I.SrcReg[0] = negate(builtin_one);
		} else {
			/* This should work even if the KILP is inside the ELSE
			 * block, because -0.0 is considered negative. */
			inst->U.I.SrcReg[0] =
				negate(absolute(if_inst->U.I.SrcReg[0]));

			/* The test used to be written with "!=", which never
			 * matched the case described below and instead
			 * deleted two unrelated neighbours whenever neither
			 * of them was an IF/ENDIF. */
			if (inst->Prev->U.I.Opcode == RC_OPCODE_IF
				&& inst->Next->U.I.Opcode == RC_OPCODE_ENDIF) {

				/* Optimize the special case:
				 * IF Temp[0].x
				 * KILP
				 * ENDIF
				 */

				/* Remove IF */
				rc_remove_instruction(inst->Prev);
				/* Remove ENDIF */
				rc_remove_instruction(inst->Next);
			}
		}
	}
}
 
int rc_force_output_alpha_to_one(struct radeon_compiler *c,
struct rc_instruction *inst, void *data)
{
struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler*)c;
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned tmp;
 
if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT ||
inst->U.I.DstReg.Index == fragc->OutputDepth)
return 1;
 
tmp = rc_find_free_temporary(c);
 
/* Insert MOV after inst, set alpha to 1. */
emit1(c, inst, RC_OPCODE_MOV, 0, inst->U.I.DstReg,
srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1));
 
/* Re-route the destination of inst to the source of mov. */
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tmp;
 
/* Move the saturate output modifier to the MOV instruction
* (for better copy propagation). */
inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode;
inst->U.I.SaturateMode = RC_SATURATE_NONE;
return 1;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_alu.h
0,0 → 1,69
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __RADEON_PROGRAM_ALU_H_
#define __RADEON_PROGRAM_ALU_H_
 
#include "radeon_program.h"
 
/* NOTE(review): the implementation is outside the reviewed excerpt;
 * presumably the generic counterpart of r300_transform_vertex_alu with
 * the same per-instruction callback contract - confirm. */
int radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);

/* Per-instruction callback: lowers ALU opcodes that are not native to the
 * r300 up to r500 vertex engine. Returns 1 if inst was transformed,
 * 0 otherwise. The last argument is unused. */
int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);

/* Per-instruction callback: replaces COS, SIN and SCS with a polynomial
 * approximation. Returns 1 if inst was replaced, 0 otherwise. */
int r300_transform_trig_simple(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);

/* Per-instruction callback: pre-scales the argument of COS, SIN and SCS by
 * 1/(2*PI) and takes the fractional part; SCS becomes one COS plus one SIN.
 * Returns 1 if inst was replaced, 0 otherwise. */
int radeonTransformTrigScale(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);

/* Per-instruction callback: wraps the argument of COS, SIN and SCS into
 * [-PI, PI]; SCS becomes one COS plus one SIN.
 * Returns 1 if inst was replaced, 0 otherwise. */
int r300_transform_trig_scale_vertex(
struct radeon_compiler *c,
struct rc_instruction *inst,
void*);

/* Per-instruction callback: sets the second source of DDX/DDY to -1 in all
 * components. Returns 1 if inst was DDX or DDY, 0 otherwise. */
int radeonTransformDeriv(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);

/* Whole-program pass: turns every KILP into a conditional KIL. */
void rc_transform_KILL(struct radeon_compiler * c,
void *user);

/* Per-instruction callback: routes writes to non-depth outputs through a
 * temporary and a MOV that sets alpha to one. Always returns 1. */
int rc_force_output_alpha_to_one(struct radeon_compiler *c,
struct rc_instruction *inst, void *data);
 
#endif /* __RADEON_PROGRAM_ALU_H_ */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_constants.h
0,0 → 1,213
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_PROGRAM_CONSTANTS_H
#define RADEON_PROGRAM_CONSTANTS_H
 
/**
 * Saturate modifier applied to an instruction's result.
 * RC_SATURATE_NONE is 0, so a zero-initialised instruction does not saturate.
 * NOTE(review): the names suggest clamping to [0,1] and [-1,1]
 * respectively - confirm against the code emitters.
 */
typedef enum {
RC_SATURATE_NONE = 0,
RC_SATURATE_ZERO_ONE,
RC_SATURATE_MINUS_PLUS_ONE
} rc_saturate_mode;
 
/**
 * Texture target kinds.
 * Values are assigned implicitly, so RC_TEXTURE_2D_ARRAY is 0 and
 * RC_TEXTURE_1D is 6; do not reorder without checking users that
 * depend on the numeric values.
 */
typedef enum {
RC_TEXTURE_2D_ARRAY,
RC_TEXTURE_1D_ARRAY,
RC_TEXTURE_CUBE,
RC_TEXTURE_3D,
RC_TEXTURE_RECT,
RC_TEXTURE_2D,
RC_TEXTURE_1D
} rc_texture_target;
 
/**
 * Register files a source or destination register can refer to.
 */
typedef enum {
/**
* Used to indicate unused register descriptions and
* source registers that use a constant swizzle.
*/
RC_FILE_NONE = 0,
RC_FILE_TEMPORARY,

/**
* Input register.
*
* \note The compiler attaches no implicit semantics to input registers.
* Fragment/vertex program specific semantics must be defined explicitly
* using the appropriate compiler interfaces.
*/
RC_FILE_INPUT,

/**
* Output register.
*
* \note The compiler attaches no implicit semantics to output registers.
* Fragment/vertex program specific semantics must be defined explicitly
* using the appropriate compiler interfaces.
*/
RC_FILE_OUTPUT,
RC_FILE_ADDRESS,

/**
* Indicates a constant from the \ref rc_constant_list .
*/
RC_FILE_CONSTANT,

/**
* Indicates a special register, see RC_SPECIAL_xxx.
*/
RC_FILE_SPECIAL,

/**
* Indicates this register should use the result of the presubtract
* operation.
*/
RC_FILE_PRESUB,

/**
* Indicates that the source index has been encoded as a 7-bit float.
*/
RC_FILE_INLINE
} rc_register_file;
 
/** Indices within the RC_FILE_SPECIAL register file. */
enum {
/** R500 fragment program ALU result "register" */
RC_SPECIAL_ALU_RESULT = 0,

/** Must be last: doubles as the number of special registers. */
RC_NUM_SPECIAL_REGISTERS
};
 
/* Width in bits of a register index field. */
#define RC_REGISTER_INDEX_BITS 10
/* 1 << 10 = 1024: the number of representable indices, i.e. one past the
 * largest index that fits into RC_REGISTER_INDEX_BITS bits. */
#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
 
/**
 * Per-component swizzle selector. Each value fits in 3 bits (0..7), which is
 * what RC_MAKE_SWIZZLE / GET_SWZ rely on. Values 0..3 select a register
 * component; the remaining values select a constant or mark the component
 * as unused.
 */
typedef enum {
RC_SWIZZLE_X = 0,
RC_SWIZZLE_Y,
RC_SWIZZLE_Z,
RC_SWIZZLE_W,
RC_SWIZZLE_ZERO,
RC_SWIZZLE_ONE,
RC_SWIZZLE_HALF,
RC_SWIZZLE_UNUSED
} rc_swizzle;
 
/* Pack four 3-bit selectors into one value: a in bits 0-2, b in bits 3-5,
 * c in bits 6-8 and d in bits 9-11. */
#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
/* Pack the same selector into all four component slots. */
#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
/* Extract the 3-bit selector of component idx (0..3) from a packed swizzle. */
#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
/* Extract bit idx from a mask. */
#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
/* Replace the selector of component idx in the lvalue swz with newv.
 * newv is not masked, so it must already fit in 3 bits. */
#define SET_SWZ(swz, idx, newv) \
do { \
(swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
} while(0)
 
/* Commonly used packed swizzles. The name lists the selector of each of the
 * four components in order: 0 = constant zero, 1 = constant one,
 * H = constant half, U = unused. */
#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_XYZ1 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE)
#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
 
/**
* \name Bitmasks for components of vectors.
*
* Used for write masks, negation masks, etc.
* One bit per component: X is bit 0, Y bit 1, Z bit 2 and W bit 3, so
* masks can be combined with bitwise OR.
*/
/*@{*/
#define RC_MASK_NONE 0
#define RC_MASK_X 1
#define RC_MASK_Y 2
#define RC_MASK_Z 4
#define RC_MASK_W 8
#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
/*@}*/
 
/**
 * Values for the WriteALUResult field of a paired instruction.
 * NOTE(review): the names suggest "do not write", "write from the X
 * result" and "write from the W result" - confirm against the emitter.
 */
typedef enum {
RC_ALURESULT_NONE = 0,
RC_ALURESULT_X,
RC_ALURESULT_W
} rc_write_aluresult;
 
/**
 * Presubtract operations. For a register in the RC_FILE_PRESUB file the
 * register index holds one of these values. The number of source registers
 * each operation consumes is given by rc_presubtract_src_reg_count().
 */
typedef enum {
RC_PRESUB_NONE = 0,

/** 1 - 2 * src0 */
RC_PRESUB_BIAS,

/** src1 - src0 */
RC_PRESUB_SUB,

/** src1 + src0 */
RC_PRESUB_ADD,

/** 1 - src0 */
RC_PRESUB_INV
} rc_presubtract_op;
 
/**
 * Output modifier selection (the Omod field of a paired sub-instruction is
 * 3 bits wide, matching the eight values here).
 * RC_OMOD_MUL_1 is 0, so a zero-initialised field selects it.
 * NOTE(review): the names suggest a multiply or divide of the result by
 * the given factor - confirm against the emitter.
 */
typedef enum {
RC_OMOD_MUL_1,
RC_OMOD_MUL_2,
RC_OMOD_MUL_4,
RC_OMOD_MUL_8,
RC_OMOD_DIV_2,
RC_OMOD_DIV_4,
RC_OMOD_DIV_8,
RC_OMOD_DISABLE
} rc_omod_op;
 
/**
 * Number of source registers a presubtract operation reads.
 *
 * \return 1 for BIAS and INV, 2 for ADD and SUB, 0 for anything else
 *         (including RC_PRESUB_NONE).
 */
static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
	if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
		return 1;

	if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
		return 2;

	return 0;
}
 
/* Bit flags describing which source group(s) a swizzle reads from; RGB and
 * ALPHA are distinct bits so both can be set at once. Callers test them
 * with bitwise AND. */
#define RC_SOURCE_NONE 0x0
#define RC_SOURCE_RGB 0x1
#define RC_SOURCE_ALPHA 0x2
 
/**
 * Predication mode of an instruction. RC_PRED_DISABLED is 0.
 * NOTE(review): SET / INV presumably mean "execute when the predicate is
 * set" and "execute when it is not set" - confirm against the emitter.
 */
typedef enum {
RC_PRED_DISABLED,
RC_PRED_SET,
RC_PRED_INV
} rc_predicate_mode;
 
#endif /* RADEON_PROGRAM_CONSTANTS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_pair.c
0,0 → 1,239
/*
* Copyright (C) 2008-2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_program_pair.h"
 
#include "radeon_compiler_util.h"
 
#include <stdlib.h>
 
/**
* Reserve a source slot of a paired instruction for the given register.
*
* \param pair  paired instruction whose source slots are updated
* \param rgb   non-zero if the RGB sub-instruction reads the register
* \param alpha non-zero if the Alpha sub-instruction reads the register
* \param file  register file of the source
* \param index register index; for RC_FILE_PRESUB this is the presubtract op
*
* Return the source slot where we installed the given register access,
* or -1 if no slot was free anymore.
*
* Note that 0 is also returned, without touching the instruction, when
* neither rgb nor alpha is requested or when file is RC_FILE_NONE.
*/
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
unsigned int rgb, unsigned int alpha,
rc_register_file file, unsigned int index)
{
int candidate = -1;
int candidate_quality = -1;
unsigned int alpha_used = 0;
unsigned int rgb_used = 0;
int i;

/* Nothing to allocate. */
if ((!rgb && !alpha) || file == RC_FILE_NONE)
return 0;

/* Make sure only one presubtract operation is used per instruction. */
if (file == RC_FILE_PRESUB) {
if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used
&& index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
return -1;
}

if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used
&& index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
return -1;
}
}

/* Scan the three regular slots. A slot that is occupied by a different
 * register in a requested group is skipped; otherwise q counts in how
 * many requested groups the slot already holds exactly this register. */
for(i = 0; i < 3; ++i) {
int q = 0;
if (rgb) {
if (pair->RGB.Src[i].Used) {
if (pair->RGB.Src[i].File != file ||
pair->RGB.Src[i].Index != index) {
rgb_used++;
continue;
}
q++;
}
}
if (alpha) {
if (pair->Alpha.Src[i].Used) {
if (pair->Alpha.Src[i].File != file ||
pair->Alpha.Src[i].Index != index) {
alpha_used++;
continue;
}
q++;
}
}
/* Strict comparison: among equally good slots the lowest index wins. */
if (q > candidate_quality) {
candidate_quality = q;
candidate = i;
}
}

/* Presubtract sources always live in the dedicated slot, regardless of
 * what the scan above found. */
if (file == RC_FILE_PRESUB) {
candidate = RC_PAIR_PRESUB_SRC;
} else if (candidate < 0 || (rgb && rgb_used > 2)
|| (alpha && alpha_used > 2)) {
return -1;
}

/* candidate >= 0 */

if (rgb) {
pair->RGB.Src[candidate].Used = 1;
pair->RGB.Src[candidate].File = file;
pair->RGB.Src[candidate].Index = index;
if (candidate == RC_PAIR_PRESUB_SRC) {
/* For registers with the RC_FILE_PRESUB file,
* the index stores the presubtract op. */
int src_regs = rc_presubtract_src_reg_count(index);
/* Mark the slots the presubtract op reads as used. */
for(i = 0; i < src_regs; i++) {
pair->RGB.Src[i].Used = 1;
}
}
}
if (alpha) {
pair->Alpha.Src[candidate].Used = 1;
pair->Alpha.Src[candidate].File = file;
pair->Alpha.Src[candidate].Index = index;
if (candidate == RC_PAIR_PRESUB_SRC) {
/* For registers with the RC_FILE_PRESUB file,
* the index stores the presubtract op. */
int src_regs = rc_presubtract_src_reg_count(index);
/* Mark the slots the presubtract op reads as used. */
for(i=0; i < src_regs; i++) {
pair->Alpha.Src[i].Used = 1;
}
}
}

return candidate;
}
 
/**
 * Invoke cb for the source slot(s) an argument reads.
 *
 * \param swz one swizzle selector of the argument; W selects the Alpha
 *        sources, X/Y/Z select the RGB sources, anything larger (constant
 *        or unused selectors) means no register is read and cb is not called
 * \param src source slot named by the argument; for RC_PAIR_PRESUB_SRC cb is
 *        called once for each slot the presubtract operation consumes
 */
static void pair_foreach_source_callback(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb,
	unsigned int swz,
	unsigned int src)
{
	struct rc_pair_sub_instruction * sub;
	unsigned int operand_count;
	unsigned int slot;

	/* swz > 3 means that the swizzle is either not used, or a constant
	 * swizzle (e.g. 0, 1, 0.5). */
	if (swz > 3)
		return;

	sub = (swz == RC_SWIZZLE_W) ? &pair->Alpha : &pair->RGB;

	if (src != RC_PAIR_PRESUB_SRC) {
		cb(data, &sub->Src[src]);
		return;
	}

	/* The presubtract slot's index holds the presubtract op, which
	 * determines how many of the leading slots it reads. */
	operand_count = rc_presubtract_src_reg_count(
				sub->Src[RC_PAIR_PRESUB_SRC].Index);
	for (slot = 0; slot < operand_count; slot++)
		cb(data, &sub->Src[slot]);
}
 
/**
 * Call cb for every source slot read by the Alpha sub-instruction.
 *
 * For each argument of the Alpha opcode, the first swizzle component
 * decides whether the argument reads an RGB or an Alpha source.
 */
void rc_pair_foreach_source_that_alpha_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	const struct rc_opcode_info * opinfo =
		rc_get_opcode_info(pair->Alpha.Opcode);
	unsigned int arg = 0;

	while (arg < opinfo->NumSrcRegs) {
		const unsigned int selector =
			GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0);
		const unsigned int slot = pair->Alpha.Arg[arg].Source;

		pair_foreach_source_callback(pair, data, cb, selector, slot);
		arg++;
	}
}
 
/**
 * Call cb for every source slot read by the RGB sub-instruction.
 *
 * For each argument of the RGB opcode, the first swizzle component that
 * selects a register component (X, Y, Z or W) decides whether the argument
 * reads an RGB or an Alpha source. Arguments whose four components are all
 * constant or unused read no register, and cb is not called for them.
 */
void rc_pair_foreach_source_that_rgb_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	unsigned int i;
	const struct rc_opcode_info * info =
		rc_get_opcode_info(pair->RGB.Opcode);
	for(i = 0; i < info->NumSrcRegs; i++) {
		unsigned int chan;
		unsigned int swz = RC_SWIZZLE_UNUSED;
		/* Find a swizzle that is either X,Y,Z,or W. We assume here
		 * that if one channel swizzles X,Y, or Z, then none of the
		 * other channels swizzle W, and vice-versa. */
		for(chan = 0; chan < 4; chan++) {
			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
			/* Stop at the first register selector. This used
			 * to be "continue", a no-op at the end of the loop
			 * body, so swz always ended up as the selector of
			 * channel 3 instead of the one searched for. */
			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
				break;
		}
		/* If no channel matched, swz holds channel 3's selector,
		 * which is > 3 and is ignored by the callback helper. */
		pair_foreach_source_callback(pair, data, cb,
					swz,
					pair->RGB.Arg[i].Source);
	}
}
 
/**
 * Look up the source slot an argument refers to.
 *
 * The swizzle of the argument is classified with rc_source_type_swz();
 * RGB takes precedence when both the RGB and ALPHA flags are reported.
 *
 * \return pointer to the RGB or Alpha source slot named by arg->Source, or
 *         NULL if the swizzle reads neither group.
 */
struct rc_pair_instruction_source * rc_pair_get_src(
	struct rc_pair_instruction * pair_inst,
	struct rc_pair_instruction_arg * arg)
{
	const unsigned int groups = rc_source_type_swz(arg->Swizzle);

	if (groups & RC_SOURCE_RGB)
		return &pair_inst->RGB.Src[arg->Source];

	if (groups & RC_SOURCE_ALPHA)
		return &pair_inst->Alpha.Src[arg->Source];

	return NULL;
}
 
/**
 * Find which of the three regular source slots a source pointer refers to.
 *
 * Only slots 0..2 of the RGB and Alpha source arrays are examined; the
 * presubtract slot (index 3) is never matched.
 *
 * \return the slot index, or -1 if src is not one of those slots.
 */
int rc_pair_get_src_index(
	struct rc_pair_instruction * pair_inst,
	struct rc_pair_instruction_source * src)
{
	int slot = 0;

	while (slot < 3) {
		if (src == &pair_inst->RGB.Src[slot]
		    || src == &pair_inst->Alpha.Src[slot])
			return slot;
		slot++;
	}

	return -1;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_pair.h
0,0 → 1,139
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __RADEON_PROGRAM_PAIR_H_
#define __RADEON_PROGRAM_PAIR_H_
 
#include "radeon_code.h"
#include "radeon_opcodes.h"
#include "radeon_program_constants.h"
 
struct radeon_compiler;
 
 
/**
* \file
* Represents a paired ALU instruction, as found in R300 and R500
* fragment programs.
*
* Note that this representation is taking some liberties as far
* as register files are concerned, to allow separate register
* allocation.
*
* Also note that there are some subtleties in that the semantics
* of certain opcodes are implicitly changed in this representation;
* see \ref rc_pair_translate
*/
 
/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
* the presubtract value will be used, and
* {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
*
* This is the fourth entry (index 3) of the Src[4] arrays; entries 0..2 are
* the regular source slots.
*/
#define RC_PAIR_PRESUB_SRC 3
 
/** One source slot of a paired sub-instruction. */
struct rc_pair_instruction_source {
/* Non-zero once the slot has been allocated. */
unsigned int Used:1;
/* Register file of the source (an rc_register_file value). */
unsigned int File:4;
/* Register index; for RC_FILE_PRESUB this holds the presubtract op. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
 
/** One argument of a paired sub-instruction's opcode. */
struct rc_pair_instruction_arg {
/* Index into Src[] (0..3; 3 is RC_PAIR_PRESUB_SRC). */
unsigned int Source:2;
/* Four packed 3-bit swizzle selectors, see RC_MAKE_SWIZZLE. */
unsigned int Swizzle:12;
unsigned int Abs:1;
unsigned int Negate:1;
};
 
/** The RGB or the Alpha half of a paired ALU instruction. */
struct rc_pair_sub_instruction {
unsigned int Opcode:8;
unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
unsigned int WriteMask:4;
unsigned int Target:2;
unsigned int OutputWriteMask:3;
unsigned int DepthWriteMask:1;
unsigned int Saturate:1;
/* 3 bits, wide enough for every rc_omod_op value. */
unsigned int Omod:3;

/* Src[0..2] are regular source slots, Src[RC_PAIR_PRESUB_SRC] is the
 * presubtract slot. */
struct rc_pair_instruction_source Src[4];
/* Opcode arguments; each refers to a Src[] slot through its Source field. */
struct rc_pair_instruction_arg Arg[3];
};
 
/** A paired ALU instruction: one RGB and one Alpha sub-instruction. */
struct rc_pair_instruction {
struct rc_pair_sub_instruction RGB;
struct rc_pair_sub_instruction Alpha;

/* NOTE(review): presumably holds an rc_write_aluresult value - confirm. */
unsigned int WriteALUResult:2;
unsigned int ALUResultCompare:3;
unsigned int Nop:1;
unsigned int SemWait:1;
};
 
/* Callback type for the rc_pair_foreach_source_that_*_reads helpers: called
 * with the caller-supplied data pointer and one source slot that is read. */
typedef void (*rc_pair_foreach_src_fn)
(void *, struct rc_pair_instruction_source *);
 
/**
* General helper functions for dealing with the paired instruction format.
*/
/*@{*/
/* Reserve a source slot for the register (file, index) in the RGB and/or
 * Alpha half. Returns the slot, or -1 if none is available; also returns 0
 * when nothing needs to be allocated. */
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
unsigned int rgb, unsigned int alpha,
rc_register_file file, unsigned int index);

/* Call cb(data, slot) for every source slot the Alpha half reads. */
void rc_pair_foreach_source_that_alpha_reads(
struct rc_pair_instruction * pair,
void * data,
rc_pair_foreach_src_fn cb);

/* Call cb(data, slot) for every source slot the RGB half reads. */
void rc_pair_foreach_source_that_rgb_reads(
struct rc_pair_instruction * pair,
void * data,
rc_pair_foreach_src_fn cb);

/* Return the source slot arg refers to, or NULL if its swizzle reads
 * neither an RGB nor an Alpha source. */
struct rc_pair_instruction_source * rc_pair_get_src(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_arg * arg);

/* Return the index (0..2) of src within pair_inst's regular source slots,
 * or -1 if it is not one of them. */
int rc_pair_get_src_index(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_source * src);
/*@}*/
 
 
/**
* Compiler passes that operate with the paired format.
*
* Each takes the compiler and an opaque user pointer.
* NOTE(review): the implementations live in other files; the one-word
* summaries implied by the names (translate, schedule, register
* allocation, dead source removal) should be confirmed there.
*/
/*@{*/
struct radeon_pair_handler;

void rc_pair_translate(struct radeon_compiler *cc, void *user);
void rc_pair_schedule(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
/*@}*/
 
#endif /* __RADEON_PROGRAM_PAIR_H_ */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_print.c
0,0 → 1,484
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "radeon_program.h"
 
#include <stdio.h>
 
/**
 * Translate a texture target into the short name used in program dumps.
 *
 * \return a static string; "BAD_TEXTURE_TARGET" for unrecognized values.
 */
static const char * textarget_to_string(rc_texture_target target)
{
	const char * name = "BAD_TEXTURE_TARGET";

	switch (target) {
	case RC_TEXTURE_1D:
		name = "1D";
		break;
	case RC_TEXTURE_2D:
		name = "2D";
		break;
	case RC_TEXTURE_RECT:
		name = "RECT";
		break;
	case RC_TEXTURE_3D:
		name = "3D";
		break;
	case RC_TEXTURE_CUBE:
		name = "CUBE";
		break;
	case RC_TEXTURE_1D_ARRAY:
		name = "1D_ARRAY";
		break;
	case RC_TEXTURE_2D_ARRAY:
		name = "2D_ARRAY";
		break;
	default:
		break;
	}

	return name;
}
 
/**
 * Describe a presubtract operation as the expression it computes.
 *
 * \return a static string; "BAD_PRESUBTRACT_OP" for unrecognized values.
 */
static const char * presubtract_op_to_string(rc_presubtract_op op)
{
	const char * expr = "BAD_PRESUBTRACT_OP";

	switch (op) {
	case RC_PRESUB_NONE: expr = "NONE"; break;
	case RC_PRESUB_BIAS: expr = "(1 - 2 * src0)"; break;
	case RC_PRESUB_SUB:  expr = "(src1 - src0)"; break;
	case RC_PRESUB_ADD:  expr = "(src1 + src0)"; break;
	case RC_PRESUB_INV:  expr = "(1 - src0)"; break;
	default: break;
	}

	return expr;
}
 
/**
 * Print the output modifier as a " * N" or " / N" suffix.
 *
 * Nothing is printed for RC_OMOD_MUL_1, RC_OMOD_DISABLE or unknown values.
 */
static void print_omod_op(FILE * f, rc_omod_op op)
{
	const char * suffix = NULL;

	switch (op) {
	case RC_OMOD_MUL_2: suffix = "* 2"; break;
	case RC_OMOD_MUL_4: suffix = "* 4"; break;
	case RC_OMOD_MUL_8: suffix = "* 8"; break;
	case RC_OMOD_DIV_2: suffix = "/ 2"; break;
	case RC_OMOD_DIV_4: suffix = "/ 4"; break;
	case RC_OMOD_DIV_8: suffix = "/ 8"; break;
	case RC_OMOD_MUL_1:
	case RC_OMOD_DISABLE:
	default:
		/* Identity or unknown modifier: no suffix. */
		break;
	}

	if (suffix != NULL)
		fprintf(f, " %s", suffix);
}
 
/**
 * Print a comparison as "<lhs> <op> <rhs>".
 *
 * RC_COMPARE_FUNC_NEVER and RC_COMPARE_FUNC_ALWAYS print the constants
 * "false" and "true"; an unknown function prints "???" as the operator.
 */
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
{
	const char * symbol;

	switch (func) {
	case RC_COMPARE_FUNC_NEVER:
		fprintf(f, "false");
		return;
	case RC_COMPARE_FUNC_ALWAYS:
		fprintf(f, "true");
		return;
	case RC_COMPARE_FUNC_LESS:
		symbol = "<";
		break;
	case RC_COMPARE_FUNC_EQUAL:
		symbol = "==";
		break;
	case RC_COMPARE_FUNC_LEQUAL:
		symbol = "<=";
		break;
	case RC_COMPARE_FUNC_GREATER:
		symbol = ">";
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		symbol = "!=";
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		symbol = ">=";
		break;
	default:
		symbol = "???";
		break;
	}

	fprintf(f, "%s %s %s", lhs, symbol, rhs);
}
 
/**
 * Print an inline floating-point constant as "<value> (0x<index>)".
 *
 * The low seven bits of \p index encode the constant: bits 0-2 are a 3-bit
 * mantissa and bits 3-6 a 4-bit exponent biased by 7.  The value is
 * expanded into a 32-bit float bit pattern (exponent re-biased to 127 and
 * placed at bit 23, mantissa placed in the top three fraction bits).
 *
 * \param f      stream to print to
 * \param index  raw inline-constant encoding
 */
static void rc_print_inline_float(FILE * f, int index)
{
	/* Reinterpret the bit pattern through a union: reading an unsigned
	 * object through a float pointer violates the aliasing rules and may
	 * be miscompiled at higher optimization levels. */
	union {
		unsigned bits;
		float value;
	} conv;
	int r300_exponent = ((index >> 3) & 0xf) - 7;
	unsigned r300_mantissa = index & 0x7;
	unsigned float_exponent = r300_exponent + 127;

	conv.bits = (r300_mantissa << 20) | (float_exponent << 23);

	fprintf(f, "%f (0x%x)", conv.value, (unsigned)index);
}
 
/**
 * Print a register reference.
 *
 * RC_FILE_NONE prints "none", RC_FILE_SPECIAL prints the special register
 * name, RC_FILE_INLINE prints the decoded inline constant, and every other
 * file prints "<file>[<index>]" with " + addr[0]" appended when \p reladdr
 * is non-zero.
 */
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
{
	const char * file_name;

	switch (file) {
	case RC_FILE_NONE:
		fprintf(f, "none");
		return;
	case RC_FILE_SPECIAL:
		if (index == RC_SPECIAL_ALU_RESULT)
			fprintf(f, "aluresult");
		else
			fprintf(f, "special[%i]", index);
		return;
	case RC_FILE_INLINE:
		rc_print_inline_float(f, index);
		return;
	case RC_FILE_TEMPORARY:
		file_name = "temp";
		break;
	case RC_FILE_INPUT:
		file_name = "input";
		break;
	case RC_FILE_OUTPUT:
		file_name = "output";
		break;
	case RC_FILE_ADDRESS:
		file_name = "addr";
		break;
	case RC_FILE_CONSTANT:
		file_name = "const";
		break;
	default:
		file_name = "BAD FILE";
		break;
	}

	fprintf(f, "%s[%i%s]", file_name, index, reladdr ? " + addr[0]" : "");
}
 
/**
 * Print the component letters (in x, y, z, w order) selected by \p mask.
 */
static void rc_print_mask(FILE * f, unsigned int mask)
{
	static const struct {
		unsigned int bit;
		const char * letter;
	} channels[] = {
		{ RC_MASK_X, "x" },
		{ RC_MASK_Y, "y" },
		{ RC_MASK_Z, "z" },
		{ RC_MASK_W, "w" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(channels) / sizeof(channels[0]); ++i) {
		if (mask & channels[i].bit)
			fprintf(f, "%s", channels[i].letter);
	}
}
 
/**
 * Print a destination register, followed by ".<mask>" unless all four
 * components are written.
 */
static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
{
	rc_print_register(f, dst.File, dst.Index, 0);

	/* A full write mask is implied and therefore omitted. */
	if (dst.WriteMask == RC_MASK_XYZW)
		return;

	fprintf(f, ".");
	rc_print_mask(f, dst.WriteMask);
}
 
/**
 * Map a single swizzle selector to its one-character mnemonic.
 *
 * An unknown selector is reported on stderr and mapped to '?'.
 */
static char rc_swizzle_char(unsigned int swz)
{
	char mnemonic;

	switch (swz) {
	case RC_SWIZZLE_X:
		mnemonic = 'x';
		break;
	case RC_SWIZZLE_Y:
		mnemonic = 'y';
		break;
	case RC_SWIZZLE_Z:
		mnemonic = 'z';
		break;
	case RC_SWIZZLE_W:
		mnemonic = 'w';
		break;
	case RC_SWIZZLE_ZERO:
		mnemonic = '0';
		break;
	case RC_SWIZZLE_ONE:
		mnemonic = '1';
		break;
	case RC_SWIZZLE_HALF:
		mnemonic = 'H';
		break;
	case RC_SWIZZLE_UNUSED:
		mnemonic = '_';
		break;
	default:
		fprintf(stderr, "bad swz: %u\n", swz);
		mnemonic = '?';
		break;
	}

	return mnemonic;
}
 
/**
 * Print the four components of a packed swizzle, each preceded by "-"
 * when the matching bit of \p negate is set.
 */
static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
{
	unsigned int chan = 0;

	while (chan < 4) {
		rc_swizzle selector = GET_SWZ(swizzle, chan);

		if (GET_BIT(negate, chan)) {
			fprintf(f, "-");
		}
		fprintf(f, "%c", rc_swizzle_char(selector));
		++chan;
	}
}
 
/**
 * Print a presubtract operation as a parenthesized expression over its
 * source registers, e.g. "(1 - 2 * temp[0])" or "(temp[1] - temp[0])".
 *
 * Unknown opcodes print an empty pair of parentheses.
 */
static void rc_print_presub_instruction(FILE * f,
					struct rc_presub_instruction inst)
{
	/* Unary forms print a prefix then SrcReg[0]; binary forms print
	 * SrcReg[1], an infix operator, then SrcReg[0]. */
	const char * prefix = NULL;
	const char * infix = NULL;

	switch (inst.Opcode) {
	case RC_PRESUB_BIAS:
		prefix = "1 - 2 * ";
		break;
	case RC_PRESUB_INV:
		prefix = "1 - ";
		break;
	case RC_PRESUB_SUB:
		infix = " - ";
		break;
	case RC_PRESUB_ADD:
		infix = " + ";
		break;
	default:
		break;
	}

	fprintf(f,"(");
	if (prefix != NULL) {
		fprintf(f, "%s", prefix);
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
	} else if (infix != NULL) {
		rc_print_register(f, inst.SrcReg[1].File,
				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
		fprintf(f, "%s", infix);
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
	}
	fprintf(f, ")");
}
 
/**
 * Print a source operand with its negate, absolute-value and swizzle
 * modifiers.
 *
 * A negate mask covering all or no components is "trivial": it is shown
 * as a single leading "-" (or nothing) and the closing "|" follows the
 * swizzle.  Otherwise the closing "|" precedes the swizzle and the
 * negation is printed per component inside the swizzle.
 */
static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
				  struct rc_src_register src)
{
	const int negate_all = (src.Negate == RC_MASK_XYZW);
	const int trivial_negate = (src.Negate == RC_MASK_NONE) || negate_all;
	const int has_abs = src.Abs ? 1 : 0;

	if (negate_all)
		fprintf(f, "-");
	if (has_abs)
		fprintf(f, "|");

	if (src.File != RC_FILE_PRESUB)
		rc_print_register(f, src.File, src.Index, src.RelAddr);
	else
		rc_print_presub_instruction(f, inst->U.I.PreSub);

	if (has_abs && !trivial_negate)
		fprintf(f, "|");

	if (!trivial_negate || src.Swizzle != RC_SWIZZLE_XYZW) {
		unsigned int component_negate = trivial_negate ? 0 : src.Negate;

		fprintf(f, ".");
		rc_print_swizzle(f, src.Swizzle, component_negate);
	}

	if (has_abs && trivial_negate)
		fprintf(f, "|");
}
 
/**
 * Track control-flow nesting while printing and return the indentation
 * (two columns per level) to use for the instruction \p opcode.
 *
 * IF/BGNLOOP are indented at the current level and then open a new one;
 * ENDIF/ENDLOOP close a level and are indented at the enclosing level;
 * ELSE is indented one level out without changing the depth.
 */
static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
{
	unsigned level;

	switch (opcode) {
	case RC_OPCODE_IF:
	case RC_OPCODE_BGNLOOP:
		level = *branch_depth;
		*branch_depth = level + 1;
		break;

	case RC_OPCODE_ENDIF:
	case RC_OPCODE_ENDLOOP:
		assert(*branch_depth > 0);
		*branch_depth -= 1;
		level = *branch_depth;
		break;

	case RC_OPCODE_ELSE:
		assert(*branch_depth > 0);
		level = *branch_depth - 1;
		break;

	default:
		level = *branch_depth;
		break;
	}

	return level * 2;
}
 
/**
 * Print one instruction in the normal (unpaired) format on a single line:
 * indentation, opcode with saturate suffix, destination, sources, texture
 * information, ALU-result compare and predicate flags.
 *
 * \param branch_depth  in/out nesting counter used for indentation
 */
static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned indent = update_branch_depth(inst->U.I.Opcode, branch_depth);
	unsigned int src;
	unsigned col = 0;

	while (col < indent) {
		fprintf(f, " ");
		col++;
	}

	fprintf(f, "%s", info->Name);

	switch(inst->U.I.SaturateMode) {
	case RC_SATURATE_NONE:
		break;
	case RC_SATURATE_ZERO_ONE:
		fprintf(f, "_SAT");
		break;
	case RC_SATURATE_MINUS_PLUS_ONE:
		fprintf(f, "_SAT2");
		break;
	default:
		fprintf(f, "_BAD_SAT");
		break;
	}

	if (info->HasDstReg) {
		fprintf(f, " ");
		rc_print_dst_register(f, inst->U.I.DstReg);
		print_omod_op(f, inst->U.I.Omod);
		if (info->NumSrcRegs)
			fprintf(f, ",");
	}

	for (src = 0; src < info->NumSrcRegs; ++src) {
		/* Sources after the first are comma separated. */
		if (src != 0)
			fprintf(f, ",");
		fprintf(f, " ");
		rc_print_src_register(f, inst, inst->U.I.SrcReg[src]);
	}

	if (info->HasTexture) {
		fprintf(f, ", %s%s[%u]%s%s",
			textarget_to_string(inst->U.I.TexSrcTarget),
			inst->U.I.TexShadow ? "SHADOW" : "",
			inst->U.I.TexSrcUnit,
			inst->U.I.TexSemWait ? " SEM_WAIT" : "",
			inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : "");
	}

	fprintf(f, ";");

	if (inst->U.I.WriteALUResult) {
		const char * component =
			(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w";

		fprintf(f, " [aluresult = (");
		rc_print_comparefunc(f, component, inst->U.I.ALUResultCompare, "0");
		fprintf(f, ")]");
	}

	if (inst->U.I.DstReg.Pred == RC_PRED_SET)
		fprintf(f, " PRED_SET");
	else if (inst->U.I.DstReg.Pred == RC_PRED_INV)
		fprintf(f, " PRED_INV");

	fprintf(f, "\n");
}
 
static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
int printedsrc = 0;
unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
 
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
 
for(unsigned int src = 0; src < 3; ++src) {
if (inst->RGB.Src[src].Used) {
if (printedsrc)
fprintf(f, ", ");
fprintf(f, "src%i.xyz = ", src);
rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
printedsrc = 1;
}
if (inst->Alpha.Src[src].Used) {
if (printedsrc)
fprintf(f, ", ");
fprintf(f, "src%i.w = ", src);
rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
printedsrc = 1;
}
}
if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
fprintf(f, ", srcp.xyz = %s",
presubtract_op_to_string(
inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
}
if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
fprintf(f, ", srcp.w = %s",
presubtract_op_to_string(
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
}
if (inst->SemWait) {
fprintf(f, " SEM_WAIT");
}
fprintf(f, "\n");
 
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
 
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
 
fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
if (inst->RGB.WriteMask)
fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
(inst->RGB.WriteMask & 1) ? "x" : "",
(inst->RGB.WriteMask & 2) ? "y" : "",
(inst->RGB.WriteMask & 4) ? "z" : "");
if (inst->RGB.OutputWriteMask)
fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
(inst->RGB.OutputWriteMask & 1) ? "x" : "",
(inst->RGB.OutputWriteMask & 2) ? "y" : "",
(inst->RGB.OutputWriteMask & 4) ? "z" : "");
if (inst->WriteALUResult == RC_ALURESULT_X)
fprintf(f, " aluresult");
 
print_omod_op(f, inst->RGB.Omod);
 
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
fprintf(f, ", %s%ssrc", neg, abs);
if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
fprintf(f,"p");
else
fprintf(f,"%d", inst->RGB.Arg[arg].Source);
fprintf(f,".%c%c%c%s",
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
abs);
}
fprintf(f, "\n");
}
 
if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
 
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
 
fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
if (inst->Alpha.WriteMask)
fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
if (inst->Alpha.OutputWriteMask)
fprintf(f, " color[%i].w", inst->Alpha.Target);
if (inst->Alpha.DepthWriteMask)
fprintf(f, " depth.w");
if (inst->WriteALUResult == RC_ALURESULT_W)
fprintf(f, " aluresult");
 
print_omod_op(f, inst->Alpha.Omod);
 
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
fprintf(f, ", %s%ssrc", neg, abs);
if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
fprintf(f,"p");
else
fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
fprintf(f,".%c%s",
rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
}
fprintf(f, "\n");
}
 
if (inst->WriteALUResult) {
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
 
fprintf(f, " [aluresult = (");
rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
fprintf(f, ")]\n");
}
}
 
/**
 * Dump a whole program to stderr.
 *
 * Each instruction is prefixed with its zero-based position in the list
 * and printed in paired or normal format according to its type.
 */
void rc_print_program(const struct rc_program *prog)
{
	struct rc_instruction *cur = prog->Instructions.Next;
	unsigned branch_depth = 0;
	unsigned int linenum = 0;

	fprintf(stderr, "# Radeon Compiler Program\n");

	/* The instruction list is circular; stop when back at the list head. */
	while (cur != &prog->Instructions) {
		fprintf(stderr, "%3d: ", linenum);

		if (cur->Type != RC_INSTRUCTION_PAIR)
			rc_print_normal_instruction(stderr, cur, &branch_depth);
		else
			rc_print_pair_instruction(stderr, cur, &branch_depth);

		linenum++;
		cur = cur->Next;
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_tex.c
0,0 → 1,519
/*
* Copyright (C) 2010 Corbin Simpson
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_program_tex.h"
 
#include "radeon_compiler_util.h"
 
/* Series of transformations to be done on textures. */
 
static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
int tmu)
{
struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
 
reg.File = RC_FILE_NONE;
reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000,
compiler->state.unit[tmu].texture_swizzle);
return reg;
}
 
static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
int tmu)
{
struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
 
reg.File = RC_FILE_NONE;
reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
compiler->state.unit[tmu].texture_swizzle);
return reg;
}
 
static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
struct rc_instruction *inst,
unsigned state_constant)
{
struct rc_instruction *inst_mov;
 
unsigned temp = rc_find_free_temporary(&compiler->Base);
 
inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
 
inst_mov->U.I.Opcode = RC_OPCODE_MUL;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = temp;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
inst_mov->U.I.SrcReg[1].Index =
rc_constants_add_state(&compiler->Base.Program.Constants,
state_constant, inst->U.I.TexSrcUnit);
 
reset_srcreg(&inst->U.I.SrcReg[0]);
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[0].Index = temp;
}
 
/**
 * Lower a projective texture fetch: insert RCP and MUL instructions before
 * \p inst that divide the coordinates by their W component, then turn
 * \p inst into a plain TEX reading the divided coordinates.
 */
static void projective_divide(struct r300_fragment_program_compiler *compiler,
			      struct rc_instruction *inst)
{
	struct radeon_compiler *c = &compiler->Base;
	struct rc_instruction *rcp;
	struct rc_instruction *mul;
	unsigned divided = rc_find_free_temporary(c);

	/* divided.w = 1 / coord.w */
	rcp = rc_insert_new_instruction(c, inst->Prev);
	rcp->U.I.Opcode = RC_OPCODE_RCP;
	rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
	rcp->U.I.DstReg.Index = divided;
	rcp->U.I.DstReg.WriteMask = RC_MASK_W;
	rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	/* The input may carry an arbitrary swizzle, so read whichever
	 * component it maps to W. */
	rcp->U.I.SrcReg[0].Swizzle =
		RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));

	/* divided = coord * divided.wwww */
	mul = rc_insert_new_instruction(c, inst->Prev);
	mul->U.I.Opcode = RC_OPCODE_MUL;
	mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
	mul->U.I.DstReg.Index = divided;
	mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	mul->U.I.SrcReg[1].Index = divided;
	mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

	/* The division is done, so a non-projective fetch suffices. */
	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.Opcode = RC_OPCODE_TEX;
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = divided;
}
 
/**
 * Transform TEX, TXP, TXB, TXD, TXL and KIL instructions in the following ways:
 *  - implement texture compare (shadow extensions)
 *  - extract non-native source / destination operands
 *  - premultiply texture coordinates for RECT
 *  - emulate wrap modes and NPOT scaling in the shader
 *  - convert SNORM-encoded ATI1N data sampled as UNORM back to SNORM
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 *
 * \param c     compiler the instruction belongs to
 * \param inst  instruction to transform in place; helper instructions are
 *              inserted before and after it as needed
 * \param data  the struct r300_fragment_program_compiler for this program
 * \return 0 if \p inst is not one of the handled opcodes (it is left
 *         untouched), 1 otherwise
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;
	rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
	int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
		      compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;

	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
		inst->U.I.Opcode != RC_OPCODE_TXB &&
		inst->U.I.Opcode != RC_OPCODE_TXP &&
		inst->U.I.Opcode != RC_OPCODE_TXD &&
		inst->U.I.Opcode != RC_OPCODE_TXL &&
		inst->U.I.Opcode != RC_OPCODE_KIL)
		return 0;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;

		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
			/* The result is a constant, so no texture fetch is needed. */
			inst->U.I.Opcode = RC_OPCODE_MOV;

			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
				inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			} else {
				inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
			}

			return 1;
		} else {
			struct rc_instruction * inst_rcp = NULL;
			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
			unsigned tmp_texsample;
			unsigned tmp_sum;
			int pass, fail;

			/* Save the output register. */
			struct rc_dst_register output_reg = inst->U.I.DstReg;
			unsigned saturate_mode = inst->U.I.SaturateMode;

			/* Redirect TEX to a new temp. */
			tmp_texsample = rc_find_free_temporary(c);
			inst->U.I.SaturateMode = 0;
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tmp_texsample;
			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;

			tmp_sum = rc_find_free_temporary(c);

			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				/* Compute 1/W. */
				inst_rcp = rc_insert_new_instruction(c, inst);
				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
				inst_rcp->U.I.DstReg.Index = tmp_sum;
				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
				inst_rcp->U.I.SrcReg[0].Swizzle =
					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
			}

			/* Divide Z by W (if it's TXP) and saturate. */
			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = tmp_sum;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mul->U.I.SrcReg[0].Swizzle =
				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
				inst_mul->U.I.SrcReg[1].Index = tmp_sum;
				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
			}

			/* Add the depth texture value. */
			inst_add = rc_insert_new_instruction(c, inst_mul);
			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = tmp_sum;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[0].Index = tmp_sum;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = tmp_texsample;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;

			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
			 *   LESS:    r  < tex  <=>      -tex+r < 0
			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
			 *   GREATER: r  > tex  <=>       tex-r < 0
			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
			 *   EQUAL:   GEQUAL
			 *   NOTEQUAL:LESS
			 */

			/* This negates either r or tex: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
			else
				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;

			/* This negates the whole expression: */
			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			/* Select the pass or fail value from the sign of the sum
			 * and write it to the originally requested destination. */
			inst_cmp = rc_insert_new_instruction(c, inst_add);
			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
			inst_cmp->U.I.SaturateMode = saturate_mode;
			inst_cmp->U.I.DstReg = output_reg;
			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
			inst_cmp->U.I.SrcReg[0].Swizzle =
				combine_swizzles(RC_SWIZZLE_WWWW,
						 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
			inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
			inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);

			assert(tmp_texsample != tmp_sum);
		}
	}

	/* R300 cannot sample from rectangles and the wrap mode fallback needs
	 * normalized coordinates anyway. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
		scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
		inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
	}

	/* Divide by W if needed. */
	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
	    (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
	     compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
		projective_divide(compiler, inst);
	}

	/* Texture wrap modes don't work on NPOT textures.
	 *
	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
	 * mirroring are not. If we need to repeat, we do:
	 *
	 * MUL temp, texcoord, <scaling factor constant>
	 * FRC temp, temp ; Discard integer portion of coords
	 *
	 * This gives us coords in [0, 1].
	 *
	 * Mirroring is trickier. We're going to start out like repeat:
	 *
	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
	 *                            ; so scale to [0, 1]
	 * FRC temp, temp ; Make the pattern repeat
	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
	 *				; The pattern is backwards, so reverse it (1-x).
	 *
	 * This gives us coords in [0, 1].
	 *
	 * ~ C & M. ;)
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    wrapmode != RC_WRAP_NONE) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		if (wrapmode == RC_WRAP_REPEAT) {
			/* Both instructions will be paired up. */
			struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
		} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
			/*
			 * Function:
			 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
			 *
			 * Code:
			 *   MUL temp, src0, 0.5
			 *   FRC temp, temp
			 *   MAD temp, temp, 2, -1
			 *   ADD temp, 1, -abs(temp)
			 */

			struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
			unsigned two, two_swizzle;

			inst_mul = rc_insert_new_instruction(c, inst->Prev);

			inst_mul->U.I.Opcode = RC_OPCODE_MUL;
			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mul->U.I.DstReg.Index = temp;
			inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;

			inst_frc = rc_insert_new_instruction(c, inst->Prev);

			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_frc->U.I.DstReg.Index = temp;
			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_frc->U.I.SrcReg[0].Index = temp;
			inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;

			two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
			inst_mad = rc_insert_new_instruction(c, inst->Prev);

			inst_mad->U.I.Opcode = RC_OPCODE_MAD;
			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mad->U.I.DstReg.Index = temp;
			inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
			inst_mad->U.I.SrcReg[0].Index = temp;
			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
			inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
			inst_mad->U.I.SrcReg[1].Index = two;
			inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
			inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
			inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;

			inst_add = rc_insert_new_instruction(c, inst->Prev);

			inst_add->U.I.Opcode = RC_OPCODE_ADD;
			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_add->U.I.DstReg.Index = temp;
			inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
			inst_add->U.I.SrcReg[1].Index = temp;
			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
			inst_add->U.I.SrcReg[1].Abs = 1;
			inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
		} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
			/*
			 * Mirrored clamp modes are bloody simple, we just use abs
			 * to mirror [0, 1] into [-1, 0]. This works for
			 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
			 */
			inst_mov = rc_insert_new_instruction(c, inst->Prev);

			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst_mov->U.I.DstReg.Index = temp;
			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
			inst_mov->U.I.SrcReg[0].Abs = 1;
		}

		/* Preserve W for TXP/TXB. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;
	}

	/* NPOT -> POT conversion for 3D textures. */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
		struct rc_instruction *inst_mov;
		unsigned temp = rc_find_free_temporary(c);

		/* Saturate XYZ. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		/* Copy W. */
		inst_mov = rc_insert_new_instruction(c, inst->Prev);
		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = temp;
		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = temp;

		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
	}

	/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
	 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
	 */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
	    compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
		unsigned two, two_swizzle;
		struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;

		/* The scale and the offset of the formula are both exactly 2;
		 * any other value skews every converted texel. */
		two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);

		/* mul = tex * 2 */
		inst_mul = rc_insert_new_instruction(c, inst);
		inst_mul->U.I.Opcode = RC_OPCODE_MUL;
		inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
		inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
		inst_mul->U.I.SrcReg[1].Index = two;
		inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;

		/* mad = tex * 2 - 2 */
		inst_mad = rc_insert_new_instruction(c, inst_mul);
		inst_mad->U.I.Opcode = RC_OPCODE_MAD;
		inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
		inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
		inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;

		/* dst = tex > 0.5 ? mad : mul */
		inst_cnd = rc_insert_new_instruction(c, inst_mad);
		inst_cnd->U.I.Opcode = RC_OPCODE_CND;
		inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_cnd->U.I.DstReg = inst->U.I.DstReg;
		inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
		inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
		inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
		inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */

		/* The fetch itself now writes the intermediate temporary. */
		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot write texture to output registers or with saturate (all chips),
	 * or with masks (non-r500). */
	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
		 inst->U.I.SaturateMode ||
		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
		inst_mov->U.I.DstReg = inst->U.I.DstReg;
		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->U.I.SaturateMode = 0;
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	}

	/* Cannot read texture coordinate from constants file */
	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

		reset_srcreg(&inst->U.I.SrcReg[0]);
		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
	}

	return 1;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_program_tex.h
0,0 → 1,39
/*
* Copyright (C) 2010 Corbin Simpson
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __RADEON_PROGRAM_TEX_H_
#define __RADEON_PROGRAM_TEX_H_
 
#include "radeon_compiler.h"
#include "radeon_program.h"
 
/*
 * Texture-instruction transformation entry point.
 * NOTE(review): the meaning of the return value and of `data` is not
 * visible from this header -- confirm against the definition.
 */
int radeonTransformTEX(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data);
 
#endif /* __RADEON_PROGRAM_TEX_H_ */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_regalloc.h
0,0 → 1,62
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
#ifndef RADEON_REGALLOC_H
#define RADEON_REGALLOC_H
 
struct ra_regs;
 
/*
 * Identifiers for the register classes known to the register allocator.
 * RC_REG_CLASS_COUNT must stay last: it is the number of classes and is
 * used to size rc_regalloc_state.class_ids.
 * NOTE(review): the names suggest each class stands for a particular
 * combination of channels -- confirm against the allocator itself.
 */
enum rc_reg_class {
RC_REG_CLASS_SINGLE,
RC_REG_CLASS_DOUBLE,
RC_REG_CLASS_TRIPLE,
RC_REG_CLASS_ALPHA,
RC_REG_CLASS_SINGLE_PLUS_ALPHA,
RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
RC_REG_CLASS_X,
RC_REG_CLASS_Y,
RC_REG_CLASS_Z,
RC_REG_CLASS_XY,
RC_REG_CLASS_YZ,
RC_REG_CLASS_XZ,
RC_REG_CLASS_XW,
RC_REG_CLASS_YW,
RC_REG_CLASS_ZW,
RC_REG_CLASS_XYW,
RC_REG_CLASS_YZW,
RC_REG_CLASS_XZW,
RC_REG_CLASS_COUNT
};
 
/* State kept for register allocation. */
struct rc_regalloc_state {
/* Opaque handle; struct ra_regs is only forward-declared in this header. */
struct ra_regs *regs;
/* One id per enum rc_reg_class value, indexed by the enum. */
unsigned class_ids[RC_REG_CLASS_COUNT];
};
 
/* Presumably set up / tear down the contents of *s -- the definitions are
 * not part of this header; confirm there. */
void rc_init_regalloc_state(struct rc_regalloc_state *s);
void rc_destroy_regalloc_state(struct rc_regalloc_state *s);
 
#endif /* RADEON_REGALLOC_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
0,0 → 1,150
/*
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_remove_constants.h"
#include "radeon_dataflow.h"
 
/* Userdata handed to mark_used() while the program's source reads are scanned. */
struct mark_used_data {
/* One byte per constant slot; mark_used() sets an entry to 1 when it sees
 * a non-relative read of that constant index. */
unsigned char * const_used;
/* Points at a flag that mark_used() sets to 1 as soon as any constant
 * read uses relative addressing. */
unsigned * has_rel_addr;
};
 
/**
 * rc_remap_registers() callback: translate the index of a constant-file
 * register through the table passed as userdata (old index -> new index).
 * Registers that live in any other file are left untouched.
 */
static void remap_regs(void * userdata, struct rc_instruction * inst,
			rc_register_file * pfile, unsigned int * pindex)
{
	unsigned *new_index_of = userdata;

	if (*pfile != RC_FILE_CONSTANT)
		return;

	*pindex = new_index_of[*pindex];
}
 
/**
 * rc_for_all_reads_src() callback: record how a constant is being read.
 * A relatively-addressed constant read raises the shared has_rel_addr flag;
 * a direct read marks the constant's slot in const_used. Reads from other
 * register files are ignored.
 */
static void mark_used(void * userdata, struct rc_instruction * inst,
			struct rc_src_register * src)
{
	struct mark_used_data * state = userdata;

	if (src->File != RC_FILE_CONSTANT)
		return;

	if (src->RelAddr)
		*state->has_rel_addr = 1;
	else
		state->const_used[src->Index] = 1;
}
 
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
{
unsigned **out_remap_table = (unsigned**)user;
unsigned char *const_used;
unsigned *remap_table;
unsigned *inv_remap_table;
unsigned has_rel_addr = 0;
unsigned is_identity = 1;
unsigned are_externals_remapped = 0;
struct rc_constant *constants = c->Program.Constants.Constants;
struct mark_used_data d;
unsigned new_count;
 
if (!c->Program.Constants.Count) {
*out_remap_table = NULL;
return;
}
 
const_used = malloc(c->Program.Constants.Count);
memset(const_used, 0, c->Program.Constants.Count);
 
d.const_used = const_used;
d.has_rel_addr = &has_rel_addr;
 
/* Pass 1: Mark used constants. */
for (struct rc_instruction *inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
rc_for_all_reads_src(inst, mark_used, &d);
}
 
/* Pass 2: If there is relative addressing or dead constant elimination
* is disabled, mark all externals as used. */
if (has_rel_addr || !c->remove_unused_constants) {
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
const_used[i] = 1;
}
 
/* Pass 3: Make the remapping table and remap constants.
* This pass removes unused constants simply by overwriting them by other constants. */
remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
new_count = 0;
 
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (const_used[i]) {
remap_table[new_count] = i;
inv_remap_table[i] = new_count;
 
if (i != new_count) {
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
are_externals_remapped = 1;
 
constants[new_count] = constants[i];
is_identity = 0;
}
new_count++;
}
}
 
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert( is_identity || new_count < c->Program.Constants.Count);
assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
 
/* Pass 4: Redirect reads of all constants to their new locations. */
if (!is_identity) {
for (struct rc_instruction *inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
rc_remap_registers(inst, remap_regs, inv_remap_table);
}
}
 
/* Set the new constant count. Note that new_count may be less than
* Count even though the remapping function is identity. In that case,
* the constants have been removed at the end of the array. */
c->Program.Constants.Count = new_count;
 
if (are_externals_remapped) {
*out_remap_table = remap_table;
} else {
*out_remap_table = NULL;
free(remap_table);
}
 
free(const_used);
free(inv_remap_table);
 
if (c->Debug & RC_DBG_LOG)
rc_constants_print(&c->Program.Constants);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_remove_constants.h
0,0 → 1,35
/*
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_REMOVE_CONSTANTS_H
#define RADEON_REMOVE_CONSTANTS_H
 
#include "radeon_compiler.h"
 
/*
 * Compiler pass that drops unread constants and renumbers the rest.
 * `user` is treated as an `unsigned **` that receives either a remap table
 * or NULL.
 */
void rc_remove_unused_constants(struct radeon_compiler *c, void *user);
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_rename_regs.c
0,0 → 1,89
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file
*/
 
#include "radeon_rename_regs.h"
 
#include "radeon_compiler.h"
#include "radeon_list.h"
#include "radeon_program.h"
#include "radeon_variable.h"
 
/**
 * This function renames registers in an attempt to get the code close to
 * SSA form. After this function has completed, most of the registers are
 * only written to one time, with a few exceptions.
 *
 * This function assumes all the instructions are still of type
 * RC_INSTRUCTION_NORMAL.
 *
 * Programs containing a BGNLOOP are left untouched. If no free temporary
 * can be found for a variable, an error is reported through rc_error() and
 * renaming stops at that point.
 *
 * \param c     compiler whose program is rewritten in place
 * \param user  unused
 */
void rc_rename_regs(struct radeon_compiler *c, void *user)
{
	unsigned int used_length;
	struct rc_instruction * inst;
	unsigned char * used;
	struct rc_list * variables;
	struct rc_list * var_ptr;

	/* XXX Remove this once the register allocation works with flow control. */
	for(inst = c->Program.Instructions.Next;
	    inst != &c->Program.Instructions;
	    inst = inst->Next) {
		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
			return;
	}

	used_length = 2 * rc_recompute_ips(c);
	used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
	memset(used, 0, sizeof(unsigned char) * used_length);

	rc_get_used_temporaries(c, used, used_length);
	variables = rc_get_variables(c);

	for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) {
		/* Must be signed: the failure test below compares against 0,
		 * which could never trigger while this was declared unsigned. */
		int new_index;
		unsigned writemask;
		struct rc_variable * var = var_ptr->Item;

		/* Only temporaries are renamed. */
		if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
			continue;
		}

		new_index = rc_find_free_temporary_list(c, used, used_length,
							RC_MASK_XYZW);
		if (new_index < 0) {
			rc_error(c, "Ran out of temporary registers\n");
			return;
		}

		/* Keep the combined writemask of the variable and its friends;
		 * only the register index changes. */
		writemask = rc_variable_writemask_sum(var);
		rc_variable_change_dst(var, new_index, writemask);
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_rename_regs.h
0,0 → 1,35
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_RENAME_REGS_H
#define RADEON_RENAME_REGS_H
 
struct radeon_compiler;
 
/* Compiler pass that renames temporaries to bring the program close to SSA form. */
void rc_rename_regs(struct radeon_compiler *c, void *user);
 
#endif /* RADEON_RENAME_REGS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_swizzle.h
0,0 → 1,59
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_SWIZZLE_H
#define RADEON_SWIZZLE_H
 
#include "radeon_program.h"
 
/*
 * Result filled in by rc_swizzle_caps::Split.
 * NOTE(review): presumably NumPhases is the number of valid entries in
 * Phase[] and each entry describes the channels handled by one phase --
 * confirm against the Split implementations.
 */
struct rc_swizzle_split {
unsigned char NumPhases;
unsigned char Phase[4];
};
 
/**
 * Describe the swizzling capability of target hardware.
 *
 * Both members are function pointers; a target provides its own table.
 */
struct rc_swizzle_caps {
/**
 * Check whether the given swizzle, absolute and negate combination
 * can be implemented natively by the hardware for this opcode.
 *
 * \return 1 if the swizzle is native for the given opcode
 */
int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);

/**
 * Determine how to split access to the masked channels of the
 * given source register to obtain ALU-native swizzles.
 *
 * The outcome is written to *split.
 */
void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
};
 
/* Swizzle capability table defined elsewhere; going by its name, the one
 * for r300 vertex programs. */
extern struct rc_swizzle_caps r300_vertprog_swizzle_caps;
 
#endif /* RADEON_SWIZZLE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_variable.c
0,0 → 1,536
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_variable.h"
 
#include "memory_pool.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_list.h"
#include "radeon_opcodes.h"
#include "radeon_program.h"
 
/**
 * Rewrite the index and writemask for the destination register of var
 * and its friends to new_index and new_writemask. This function also takes
 * care of rewriting the swizzles for the sources of var.
 *
 * \param var            first variable of the friend chain to rewrite
 * \param new_index      register index the writers and readers are moved to
 * \param new_writemask  writemask that replaces the combined writemask of
 *                       var and its friends
 */
void rc_variable_change_dst(
struct rc_variable * var,
unsigned int new_index,
unsigned int new_writemask)
{
struct rc_variable * var_ptr;
struct rc_list * readers;
/* Combined writemask of var and all of its friends before the change. */
unsigned int old_mask = rc_variable_writemask_sum(var);
/* Swizzle that maps channels of old_mask onto channels of new_writemask. */
unsigned int conversion_swizzle =
rc_make_conversion_swizzle(old_mask, new_writemask);

/* Step 1: retarget every writer in the friend chain. */
for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
rc_normal_rewrite_writemask(var_ptr->Inst,
conversion_swizzle);
var_ptr->Inst->U.I.DstReg.Index = new_index;
} else {
struct rc_pair_sub_instruction * sub;
/* A variable that writes only W is handled through the Alpha
 * sub-instruction; its writemask is not rewritten. Anything
 * else goes through the RGB sub-instruction. */
if (var_ptr->Dst.WriteMask == RC_MASK_W) {
assert(new_writemask & RC_MASK_W);
sub = &var_ptr->Inst->U.P.Alpha;
} else {
sub = &var_ptr->Inst->U.P.RGB;
rc_pair_rewrite_writemask(sub,
conversion_swizzle);
}
sub->DestIndex = new_index;
}
}

/* Step 2: retarget every reader of the friend chain (each reader once). */
readers = rc_variable_readers_union(var);

for ( ; readers; readers = readers->Next) {
struct rc_reader * reader = readers->Item;
if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
reader->U.I.Src->Index = new_index;
reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
reader->U.I.Src->Swizzle, conversion_swizzle);
} else {
struct rc_pair_instruction * pair_inst =
&reader->Inst->U.P;
unsigned int src_type = rc_source_type_swz(
reader->U.P.Arg->Swizzle);

int src_index = reader->U.P.Arg->Source;
/* For a presubtract argument, look the source slot up from the
 * reader's source instead of using Arg->Source directly. */
if (src_index == RC_PAIR_PRESUB_SRC) {
src_index = rc_pair_get_src_index(
pair_inst, reader->U.P.Src);
}
/* Try to delete the old src, it is OK if this fails,
 * because rc_pair_alloc_source might be able to
 * find a source that can be reused.
 */
if (rc_pair_remove_src(reader->Inst, src_type,
src_index, old_mask)) {
/* Reuse the source index of the source that
 * was just deleted and set its register
 * index. We can't use rc_pair_alloc_source
 * for this because it might return a source
 * index that is already being used. */
if (src_type & RC_SOURCE_RGB) {
pair_inst->RGB.Src[src_index]
.Used = 1;
pair_inst->RGB.Src[src_index]
.Index = new_index;
pair_inst->RGB.Src[src_index]
.File = RC_FILE_TEMPORARY;
}
if (src_type & RC_SOURCE_ALPHA) {
pair_inst->Alpha.Src[src_index]
.Used = 1;
pair_inst->Alpha.Src[src_index]
.Index = new_index;
pair_inst->Alpha.Src[src_index]
.File = RC_FILE_TEMPORARY;
}
} else {
src_index = rc_pair_alloc_source(
&reader->Inst->U.P,
src_type & RC_SOURCE_RGB,
src_type & RC_SOURCE_ALPHA,
RC_FILE_TEMPORARY,
new_index);
/* No slot available: report it and leave this reader's
 * argument as it was. */
if (src_index < 0) {
rc_error(var->C, "Rewrite of inst %u failed "
"Can't allocate source for "
"Inst %u src_type=%x "
"new_index=%u new_mask=%u\n",
var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
continue;
}
}
reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
reader->U.P.Arg->Swizzle, conversion_swizzle);
/* Presubtract arguments keep RC_PAIR_PRESUB_SRC as their source. */
if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
reader->U.P.Arg->Source = src_index;
}
}
}
}
 
/**
* Compute the live intervals for var and its friends.
*/
void rc_variable_compute_live_intervals(struct rc_variable * var)
{
while(var) {
unsigned int i;
unsigned int start = var->Inst->IP;
 
for (i = 0; i < var->ReaderCount; i++) {
unsigned int chan;
unsigned int chan_start = start;
unsigned int chan_end = var->Readers[i].Inst->IP;
unsigned int mask = var->Readers[i].WriteMask;
struct rc_instruction * inst;
 
/* Extend the live interval of T0 to the start of the
* loop for sequences like:
* BGNLOOP
* read T0
* ...
* write T0
* ENDLOOP
*/
if (var->Readers[i].Inst->IP < start) {
struct rc_instruction * bgnloop =
rc_match_endloop(var->Readers[i].Inst);
chan_start = bgnloop->IP;
}
 
/* Extend the live interval of T0 to the start of the
* loop in case there is a BRK instruction in the loop
* (we don't actually check for a BRK instruction we
* assume there is one somewhere in the loop, which
* there usually is) for sequences like:
* BGNLOOP
* ...
* conditional BRK
* ...
* write T0
* ENDLOOP
* read T0
***************************************************
* Extend the live interval of T0 to the end of the
* loop for sequences like:
* write T0
* BGNLOOP
* ...
* read T0
* ENDLOOP
*/
for (inst = var->Inst; inst != var->Readers[i].Inst;
inst = inst->Next) {
rc_opcode op = rc_get_flow_control_inst(inst);
if (op == RC_OPCODE_ENDLOOP) {
struct rc_instruction * bgnloop =
rc_match_endloop(inst);
if (bgnloop->IP < chan_start) {
chan_start = bgnloop->IP;
}
} else if (op == RC_OPCODE_BGNLOOP) {
struct rc_instruction * endloop =
rc_match_bgnloop(inst);
if (endloop->IP > chan_end) {
chan_end = endloop->IP;
}
}
}
 
for (chan = 0; chan < 4; chan++) {
if ((mask >> chan) & 0x1) {
if (!var->Live[chan].Used
|| chan_start < var->Live[chan].Start) {
var->Live[chan].Start =
chan_start;
}
if (!var->Live[chan].Used
|| chan_end > var->Live[chan].End) {
var->Live[chan].End = chan_end;
}
var->Live[chan].Used = 1;
}
}
}
var = var->Friend;
}
}
 
/**
* @return 1 if a and b share a reader
* @return 0 if they do not
*/
static unsigned int readers_intersect(
struct rc_variable * a,
struct rc_variable * b)
{
unsigned int a_index, b_index;
for (a_index = 0; a_index < a->ReaderCount; a_index++) {
struct rc_reader reader_a = a->Readers[a_index];
for (b_index = 0; b_index < b->ReaderCount; b_index++) {
struct rc_reader reader_b = b->Readers[b_index];
if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
&& reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
&& reader_a.U.I.Src == reader_b.U.I.Src) {
 
return 1;
}
if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
&& reader_b.Inst->Type == RC_INSTRUCTION_PAIR
&& reader_a.U.P.Src == reader_b.U.P.Src) {
 
return 1;
}
}
}
return 0;
}
 
/**
 * Append friend to the end of var's Friend chain. Both variables are
 * expected to write the same destination index (asserted).
 */
void rc_variable_add_friend(
	struct rc_variable * var,
	struct rc_variable * friend)
{
	struct rc_variable ** link;

	assert(var->Dst.Index == friend->Dst.Index);

	/* Advance to the first empty Friend link in the chain. */
	for (link = &var->Friend; *link; link = &(*link)->Friend) {
	}
	*link = friend;
}
 
/**
 * Allocate a zero-initialised rc_variable from the compiler's memory pool
 * and fill in its destination register. When reader_data is given, the
 * writing instruction and the reader array are taken over from it (the
 * Readers pointer is copied, not the array).
 *
 * @return the new variable
 */
struct rc_variable * rc_variable(
	struct radeon_compiler * c,
	unsigned int DstFile,
	unsigned int DstIndex,
	unsigned int DstWriteMask,
	struct rc_reader_data * reader_data)
{
	struct rc_variable * v = memory_pool_malloc(&c->Pool, sizeof(*v));

	memset(v, 0, sizeof(*v));

	v->C = c;
	v->Dst.File = DstFile;
	v->Dst.Index = DstIndex;
	v->Dst.WriteMask = DstWriteMask;

	if (!reader_data)
		return v;

	v->Inst = reader_data->Writer;
	v->ReaderCount = reader_data->ReaderCount;
	v->Readers = reader_data->Readers;
	return v;
}
 
/**
 * Insert variable into variable_list. If it shares a reader with a variable
 * already reachable from the list, it is chained onto that variable as a
 * friend; otherwise it becomes a new list entry.
 */
static void get_variable_helper(
	struct rc_list ** variable_list,
	struct rc_variable * variable)
{
	struct rc_list * node;

	for (node = *variable_list; node; node = node->Next) {
		struct rc_variable * member = node->Item;

		/* Check the list entry and then each of its friends. */
		while (member) {
			if (readers_intersect(member, variable)) {
				rc_variable_add_friend(member, variable);
				return;
			}
			member = member->Friend;
		}
	}

	rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
}
 
static void get_variable_pair_helper(
struct rc_list ** variable_list,
struct radeon_compiler * c,
struct rc_instruction * inst,
struct rc_pair_sub_instruction * sub_inst)
{
struct rc_reader_data reader_data;
struct rc_variable * new_var;
rc_register_file file;
unsigned int writemask;
 
if (sub_inst->Opcode == RC_OPCODE_NOP) {
return;
}
memset(&reader_data, 0, sizeof(struct rc_reader_data));
rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
 
if (reader_data.ReaderCount == 0) {
return;
}
 
if (sub_inst->WriteMask) {
file = RC_FILE_TEMPORARY;
writemask = sub_inst->WriteMask;
} else if (sub_inst->OutputWriteMask) {
file = RC_FILE_OUTPUT;
writemask = sub_inst->OutputWriteMask;
} else {
writemask = 0;
file = RC_FILE_NONE;
}
new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
&reader_data);
get_variable_helper(variable_list, new_var);
}
 
/**
 * Build the list of variables of the shader program. Every instruction that
 * writes a register and has at least one reader yields a variable; for
 * paired instructions the RGB and Alpha halves are handled separately.
 * A struct rc_variable carries its list of readers, which makes it
 * essentially a definition-use chain. Variables that share a reader are
 * "friends" and are linked through the Friend member rather than getting
 * list entries of their own.
 *
 * @return the head of the variable list (NULL if there are no variables)
 */
struct rc_list * rc_get_variables(struct radeon_compiler * c)
{
	struct rc_list * variables = NULL;
	struct rc_instruction * inst;

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		struct rc_reader_data reader_data;

		if (inst->Type != RC_INSTRUCTION_NORMAL) {
			get_variable_pair_helper(&variables, c, inst,
						 &inst->U.P.RGB);
			get_variable_pair_helper(&variables, c, inst,
						 &inst->U.P.Alpha);
			continue;
		}

		memset(&reader_data, 0, sizeof(reader_data));
		rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);

		/* A write nobody reads does not become a variable. */
		if (reader_data.ReaderCount != 0) {
			get_variable_helper(&variables,
				rc_variable(c, inst->U.I.DstReg.File,
					    inst->U.I.DstReg.Index,
					    inst->U.I.DstReg.WriteMask,
					    &reader_data));
		}
	}

	return variables;
}
 
/**
 * @return The bitwise OR of the destination writemasks of var and of every
 * variable on its Friend chain (0 when var is NULL).
 */
unsigned int rc_variable_writemask_sum(struct rc_variable * var)
{
	unsigned int combined = 0;
	struct rc_variable * v;

	for (v = var; v; v = v->Friend)
		combined |= v->Dst.WriteMask;

	return combined;
}
 
/**
 * @return A list of the readers of var and of all of its friends. A reader
 * that is shared by several of those variables appears only once.
 */
struct rc_list * rc_variable_readers_union(struct rc_variable * var)
{
	struct rc_list * merged = NULL;

	for ( ; var; var = var->Friend) {
		unsigned int i;

		for (i = 0; i < var->ReaderCount; i++) {
			struct rc_reader * candidate = &var->Readers[i];
			struct rc_list * node;

			/* Look for an equivalent reader already collected. The
			 * scan stops early (node != NULL) only on a match. */
			for (node = merged; node; node = node->Next) {
				struct rc_reader * seen = node->Item;

				if (candidate->Inst->Type != seen->Inst->Type)
					continue;

				if (candidate->Inst->Type == RC_INSTRUCTION_NORMAL
				    && candidate->U.I.Src == seen->U.I.Src)
					break;

				if (candidate->Inst->Type == RC_INSTRUCTION_PAIR
				    && candidate->U.P.Arg == seen->U.P.Arg
				    && candidate->U.P.Src == seen->U.P.Src)
					break;
			}

			if (!node)
				rc_list_add(&merged,
					rc_list(&var->C->Pool, candidate));
		}
	}

	return merged;
}
 
/**
 * @return 1 if reader belongs to an instruction of type src_type and reads
 * through the source pointer src, 0 otherwise. For normal instructions the
 * U.I.Src pointer is compared, for every other type U.P.Src.
 */
static unsigned int reader_equals_src(
	struct rc_reader reader,
	unsigned int src_type,
	void * src)
{
	if (reader.Inst->Type != src_type)
		return 0;

	return (src_type == RC_INSTRUCTION_NORMAL)
		? (reader.U.I.Src == src)
		: (reader.U.P.Src == src);
}
 
/**
 * @return 1 if any reader of var reads through src (see
 * reader_equals_src()), 0 otherwise. Friends of var are not examined.
 */
static unsigned int variable_writes_src(
	struct rc_variable * var,
	unsigned int src_type,
	void * src)
{
	unsigned int idx = 0;

	while (idx < var->ReaderCount) {
		if (reader_equals_src(var->Readers[idx], src_type, src))
			return 1;
		idx++;
	}
	return 0;
}
 
 
/**
 * Collect the variables from var_list that write the value read through src.
 *
 * @return a list holding the first list entry that writes src plus those of
 * its friends that write src as well, or NULL if no list entry writes src.
 */
struct rc_list * rc_variable_list_get_writers(
	struct rc_list * var_list,
	unsigned int src_type,
	void * src)
{
	struct rc_list * writers = NULL;
	struct rc_list * node = var_list;
	struct rc_variable * head;
	struct rc_variable * buddy;

	/* Find the first list entry that writes this source. */
	while (node && !variable_writes_src(node->Item, src_type, src))
		node = node->Next;

	if (!node)
		return NULL;

	head = node->Item;
	rc_list_add(&writers, rc_list(&head->C->Pool, head));

	/* Once the variable and its friends that write this source have been
	 * identified there is no point in searching further: any other
	 * variable writing this source would be a friend of head.
	 */
	for (buddy = head->Friend; buddy; buddy = buddy->Friend) {
		if (!variable_writes_src(buddy, src_type, src))
			continue;
		rc_list_add(&writers, rc_list(&head->C->Pool, buddy));
	}

	return writers;
}
 
/**
 * Like rc_variable_list_get_writers(), but only succeeds when the writers
 * have no more than one reader between them.
 *
 * @return the list of writers of src, or NULL when nothing in var_list
 * writes src or when the union of the writers' readers has more than one
 * entry.
 */
struct rc_list * rc_variable_list_get_writers_one_reader(
	struct rc_list * var_list,
	unsigned int src_type,
	void * src)
{
	struct rc_list * writer_list =
		rc_variable_list_get_writers(var_list, src_type, src);
	struct rc_list * reader_list;

	/* rc_variable_list_get_writers() returns NULL when no variable writes
	 * src; there is nothing to dereference in that case. */
	if (!writer_list) {
		return NULL;
	}

	/* The first writer's friend chain covers every other writer, so the
	 * union taken from it is the complete reader set. */
	reader_list = rc_variable_readers_union(writer_list->Item);
	if (rc_list_count(reader_list) > 1) {
		return NULL;
	}
	return writer_list;
}
 
/**
 * Dump var and each of its friends to stderr: the writer's IP, destination
 * index and writemask, the live interval of all four channels, and the
 * number of readers.
 */
void rc_variable_print(struct rc_variable * var)
{
	for ( ; var; var = var->Friend) {
		unsigned int chan;

		fprintf(stderr, "%u: TEMP[%u].%u: ",
			var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);

		for (chan = 0; chan < 4; chan++) {
			fprintf(stderr, "chan %u: start=%u end=%u ", chan,
				var->Live[chan].Start, var->Live[chan].End);
		}

		fprintf(stderr, "%u readers\n", var->ReaderCount);

		/* Announce the next chain member, if there is one. */
		if (var->Friend) {
			fprintf(stderr, "Friend: \n\t");
		}
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_variable.h
0,0 → 1,94
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_VARIABLE_H
#define RADEON_VARIABLE_H
 
#include "radeon_compiler.h"
 
struct radeon_compiler;
struct rc_list;
struct rc_reader_data;
struct rc_readers;
 
/*
 * Live range of one channel of a variable, filled in by
 * rc_variable_compute_live_intervals().
 */
struct live_intervals {
/* Lowest interval start (an instruction IP) seen for this channel. */
int Start;
/* Highest interval end (an instruction IP) seen for this channel. */
int End;
/* Set to 1 once a reader uses this channel; Start/End are only
 * meaningful when this is set. */
int Used;
};
 
/*
 * One register write together with the instructions that read it.
 */
struct rc_variable {
/* Compiler this variable belongs to; its memory pool is used for list
 * allocations and it receives error reports. */
struct radeon_compiler * C;
/* Destination register (file, index, writemask) of the write. */
struct rc_dst_register Dst;

/* Instruction performing the write. */
struct rc_instruction * Inst;
/* Number of entries in Readers. */
unsigned int ReaderCount;
struct rc_reader * Readers;
/* Per-channel live intervals, indexed by channel number 0..3. */
struct live_intervals Live[4];

/* A friend is a variable that shares a reader with another variable.
 * Friends form a singly linked, NULL-terminated chain.
 */
struct rc_variable * Friend;
};
 
/* NOTE(review): only the declarations are visible from this header; the
 * notes below describe the signatures and are to be confirmed against the
 * definitions. */

/* Takes a variable plus a new register index and a new writemask. */
void rc_variable_change_dst(
struct rc_variable * var,
unsigned int new_index,
unsigned int new_writemask);

/* NOTE(review): presumably fills var->Live[] -- confirm. */
void rc_variable_compute_live_intervals(struct rc_variable * var);

/* NOTE(review): presumably links friend into var's Friend chain -- confirm. */
void rc_variable_add_friend(
struct rc_variable * var,
struct rc_variable * friend);

/* Constructor-style function: returns a variable for the destination
 * described by DstFile/DstIndex/DstWriteMask and the given reader data. */
struct rc_variable * rc_variable(
struct radeon_compiler * c,
unsigned int DstFile,
unsigned int DstIndex,
unsigned int DstWriteMask,
struct rc_reader_data * reader_data);

/* Returns an rc_list for the compiler's program.
 * NOTE(review): presumably a list of struct rc_variable -- confirm. */
struct rc_list * rc_get_variables(struct radeon_compiler * c);

unsigned int rc_variable_writemask_sum(struct rc_variable * var);

struct rc_list * rc_variable_readers_union(struct rc_variable * var);

/* src is an untyped pointer whose interpretation is selected by src_type. */
struct rc_list * rc_variable_list_get_writers(
struct rc_list * var_list,
unsigned int src_type,
void * src);

/* Same parameter list as rc_variable_list_get_writers(). */
struct rc_list * rc_variable_list_get_writers_one_reader(
struct rc_list * var_list,
unsigned int src_type,
void * src);

/* Debug dump of a variable.
 * NOTE(review): the implementation appears to print to stderr and to follow
 * the Friend chain -- confirm in radeon_variable.c. */
void rc_variable_print(struct rc_variable * var);
 
#endif /* RADEON_VARIABLE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
0,0 → 1,302
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"
 
/* State carried across instructions while rc_vert_fc() lowers control flow. */
struct vert_fc_state {
/* Compiler whose program is being rewritten. */
struct radeon_compiler *C;
/* Incremented for every IF, decremented for every ENDIF. */
unsigned BranchDepth;
/* Incremented for every BGNLOOP, decremented for every ENDLOOP. */
unsigned LoopDepth;
/* Read by the nesting check in lower_bgnloop(); nothing in this file
 * changes it after the initial zeroing. */
unsigned LoopsReserved;
/* Predicate register index saved by lower_bgnloop(), indexed by the loop
 * depth at the time of saving; read back by lower_endloop(). */
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
/* Temporary register index currently used as predicate register,
 * or -1 while none has been reserved. */
int PredicateReg;
/* Set by lower_if() when the IF is immediately followed by BRK;
 * cleared again when the matching ENDIF is processed. */
unsigned InCFBreak;
};
 
/* Make src read the current predicate register: temporary file, index
 * fc_state->PredicateReg, W selected in the last swizzle slot and the other
 * three slots unused. */
static void build_pred_src(
	struct rc_src_register * src,
	struct vert_fc_state * fc_state)
{
	src->File = RC_FILE_TEMPORARY;
	src->Index = fc_state->PredicateReg;
	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
}
 
/* Make dst write the W channel of the current predicate register
 * (temporary file, index fc_state->PredicateReg). */
static void build_pred_dst(
	struct rc_dst_register * dst,
	struct vert_fc_state * fc_state)
{
	dst->File = RC_FILE_TEMPORARY;
	dst->Index = fc_state->PredicateReg;
	dst->WriteMask = RC_MASK_W;
}
 
/* Write callback for rc_for_all_writes_mask(): accumulates, per temporary
 * register index, the union of all channels written.
 *
 * userdata is an array of unsigned writemasks indexed by register index.
 * Writes to other register files, or to indices at or above
 * R300_VS_MAX_TEMPS, are ignored. */
static void mark_write(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned int * const masks_by_index = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS) {
		masks_by_index[index] |= mask;
	}
}
 
static int reserve_predicate_reg(struct vert_fc_state * fc_state)
{
int i;
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
struct rc_instruction * inst;
memset(writemasks, 0, sizeof(writemasks));
for(inst = fc_state->C->Program.Instructions.Next;
inst != &fc_state->C->Program.Instructions;
inst = inst->Next) {
rc_for_all_writes_mask(inst, mark_write, writemasks);
}
 
for(i = 0; i < fc_state->C->max_temp_regs; i++) {
/* Most of the control flow instructions only write the
* W component of the Predicate Register, but
* the docs say that ME_PRED_SET_CLR and
* ME_PRED_SET_RESTORE write all components of the
* register, so we must reserve a register that has
* all its components free. */
if (!writemasks[i]) {
fc_state->PredicateReg = i;
break;
}
}
if (i == fc_state->C->max_temp_regs) {
rc_error(fc_state->C, "No free temporary to use for"
" predicate stack counter.\n");
return -1;
}
return 1;
}
 
/**
 * Lower a BGNLOOP: emit the instruction that prepares the predicate
 * register for the loop. The new instruction is inserted relative to
 * inst->Prev, i.e. ahead of the BGNLOOP itself.
 *
 * - Outermost loop outside of any branch: reserve a predicate register if
 *   none exists yet and initialize it with ME_PRED_SEQ.
 * - Otherwise: save the current predicate register index in PredStack,
 *   reserve a fresh register and copy the old predicate value into it.
 *
 * On failure a compiler error is recorded (either here or by
 * reserve_predicate_reg()) and the function returns early.
 */
static void lower_bgnloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * new_inst =
		rc_insert_new_instruction(fc_state->C, inst->Prev);

	/* PredStack has R500_PVS_MAX_LOOP_DEPTH slots and is indexed with
	 * LoopDepth below. LoopsReserved is never advanced by this pass, so
	 * LoopDepth has to be bounded explicitly to keep that store inside
	 * the array. */
	if ((!fc_state->C->is_r500
		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH
	     || fc_state->LoopDepth >= R500_PVS_MAX_LOOP_DEPTH) {
		rc_error(fc_state->C, "Loops are nested too deep.");
		return;
	}

	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
		if (fc_state->PredicateReg == -1) {
			if (reserve_predicate_reg(fc_state) == -1) {
				return;
			}
		}

		/* Initialize the predicate bit to true. */
		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[0].Index = 0;
		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
	} else {
		/* Remember the enclosing predicate register so that
		 * lower_endloop() can switch back to it. */
		fc_state->PredStack[fc_state->LoopDepth] =
						fc_state->PredicateReg;

		/* Copy the current predicate value to this loop's
		 * predicate register. */

		/* Use the old predicate value for src0; this must happen
		 * before PredicateReg is replaced below. */
		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);

		/* Reserve this loop's predicate register */
		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}

		/* Copy the old predicate value to the new register
		 * (old value + 0). */
		new_inst->U.I.Opcode = RC_OPCODE_ADD;
		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
		new_inst->U.I.SrcReg[1].Index = 0;
		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
	}

}
 
/* Rewrite a BRK in place as a write to the current predicate register.
 *
 * Inside a loop at depth 1 the BRK becomes an RCP of a constant-zero source
 * predicated with RC_PRED_INV; at any other depth it becomes
 * ME_PRED_SET_CLR predicated with RC_PRED_SET. */
static void lower_brk(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	/* The destination is the predicate register in both variants. */
	build_pred_dst(&inst->U.I.DstReg, fc_state);

	if (fc_state->LoopDepth != 1) {
		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
		return;
	}

	inst->U.I.Opcode = RC_OPCODE_RCP;
	inst->U.I.DstReg.Pred = RC_PRED_INV;
	inst->U.I.SrcReg[0].File = RC_FILE_NONE;
	inst->U.I.SrcReg[0].Index = 0;
	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}
 
/* Emit an ME_PRED_SET_RESTORE via rc_insert_new_instruction(C, inst) and
 * switch back to the predicate register saved by lower_bgnloop().
 *
 * The restore instruction writes the loop's own predicate register and
 * reads the register that was active before the loop, so the destination
 * must be built before PredicateReg is restored and the source after. */
static void lower_endloop(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	struct rc_instruction * restore =
			rc_insert_new_instruction(fc_state->C, inst);

	restore->U.I.Opcode = RC_ME_PRED_SET_RESTORE;

	/* Destination: predicate register of the loop being closed. */
	build_pred_dst(&restore->U.I.DstReg, fc_state);

	/* Source: predicate register of the enclosing scope. */
	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
	build_pred_src(&restore->U.I.SrcReg[0], fc_state);
}
 
/* Rewrite an IF in place as a predicate-setting instruction.
 *
 * - IF directly followed by BRK: InCFBreak is raised; if this happens at
 *   top level or inside a depth-1 loop the IF becomes ME_PRED_SEQ
 *   predicated with RC_PRED_SET.
 * - Top-level IF (no enclosing branch or loop): ME_PRED_SNEQ.
 * - Everything else: VE_PRED_SNEQ_PUSH with the branch condition moved to
 *   src1 and the current predicate register as src0.
 *
 * In every case the destination is the current predicate register. */
static void lower_if(
	struct rc_instruction * inst,
	struct vert_fc_state * fc_state)
{
	int at_top_level;
	int break_in_first_loop;

	/* A predicate register is needed; reserve one on first use. */
	if (fc_state->PredicateReg == -1) {
		/* Inside a loop the predicate register must already have
		 * been set up by the loop lowering. */
		assert(fc_state->LoopDepth == 0);

		if (reserve_predicate_reg(fc_state) == -1) {
			return;
		}
	}

	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
		fc_state->InCFBreak = 1;
	}

	at_top_level = fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0;
	break_in_first_loop = fc_state->LoopDepth == 1 && fc_state->InCFBreak;

	if (!at_top_level && !break_in_first_loop) {
		unsigned cond_swz;

		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;

		/* The branch condition moves from src0 to src1 ... */
		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
						sizeof(inst->U.I.SrcReg[1]));

		/* ... and VE_PRED_SNEQ_PUSH needs it in the w component. */
		cond_swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, cond_swz);

		/* src0 is the current predicate register. */
		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
	} else if (fc_state->InCFBreak) {
		inst->U.I.Opcode = RC_ME_PRED_SEQ;
		inst->U.I.DstReg.Pred = RC_PRED_SET;
	} else {
		inst->U.I.Opcode = RC_ME_PRED_SNEQ;
	}

	build_pred_dst(&inst->U.I.DstReg, fc_state);
}
 
/**
 * Compiler pass: lower structured control flow (BGNLOOP/BRK/ENDLOOP and
 * IF/ELSE/ENDIF) in a vertex program to predicate-register instructions.
 *
 * Walks the instruction list once, tracking branch and loop nesting in a
 * vert_fc_state. Every other instruction that sits inside a branch or a
 * loop gets its destination predicated with RC_PRED_SET. The walk stops as
 * soon as the compiler has an error recorded.
 *
 * @param c    Compiler whose program is rewritten in place.
 * @param user Unused.
 */
void rc_vert_fc(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * inst;
	struct vert_fc_state fc_state;

	memset(&fc_state, 0, sizeof(fc_state));
	fc_state.PredicateReg = -1;
	fc_state.C = c;

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {

		switch (inst->U.I.Opcode) {

		case RC_OPCODE_BGNLOOP:
			lower_bgnloop(inst, &fc_state);
			fc_state.LoopDepth++;
			break;

		case RC_OPCODE_BRK:
			lower_brk(inst, &fc_state);
			break;

		case RC_OPCODE_ENDLOOP:
			if (fc_state.BranchDepth != 0
					|| fc_state.LoopDepth != 1) {
				lower_endloop(inst, &fc_state);
				/* Skip the PRED_RESTORE that lower_endloop()
				 * just inserted after this ENDLOOP. This
				 * must only happen when something was
				 * actually inserted: otherwise the next real
				 * instruction would never be lowered, and a
				 * trailing ENDLOOP would step past the list
				 * sentinel. */
				inst = inst->Next;
			}
			fc_state.LoopDepth--;
			break;

		case RC_OPCODE_IF:
			lower_if(inst, &fc_state);
			fc_state.BranchDepth++;
			break;

		case RC_OPCODE_ELSE:
			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
			build_pred_dst(&inst->U.I.DstReg, &fc_state);
			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			break;

		case RC_OPCODE_ENDIF:
			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
				/* The IF/BRK pair inside a depth-1 loop
				 * needs no pop: drop the ENDIF and continue
				 * from its predecessor. */
				struct rc_instruction * to_delete = inst;
				inst = inst->Prev;
				rc_remove_instruction(to_delete);
			} else {
				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
				build_pred_dst(&inst->U.I.DstReg, &fc_state);
				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
			}
			fc_state.InCFBreak = 0;
			fc_state.BranchDepth--;
			break;

		default:
			/* Ordinary instructions inside control flow only
			 * execute when the predicate is set. */
			if (fc_state.BranchDepth || fc_state.LoopDepth) {
				inst->U.I.DstReg.Pred = RC_PRED_SET;
			}
			break;
		}

		if (c->Error) {
			return;
		}
	}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/omod_two_writers.test
0,0 → 1,5
RCP temp[0].x, const[1].x___;
RCP temp[0].y, const[1]._y__;
MUL temp[1].xy, const[0].xx__, temp[0].xy__;
MOV output[0].xy, temp[1].xy;
=
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/r300_compiler_tests.c
0,0 → 1,44
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "r300_compiler_tests.h"
 
#include <stdlib.h>
 
/* Test driver: runs all three suites unconditionally, in a fixed order, and
 * exits with EXIT_SUCCESS only if bit 0 is set in every suite's result. */
int main(int argc, char ** argv)
{
	const unsigned optimize_ok = radeon_compiler_optimize_run_tests();
	const unsigned regalloc_ok = radeon_compiler_regalloc_run_tests();
	const unsigned util_ok = radeon_compiler_util_run_tests();

	return (1u & optimize_ok & regalloc_ok & util_ok)
		? EXIT_SUCCESS : EXIT_FAILURE;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/r300_compiler_tests.h
0,0 → 1,30
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/* Entry points of the individual test suites, called from main() in
 * r300_compiler_tests.c. main() ANDs each return value into a flag that
 * starts at 1, so a passing suite must return a value with bit 0 set.
 * NOTE(review): presumably exactly 0 or 1 -- confirm in run_tests(). */
unsigned radeon_compiler_optimize_run_tests(void);
unsigned radeon_compiler_regalloc_run_tests(void);
unsigned radeon_compiler_util_run_tests(void);
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c
0,0 → 1,88
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
 
#include "r300_compiler_tests.h"
#include "rc_test_helpers.h"
#include "unit_test.h"
 
static unsigned test_rc_optimize(
struct test_result * result,
struct radeon_compiler * c,
const char * filename)
{
struct rc_test_file test_file;
 
test_begin(result);
 
if (!load_program(c, &test_file, filename)) {
fprintf(stderr, "Failed to load program\n");
return 0;
}
 
rc_optimize(c, NULL);
return 1;
}
 
/* Runs rc_optimize() on omod_two_writers.test and checks that the first two
 * remaining instructions carry RC_OMOD_MUL_2 and the third one is a MOV.
 *
 * The test fails (instead of reading uninitialized pointers or writing past
 * inst_list) when the program could not be loaded or when fewer than three
 * instructions are left after optimization. */
static void test_runner_rc_optimize(struct test_result * result)
{
	enum { CHECKED_INSTS = 3 };
	unsigned pass;
	struct radeon_compiler c;
	struct rc_instruction *inst;
	struct rc_instruction *inst_list[CHECKED_INSTS];
	unsigned inst_count = 0;
	float const0[4] = {2.0f, 0.0f, 0.0f, 0.0f};

	init_compiler(&c, RC_FRAGMENT_PROGRAM, 1, 0);

	rc_constants_add_immediate_vec4(&c.Program.Constants, const0);

	/* test_rc_optimize() calls test_begin() even when loading fails, so
	 * test_check() below is always paired with it. */
	pass = test_rc_optimize(result, &c, "omod_two_writers.test");

	for(inst = c.Program.Instructions.Next;
				inst != &c.Program.Instructions;
				inst = inst->Next, inst_count++) {
		/* Only the first CHECKED_INSTS instructions are inspected;
		 * never store beyond the end of inst_list. */
		if (inst_count < CHECKED_INSTS) {
			inst_list[inst_count] = inst;
		}
	}

	if (inst_count < CHECKED_INSTS) {
		/* inst_list is not fully populated. */
		pass = 0;
	} else if (inst_list[0]->U.I.Omod != RC_OMOD_MUL_2 ||
			inst_list[1]->U.I.Omod != RC_OMOD_MUL_2 ||
			inst_list[2]->U.I.Opcode != RC_OPCODE_MOV) {
		pass = 0;
	}

	test_check(result, pass);
}
 
/* Runs the rc_optimize() test suite and returns the result of run_tests().
 * The parameter list is spelled (void) so that the definition matches the
 * prototype in r300_compiler_tests.h. */
unsigned radeon_compiler_optimize_run_tests(void)
{
	/* Table terminated by a {NULL, NULL} entry. */
	static struct test tests[] = {
		{"rc_optimize() => peephole_mul_omod()", test_runner_rc_optimize},
		{NULL, NULL}
	};
	return run_tests(tests);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/radeon_compiler_regalloc_tests.c
0,0 → 1,99
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
#include "radeon_program_pair.h"
 
#include "r300_compiler_tests.h"
#include "rc_test_helpers.h"
#include "unit_test.h"
 
/* Stand-in for the driver's AllocateHwInputs hook: maps inputs 0..9 to the
 * hardware register with the same number by invoking `allocate` once per
 * input. The compiler argument is not used. */
static void dummy_allocate_hw_inputs(
	struct r300_fragment_program_compiler * c,
	void (*allocate)(void * data, unsigned input, unsigned hwreg),
	void * mydata)
{
	unsigned reg = 0;

	while (reg < 10) {
		allocate(mydata, reg, reg);
		reg++;
	}
}
 
/* Loads `filename` into compiler `c`, runs pair translation, scheduling,
 * dead-source removal and register allocation, then checks that for every
 * normal instruction other than BEGIN_TEX the first swizzle component of
 * source 0 is X.
 *
 * A program that cannot be loaded now fails the test; previously the passes
 * ran on whatever the compiler held and an empty program counted as a
 * pass. */
static void test_runner_rc_regalloc(
	struct test_result *result,
	struct radeon_compiler *c,
	const char *filename)
{
	struct rc_test_file test_file;
	unsigned optimizations = 1;
	unsigned do_full_regalloc = 1;
	struct rc_instruction *inst;
	unsigned pass = 1;

	test_begin(result);

	if (!load_program(c, &test_file, filename)) {
		fprintf(stderr, "Failed to load program\n");
		test_check(result, 0);
		return;
	}

	rc_pair_translate(c, NULL);
	rc_pair_schedule(c, &optimizations);
	rc_pair_remove_dead_sources(c, NULL);
	rc_pair_regalloc(c, &do_full_regalloc);

	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {
		if (inst->Type == RC_INSTRUCTION_NORMAL &&
				inst->U.I.Opcode != RC_OPCODE_BEGIN_TEX) {
			if (GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 0)
							!= RC_SWIZZLE_X) {
				pass = 0;
			}
		}
	}

	test_check(result, pass);
}
 
static void tex_1d_swizzle(struct test_result *result)
{
struct radeon_compiler c;
 
init_compiler(&c, RC_FRAGMENT_PROGRAM, 0, 0);
struct r300_fragment_program_compiler *cc =
(struct r300_fragment_program_compiler*)&c;
cc->AllocateHwInputs = dummy_allocate_hw_inputs;
 
test_runner_rc_regalloc(result, &c, "regalloc_tex_1d_swizzle.test");
}
 
/* Runs the register allocation test suite and returns the result of
 * run_tests(). The parameter list is spelled (void) so that the definition
 * matches the prototype in r300_compiler_tests.h. */
unsigned radeon_compiler_regalloc_run_tests(void)
{
	/* Table terminated by a {NULL, NULL} entry. */
	static struct test tests[] = {
		{"rc_pair_regalloc() => TEX 1D Swizzle - r300", tex_1d_swizzle },
		{NULL, NULL}
	};
	return run_tests(tests);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
0,0 → 1,104
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
 
#include "radeon_compiler_util.h"
#include "radeon_program.h"
 
#include "r300_compiler_tests.h"
#include "rc_test_helpers.h"
#include "unit_test.h"
 
/* One rc_inst_can_use_presub() check.
 *
 * Parses add_str and replace_str into instructions, then asks whether the
 * first source of the replace instruction can be substituted by an
 * RC_PRESUB_ADD built from the two sources of the add instruction. The test
 * passes when the answer equals `expected`. */
static void test_rc_inst_can_use_presub(
	struct test_result * result,
	int expected,
	const char * add_str,
	const char * replace_str)
{
	struct rc_instruction add_inst, replace_inst;
	struct rc_src_register * replaced_src;
	struct rc_src_register * presub_src0;
	struct rc_src_register * presub_src1;
	int can_use;

	test_begin(result);
	init_rc_normal_instruction(&add_inst, add_str);
	init_rc_normal_instruction(&replace_inst, replace_str);

	replaced_src = &replace_inst.U.I.SrcReg[0];
	presub_src0 = &add_inst.U.I.SrcReg[0];
	presub_src1 = &add_inst.U.I.SrcReg[1];

	can_use = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
				replaced_src, presub_src0, presub_src1);

	test_check(result, can_use == expected);
}
 
/* Runs the rc_inst_can_use_presub() regression cases, in table order.
 * All of them relate to https://bugs.freedesktop.org/show_bug.cgi?id=36527
 * and all of them expect the presubtract to be rejected. */
static void test_runner_rc_inst_can_use_presub(struct test_result * result)
{
	static const struct {
		int expected;
		const char * add_str;
		const char * replace_str;
	} cases[] = {
		/* The source being replaced has the same register file and
		 * index as another source of the CMP instruction. An older
		 * version of the function ignored every register sharing
		 * file and index with the replacement register when it
		 * counted source selects. */
		{ 0,
		  "ADD temp[0].z, temp[6].__x_, const[1].__x_;",
		  "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;" },

		/* A random case that should fail. */
		{ 0,
		  "ADD temp[3], temp[1], temp[2];",
		  "MAD temp[1], temp[0], const[0].xxxx, -temp[3];" },

		/* Both ADD arguments share register file and index.
		 * Normally one source select would be enough for them, but
		 * as part of a presubtract operation they need the two
		 * source selects the presubtract instruction expects
		 * (src0 and src1). */
		{ 0,
		  "ADD temp[3].x, temp[0].x___, temp[0].x___;",
		  "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;" },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++) {
		test_rc_inst_can_use_presub(result, cases[idx].expected,
				cases[idx].add_str, cases[idx].replace_str);
	}
}
 
/* Runs the radeon_compiler_util test suite and returns the result of
 * run_tests(). The parameter list is spelled (void) so that the definition
 * matches the prototype in r300_compiler_tests.h. */
unsigned radeon_compiler_util_run_tests(void)
{
	/* Table terminated by a {NULL, NULL} entry. */
	static struct test tests[] = {
		{"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub},
		{NULL, NULL}
	};
	return run_tests(tests);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
0,0 → 1,607
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
* Copyright 2013 Advanced Micro Devices, Inc.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
#include <errno.h>
#include <regex.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
 
#include "r500_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_opcodes.h"
#include "radeon_program.h"
#include "radeon_regalloc.h"
#include "radeon_swizzle.h"
#include "util/u_math.h"
 
#include "rc_test_helpers.h"
 
/* This file contains some helper functions for filling out the rc_instruction
* data structures. These functions take a string as input based on the format
* output by rc_program_print().
*/
 
#define VERBOSE 0
 
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
 
#define REGEX_ERR_BUF_SIZE 50
 
/* One token located by a regex capture group. */
struct match_info {
/* Points into the string that was searched (not a copy); the token is
 * therefore NOT NUL-terminated at Length. */
const char * String;
/* Number of characters that belong to the token; 0 for an empty match. */
int Length;
};
 
/**
 * Test whether str consists only of spaces and newlines.
 *
 * @param str NUL-terminated string to examine.
 * @return 1 if str is a non-empty run of ' ' and '\n' characters, 0
 * otherwise (including the empty string, and when the regex cannot be
 * compiled).
 */
static int is_whitespace(const char *str)
{
	regex_t regex;
	int matched;

	if (regcomp(&regex, "^[ \n]+$", REG_EXTENDED)) {
		fprintf(stderr, "Failed to compile whitespace regex\n");
		return 0;
	}
	matched = regexec(&regex, str, 0, NULL, 0) != REG_NOMATCH;
	/* Release the compiled pattern; it used to be leaked on every call. */
	regfree(&regex);
	return matched;
}
 
/* Length in characters of capture group `index` (end offset minus start
 * offset). */
static int match_length(regmatch_t * matches, int index)
{
	const regmatch_t * group = &matches[index];

	return group->rm_eo - group->rm_so;
}
 
/**
 * Compile regex_str as a POSIX extended regex and run it on search_str.
 *
 * @param regex_str   Pattern to compile.
 * @param search_str  String to search.
 * @param matches     Receives the capture group offsets; must have room for
 *                    num_matches entries.
 * @param num_matches Number of entries in matches.
 * @return 1 if the pattern compiled and matched, 0 otherwise (a message is
 * printed to stderr).
 */
static int regex_helper(
	const char * regex_str,
	const char * search_str,
	regmatch_t * matches,
	int num_matches)
{
	char err_buf[REGEX_ERR_BUF_SIZE];
	regex_t regex;
	int err_code;
	unsigned int i;

	err_code = regcomp(&regex, regex_str, REG_EXTENDED);
	if (err_code) {
		/* Nothing to free here: the regex_t is only valid after a
		 * successful regcomp(). */
		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
		fprintf(stderr, "Failed to compile regex: %s\n", err_buf);
		return 0;
	}

	err_code = regexec(&regex, search_str, num_matches, matches, 0);
	DBG("Search string: '%s'\n", search_str);
	for (i = 0; i < num_matches; i++) {
		DBG("Match %u start = %d end = %d\n", i,
					matches[i].rm_so, matches[i].rm_eo);
	}
	if (err_code) {
		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
		fprintf(stderr, "Failed to match regex: %s\n", err_buf);
		regfree(&regex);
		return 0;
	}

	/* The offsets in matches[] refer to search_str, so the compiled
	 * pattern is no longer needed; it used to be leaked. */
	regfree(&regex);
	return 1;
}
 
#define REGEX_SRC_MATCHES 6
 
/* Tokens of a source register string, filled by init_rc_normal_src() from
 * capture groups 1-5 of its regex, in this order. */
struct src_tokens {
/* Leading '-' characters. */
struct match_info Negate;
/* Leading '|' characters. */
struct match_info Abs;
/* Register file name, e.g. "temp". */
struct match_info File;
/* Digits of the register index. */
struct match_info Index;
/* Swizzle including its leading '.'; empty when absent. */
struct match_info Swizzle;
};
 
/**
 * Initialize the source register at index src_index for the instruction based
 * on src_str.
 *
 * NOTE: Warning in init_rc_normal_instruction() applies to this function as
 * well.
 *
 * Negate and Abs are only ever set, never cleared, so the instruction is
 * expected to have been zero-initialized by the caller.
 *
 * @param inst Instruction whose source register is filled in.
 * @param src_index Index of the source register inside inst.
 * @param src_str A string that represents the source register. The format for
 * this string is the same that is output by rc_program_print.
 * @return 1 On success, 0 on failure
 */
int init_rc_normal_src(
	struct rc_instruction * inst,
	unsigned int src_index,
	const char * src_str)
{
	/* Groups: 1 = negate, 2 = abs, 3 = file, 4 = index, 5 = swizzle.
	 * The swizzle class has to accept every character the switch below
	 * handles: the constant swizzles '0', '1' and 'H' and the
	 * per-component negate '-'. It used to allow only lowercase letters
	 * and '_', which silently cut such swizzles short. */
	const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[*([[:digit:]]*)\\]*(\\.*[[:lower:]_01H-]*)";
	regmatch_t matches[REGEX_SRC_MATCHES];
	struct src_tokens tokens;
	struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index];
	unsigned int i;

	/* Execute the regex */
	if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) {
		fprintf(stderr, "Failed to execute regex for src register.\n");
		return 0;
	}

	/* Create Tokens */
	tokens.Negate.String = src_str + matches[1].rm_so;
	tokens.Negate.Length = match_length(matches, 1);
	tokens.Abs.String = src_str + matches[2].rm_so;
	tokens.Abs.Length = match_length(matches, 2);
	tokens.File.String = src_str + matches[3].rm_so;
	tokens.File.Length = match_length(matches, 3);
	tokens.Index.String = src_str + matches[4].rm_so;
	tokens.Index.Length = match_length(matches, 4);
	tokens.Swizzle.String = src_str + matches[5].rm_so;
	tokens.Swizzle.Length = match_length(matches, 5);

	/* Negate: a leading '-' negates all four components. */
	if (tokens.Negate.Length > 0) {
		src_reg->Negate = RC_MASK_XYZW;
	}

	/* Abs */
	if (tokens.Abs.Length > 0) {
		src_reg->Abs = 1;
	}

	/* File: only the first File.Length characters are compared, so the
	 * token is matched as a prefix of the file name. An unrecognized
	 * name leaves File untouched. */
	if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
		src_reg->File = RC_FILE_TEMPORARY;
	} else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) {
		src_reg->File = RC_FILE_INPUT;
	} else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) {
		src_reg->File = RC_FILE_CONSTANT;
	} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) {
		src_reg->File = RC_FILE_NONE;
	}

	/* Index: strtol() stops at the first non-digit, so the token does
	 * not need to be NUL-terminated. */
	errno = 0;
	src_reg->Index = strtol(tokens.Index.String, NULL, 10);
	if (errno > 0) {
		fprintf(stderr, "Could not convert src register index.\n");
		return 0;
	}

	/* Swizzle */
	if (tokens.Swizzle.Length == 0) {
		src_reg->Swizzle = RC_SWIZZLE_XYZW;
	} else {
		/* Position 0 holds the '.', components start at 1. */
		int str_index = 1;
		src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED);
		if (tokens.Swizzle.String[0] != '.') {
			fprintf(stderr, "First char of swizzle is not valid.\n");
			return 0;
		}
		for (i = 0; i < 4 && str_index < tokens.Swizzle.Length;
							i++, str_index++) {
			/* A '-' in front of a component negates just that
			 * component. */
			if (tokens.Swizzle.String[str_index] == '-') {
				src_reg->Negate |= (1 << i);
				str_index++;
			}
			switch(tokens.Swizzle.String[str_index]) {
			case 'x':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X);
				break;
			case 'y':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y);
				break;
			case 'z':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z);
				break;
			case 'w':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W);
				break;
			case '1':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE);
				break;
			case '0':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO);
				break;
			case 'H':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF);
				break;
			case '_':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED);
				break;
			default:
				fprintf(stderr, "Unknown src register swizzle: %c\n",
						tokens.Swizzle.String[str_index]);
				return 0;
			}
		}
	}
	DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n",
			src_reg->File, src_reg->Index, src_reg->Swizzle,
			src_reg->Negate, src_reg->Abs);
	return 1;
}
 
#define REGEX_DST_MATCHES 4
 
/* Tokens of a destination register string, filled by init_rc_normal_dst()
 * from capture groups 1-3 of its regex, in this order. */
struct dst_tokens {
/* Register file name, e.g. "temp" or "output". */
struct match_info File;
/* Digits of the register index. */
struct match_info Index;
/* Writemask including its leading '.'; empty when absent. */
struct match_info WriteMask;
};
 
/**
* Initialize the destination for the instruction based on dst_str.
*
* NOTE: Warning in init_rc_normal_instruction() applies to this function as
* well.
*
* @param dst_str A string that represents the destination register. The format
* for this string is the same that is output by rc_program_print.
* @return 1 On success, 0 on failure
*/
int init_rc_normal_dst(
struct rc_instruction * inst,
const char * dst_str)
{
const char * regex_str = "([[:lower:]]*)\\[*([[:digit:]]*)\\]*(\\.*[[:lower:]]*)";
regmatch_t matches[REGEX_DST_MATCHES];
struct dst_tokens tokens;
unsigned int i;
 
/* Execute the regex */
if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) {
fprintf(stderr, "Failed to execute regex for dst register.\n");
return 0;
}
 
/* Create Tokens */
tokens.File.String = dst_str + matches[1].rm_so;
tokens.File.Length = match_length(matches, 1);
tokens.Index.String = dst_str + matches[2].rm_so;
tokens.Index.Length = match_length(matches, 2);
tokens.WriteMask.String = dst_str + matches[3].rm_so;
tokens.WriteMask.Length = match_length(matches, 3);
 
/* File Type */
if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
} else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) {
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) {
inst->U.I.DstReg.File = RC_FILE_NONE;
return 1;
} else {
fprintf(stderr, "Unknown dst register file type.\n");
return 0;
}
 
/* File Index */
errno = 0;
inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10);
 
if (errno > 0) {
fprintf(stderr, "Could not convert dst register index\n");
return 0;
}
 
/* WriteMask */
if (tokens.WriteMask.Length == 0) {
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
} else {
inst->U.I.DstReg.WriteMask = 0;
/* The first character should be '.' */
if (tokens.WriteMask.String[0] != '.') {
fprintf(stderr, "1st char of writemask is not valid.\n");
return 0;
}
for (i = 1; i < tokens.WriteMask.Length; i++) {
switch(tokens.WriteMask.String[i]) {
case 'x':
inst->U.I.DstReg.WriteMask |= RC_MASK_X;
break;
case 'y':
inst->U.I.DstReg.WriteMask |= RC_MASK_Y;
break;
case 'z':
inst->U.I.DstReg.WriteMask |= RC_MASK_Z;
break;
case 'w':
inst->U.I.DstReg.WriteMask |= RC_MASK_W;
break;
default:
fprintf(stderr, "Unknown swizzle in writemask: %c\n",
tokens.WriteMask.String[i]);
return 0;
}
}
}
DBG("Dst Reg File=%u Index=%d Writemask=%d\n",
inst->U.I.DstReg.File,
inst->U.I.DstReg.Index,
inst->U.I.DstReg.WriteMask);
return 1;
}
 
#define REGEX_INST_MATCHES 7
#define REGEX_CONST_MATCHES 5
 
/* Tokens of an instruction string as split by
 * parse_rc_normal_instruction(). */
struct inst_tokens {
/* Opcode mnemonic (uppercase letters and digits). */
struct match_info Opcode;
/* Optional "_SAT" suffix; Length is 0 when absent. */
struct match_info Sat;
/* Destination register text; only filled for opcodes with a
 * destination. */
struct match_info Dst;
/* Up to three source register texts. */
struct match_info Srcs[3];
};
 
/* Return a freshly malloc()ed, NUL-terminated copy of the first `length`
 * characters of `start`, or NULL if the allocation fails. */
static char * copy_match_token(const char * start, int length)
{
	char * copy = malloc(sizeof(char) * (length + 1));

	if (copy) {
		memcpy(copy, start, length);
		copy[length] = '\0';
	}
	return copy;
}

/**
 * Initialize a normal instruction based on inst_str.
 *
 * WARNING: This function might not be able to handle every kind of format that
 * rc_program_print() can output. If you are having problems with a
 * particular string, you may need to add support for it to this functions.
 *
 * The opcode token is compared against the opcode table as a prefix of the
 * opcode name, and the first match wins. If no opcode matches, the opcode
 * and registers of inst are left untouched. Failures reported by
 * init_rc_normal_dst() and init_rc_normal_src() are not propagated.
 *
 * @param inst Instruction to fill in.
 * @param inst_str A string that represents the instruction. The format for
 * this string is the same that is output by rc_program_print.
 * @return 1 On success, 0 if the string could not be tokenized or memory
 * could not be allocated
 */

int parse_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str)
{
	/* Groups: 1 = opcode, 2 = "_SAT", 3-6 = comma separated operands. */
	const char * regex_str = "[[:digit:]: ]*([[:upper:][:digit:]]+)(_SAT)*[ ]*([^,;]*)[, ]*([^,;]*)[, ]*([^,;]*)[, ]*([^;]*)";
	int i;
	regmatch_t matches[REGEX_INST_MATCHES];
	struct inst_tokens tokens;

	/* Execute the regex */
	if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) {
		return 0;
	}
	memset(&tokens, 0, sizeof(tokens));

	/* Create Tokens */
	tokens.Opcode.String = inst_str + matches[1].rm_so;
	tokens.Opcode.Length = match_length(matches, 1);
	/* NOTE(review): the _SAT token is recorded but nothing below applies
	 * it to the instruction -- confirm whether that is intended. */
	if (matches[2].rm_so > -1) {
		tokens.Sat.String = inst_str + matches[2].rm_so;
		tokens.Sat.Length = match_length(matches, 2);
	}


	/* Fill out the rest of the instruction. */
	inst->Type = RC_INSTRUCTION_NORMAL;

	for (i = 0; i < MAX_RC_OPCODE; i++) {
		const struct rc_opcode_info * info = rc_get_opcode_info(i);
		/* Capture group of the first source operand: group 3 when
		 * the opcode has no destination, group 4 otherwise. */
		unsigned int first_src = 3;
		unsigned int j;
		if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) {
			continue;
		}
		inst->U.I.Opcode = info->Opcode;
		if (info->HasDstReg) {
			char * dst_str;
			tokens.Dst.String = inst_str + matches[3].rm_so;
			tokens.Dst.Length = match_length(matches, 3);
			first_src++;

			dst_str = copy_match_token(tokens.Dst.String,
							tokens.Dst.Length);
			if (!dst_str) {
				fprintf(stderr, "Out of memory while parsing "
						"dst register.\n");
				return 0;
			}
			init_rc_normal_dst(inst, dst_str);
			free(dst_str);
		}
		for (j = 0; j < info->NumSrcRegs; j++) {
			char * src_str;
			tokens.Srcs[j].String =
				inst_str + matches[first_src + j].rm_so;
			tokens.Srcs[j].Length =
				match_length(matches, first_src + j);

			src_str = copy_match_token(tokens.Srcs[j].String,
							tokens.Srcs[j].Length);
			if (!src_str) {
				fprintf(stderr, "Out of memory while parsing "
						"src register.\n");
				return 0;
			}
			init_rc_normal_src(inst, j, src_str);
			/* The temporary copy used to be leaked. */
			free(src_str);
		}
		if (info->HasTexture) {
			/* XXX: Will this always be XYZW ? */
			inst->U.I.TexSwizzle = RC_SWIZZLE_XYZW;
		}
		break;
	}
	return 1;
}
 
#define INDEX_TOKEN_LEN 4
#define FLOAT_TOKEN_LEN 50
/**
 * Parse a constant definition of the form "const[N] {a, b, c, d}".
 *
 * @param index Receives N.
 * @param data Receives the four float components; must have room for 4 floats.
 * @param const_str The string to parse.
 * @return 1 if the index and all four components were converted, 0 otherwise.
 * On failure *index and data may have been partially written.
 */
int parse_constant(unsigned *index, float *data, const char *const_str)
{
	/* %u is the conversion that matches an unsigned destination; the
	 * previous %d expected a pointer to int. */
	int matched = sscanf(const_str, "const[%u] {%f, %f, %f, %f}", index,
				&data[0], &data[1], &data[2], &data[3]);
	return matched == 5;
}
 
/**
 * Reset *inst to all zero bytes and then parse inst_str into it.
 *
 * @return whatever parse_rc_normal_instruction() returns for inst_str
 */
int init_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str)
{
	int parsed;

	/* Start from a blank instruction so stale fields cannot leak through */
	memset(inst, 0, sizeof(*inst));

	parsed = parse_rc_normal_instruction(inst, inst_str);
	return parsed;
}
 
/**
 * Insert a new instruction after c->Program.Instructions.Prev and fill it
 * in from inst_string. The parser's return value is not reported.
 */
void add_instruction(struct radeon_compiler *c, const char * inst_string)
{
	struct rc_instruction * inst;

	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
	(void)parse_rc_normal_instruction(inst, inst_string);
}
 
/**
 * Parse const_str with parse_constant() and store the result as an immediate
 * constant at the parsed index of c->Program.Constants, growing the list
 * when the index does not fit.
 *
 * @return 1 on success, 0 if the string could not be parsed or the list
 * could not be grown.
 */
int add_constant(struct radeon_compiler *c, const char *const_str)
{
	float data[4];
	unsigned index;
	struct rc_constant_list *constants;
	struct rc_constant constant;

	if (!parse_constant(&index, data, const_str)) {
		return 0;
	}

	constants = &c->Program.Constants;
	/* Slot `index` exists only when index < _Reserved, so the list has to
	 * grow when index == _Reserved as well (this includes the very first
	 * constant of an empty list). */
	if (constants->_Reserved <= index) {
		struct rc_constant * newlist;
		unsigned old_reserved = constants->_Reserved;
		unsigned new_reserved = index + 100;
		unsigned to_copy = constants->Count < old_reserved ?
					constants->Count : old_reserved;

		/* Unsigned wrap-around of index + 100 */
		if (new_reserved < index) {
			return 0;
		}

		/* Zero-filled so that slots skipped by a sparse index hold
		 * defined contents. */
		newlist = calloc(new_reserved, sizeof(struct rc_constant));
		if (!newlist) {
			return 0;
		}
		if (constants->Constants) {
			/* Copy only what the old array actually held; the
			 * old code read new_reserved elements from it. */
			memcpy(newlist, constants->Constants,
				sizeof(struct rc_constant) * to_copy);
			free(constants->Constants);
		}

		constants->Constants = newlist;
		constants->_Reserved = new_reserved;
	}

	memset(&constant, 0, sizeof(constant));
	constant.Type = RC_CONSTANT_IMMEDIATE;
	constant.Size = 4;
	memcpy(constant.u.Immediate, data, sizeof(float) * 4);
	constants->Constants[index] = constant;
	constants->Count = MAX2(constants->Count, index + 1);

	return 1;
}
 
/**
 * Initialize compiler c with a freshly allocated register allocator state
 * and set its hardware limits and capability flags from the chip
 * generation flags and the program type.
 */
void init_compiler(
	struct radeon_compiler *c,
	enum rc_program_type program_type,
	unsigned is_r500,
	unsigned is_r400)
{
	struct rc_regalloc_state *regalloc = malloc(sizeof(*regalloc));

	rc_init_regalloc_state(regalloc);
	rc_init(c, regalloc);

	c->is_r500 = is_r500;

	/* Register and constant limits */
	if (is_r500) {
		c->max_temp_regs = 128;
		c->max_constants = 256;
	} else {
		c->max_temp_regs = is_r400 ? 64 : 32;
		c->max_constants = 32;
	}

	/* Instruction limits are shared by r400 and r500 */
	if (is_r500 || is_r400) {
		c->max_alu_insts = 512;
		c->max_tex_insts = 512;
	} else {
		c->max_alu_insts = 64;
		c->max_tex_insts = 32;
	}

	if (program_type != RC_FRAGMENT_PROGRAM) {
		c->SwizzleCaps = &r300_vertprog_swizzle_caps;
		return;
	}

	/* Fragment program only features */
	c->has_half_swizzles = 1;
	c->has_presub = 1;
	c->has_omod = 1;
	if (is_r500) {
		c->SwizzleCaps = &r500_swizzle_caps;
	} else {
		c->SwizzleCaps = &r300_swizzle_caps;
	}
}
 
#define MAX_LINE_LENGTH 100
#define MAX_PATH_LENGTH 100

/* Free every line and both line arrays held by *test, then zero *test. */
static void free_test_lines(struct rc_test_file *test)
{
	unsigned n;

	if (test->input) {
		for (n = 0; n < test->num_input_lines; n++) {
			free(test->input[n]);
		}
		free(test->input);
	}
	if (test->expected) {
		for (n = 0; n < test->num_expected_lines; n++) {
			free(test->expected[n]);
		}
		free(test->expected);
	}
	memset(test, 0, sizeof(struct rc_test_file));
}

/**
 * Read "compiler/tests/<filename>", store its lines in *test and feed the
 * input section to compiler c.
 *
 * Lines starting with '#' and whitespace-only lines are skipped. Lines before
 * the first line starting with '=' are input lines; the remaining lines are
 * expected lines. Input lines starting with 'c' are passed to add_constant(),
 * all other input lines to add_instruction().
 *
 * @return 1 on success, 0 if the file cannot be opened, a line does not fit
 * in MAX_LINE_LENGTH or memory runs out. After a failure that happens once
 * the file is open, *test is left zeroed.
 */
unsigned load_program(
	struct radeon_compiler *c,
	struct rc_test_file *test,
	const char *filename)
{
	char line[MAX_LINE_LENGTH];
	char path[MAX_PATH_LENGTH];
	FILE *file;
	unsigned *count;
	char **string_store;
	unsigned i = 0;

	snprintf(path, MAX_PATH_LENGTH, "compiler/tests/%s", filename);
	file = fopen(path, "r");
	if (!file) {
		return 0;
	}
	memset(test, 0, sizeof(struct rc_test_file));

	/* First pass: count the lines of each section. */
	count = &test->num_input_lines;

	while (fgets(line, MAX_LINE_LENGTH, file)) {
		size_t len = strlen(line);

		/* A full buffer without a trailing newline means fgets() cut
		 * the line short, unless the file simply ends here. */
		if (len == MAX_LINE_LENGTH - 1 && line[len - 1] != '\n' &&
		    fgetc(file) != EOF) {
			fprintf(stderr, "Error line cannot be longer than 100 "
				"characters:\n%s\n", line);
			fclose(file);
			free_test_lines(test);
			return 0;
		}

		/* Comment */
		if (line[0] == '#' || is_whitespace(line)) {
			continue;
		}

		if (line[0] == '=') {
			count = &test->num_expected_lines;
			continue;
		}

		(*count)++;
	}

	/* calloc() so that free_test_lines() can run on a partly filled
	 * array. calloc(0, ...) may legitimately return NULL. */
	test->input = calloc(test->num_input_lines, sizeof(char *));
	test->expected = calloc(test->num_expected_lines, sizeof(char *));
	if ((test->num_input_lines && !test->input) ||
	    (test->num_expected_lines && !test->expected)) {
		fclose(file);
		free_test_lines(test);
		return 0;
	}

	/* Second pass: copy the lines. */
	rewind(file);
	string_store = test->input;

	while (fgets(line, MAX_LINE_LENGTH, file)) {
		char * dst;
		/* Comment */
		if (line[0] == '#' || is_whitespace(line)) {
			continue;
		}

		if (line[0] == '=') {
			/* Only the first separator switches sections; the
			 * counting pass keeps adding to the expected section
			 * after later ones, so keep appending here too. */
			if (string_store != test->expected) {
				i = 0;
				string_store = test->expected;
			}
			continue;
		}

		dst = malloc((strlen(line) + 1) * sizeof (char));
		if (!dst) {
			fclose(file);
			free_test_lines(test);
			return 0;
		}
		strcpy(dst, line);
		string_store[i++] = dst;
	}

	/* The stream used to be left open on every path. */
	fclose(file);

	for (i = 0; i < test->num_input_lines; i++) {
		if (test->input[i][0] == 'c') {
			add_constant(c, test->input[i]);
			continue;
		}
		/* XXX: Parse immediates from the file. */
		add_instruction(c, test->input[i]);
	}
	return 1;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
0,0 → 1,71
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
* Copyright 2013 Advanced Micro Devices, Inc.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Tom Stellard <thomas.stellard@amd.com>
*/
 
/* Include guard: this header defines struct rc_test_file, so including it
 * twice in one translation unit would otherwise be a redefinition error. */
#ifndef RC_TEST_HELPERS_H
#define RC_TEST_HELPERS_H

#include "radeon_compiler.h"

/**
 * Contents of a test file as read by load_program().
 * Lines are stored as separately allocated copies, including the trailing
 * newline when the file had one.
 */
struct rc_test_file {
	/* Number of entries in input */
	unsigned num_input_lines;
	/* Lines that precede the first line starting with '=' */
	char **input;
	/* Number of entries in expected */
	unsigned num_expected_lines;
	/* Lines that follow the first line starting with '=' */
	char **expected;
};

/* NOTE(review): presumably fills source operand src_index of inst from
 * src_str; the definition is not visible here - confirm. */
int init_rc_normal_src(
	struct rc_instruction * inst,
	unsigned int src_index,
	const char * src_str);

/* Fill the destination register of inst (file, index, write mask) from
 * dst_str. Returns 1 on success, 0 on failure. */
int init_rc_normal_dst(
	struct rc_instruction * inst,
	const char * dst_str);

/* Parse inst_str into inst without clearing inst first.
 * Returns 1 on success, 0 on failure. */
int parse_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str);

/* Parse "const[N] {a, b, c, d}" into *index and data[0..3].
 * Returns 1 when all five values were converted, 0 otherwise. */
int parse_constant(unsigned *index, float *data, const char *const_str);

/* Zero inst, then parse inst_str into it (see parse_rc_normal_instruction). */
int init_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str);

/* Insert a new instruction into the program of c and parse inst_string
 * into it. */
void add_instruction(struct radeon_compiler *c, const char * inst_string);

/* Parse const_str and store it as an immediate constant of c.
 * Returns 1 on success, 0 on failure. */
int add_constant(struct radeon_compiler *c, const char *const_str);

/* Initialize c and set its limits and capabilities for the given program
 * type and chip generation flags. */
void init_compiler(
	struct radeon_compiler *c,
	enum rc_program_type program_type,
	unsigned is_r500,
	unsigned is_r400);

/* Load compiler/tests/<filename> into *test and add its input section to c.
 * Returns 1 on success, 0 on failure. */
unsigned load_program(
	struct radeon_compiler *c,
	struct rc_test_file *test,
	const char *filename);

#endif /* RC_TEST_HELPERS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/regalloc_tex_1d_swizzle.test
0,0 → 1,15
const[0] = { 0.0000 2.0000 1.0000 0.0000 }
0: TEX temp[8].xyz, input[1].xy__, 2D[0];
1: TEX temp[10].xyz, input[2].xyz_, CUBE[2];
2: TEX temp[12].xyz, input[1].xy__, 2D[1];
3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_;
4: MAD temp[15].xyz, temp[12].xyz_, const[0].yyy_, -none.111_;
5: MAD temp[16].xyz, temp[10].xyz_, const[0].yyy_, -none.111_;
6: MUL temp[17].xyz, temp[8].xyz_, input[0].xyz_;
7: MOV output[0].w, none.___0;
8: MOV temp[0].x, temp[14].w___;
9: TEX temp[18].x, temp[0].x___, 1D[3];
10: DP3 temp[20].w, temp[16].xyz_, temp[15].xyz_;
11: MUL temp[21].xyz, temp[17].xyz_, temp[18].xxx_;
12: MUL output[0].xyz, temp[21].xyz_, temp[20].www_;
=
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/unit_test.c
0,0 → 1,67
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
#include "unit_test.h"
 
/**
 * Run every test of the array, which is terminated by an entry whose name
 * is NULL. Each test's result is zeroed before its function is called.
 *
 * @return 1 if for every test the number of passed subtests equals the
 * number of subtests that were begun, 0 otherwise.
 */
unsigned run_tests(struct test tests[])
{
	int i;
	unsigned pass = 1;
	for (i = 0; tests[i].name; i++) {
		printf("Test %s\n", tests[i].name);
		memset(&tests[i].result, 0, sizeof(tests[i].result));
		tests[i].test_func(&tests[i].result);
		/* Both counters are unsigned int, so print them with %u
		 * (as test_check() does) rather than %d. */
		printf("Test %s (%u/%u) pass\n", tests[i].name,
			tests[i].result.pass, tests[i].result.test_count);
		if (tests[i].result.pass != tests[i].result.test_count) {
			pass = 0;
		}
	}
	return pass;
}
 
/* Announce the start of a new subtest by bumping the subtest counter. */
void test_begin(struct test_result * result)
{
	result->test_count += 1;
}
 
/* Record the outcome of the current subtest: a non-zero cond counts as a
 * pass, zero as a failure. The verdict is printed on one line of stdout. */
void test_check(struct test_result * result, int cond)
{
	printf("Subtest %u -> ", result->test_count);
	if (!cond) {
		result->fail += 1;
		printf("Fail");
	} else {
		result->pass += 1;
		printf("Pass");
	}
	printf("\n");
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/compiler/tests/unit_test.h
0,0 → 1,43
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/* Include guard: this header defines structs, so including it twice in one
 * translation unit would otherwise be a redefinition error. */
#ifndef UNIT_TEST_H
#define UNIT_TEST_H

/* Counters of one test; run_tests() zeroes them before running the test. */
struct test_result {
	/* Subtests started, incremented by test_begin() */
	unsigned int test_count;
	/* Subtests whose test_check() condition was non-zero */
	unsigned int pass;
	/* Subtests whose test_check() condition was zero */
	unsigned int fail;
};

/* One entry of the array given to run_tests(); an entry with a NULL name
 * terminates the array. */
struct test {
	const char * name;
	void (*test_func)(struct test_result * result);
	struct test_result result;
};

/* Run all tests; returns 1 if every test passed all of its subtests. */
unsigned run_tests(struct test tests[]);

/* Start a subtest. */
void test_begin(struct test_result * result);
/* Record and print the outcome of the current subtest. */
void test_check(struct test_result * result, int cond);

#endif /* UNIT_TEST_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_blit.c
0,0 → 1,863
/*
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_texture.h"
#include "r300_reg.h"
 
#include "util/u_format.h"
#include "util/u_half.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
 
/* What r300_blitter_begin() has to do before a blitter operation.
 * The first four values are single bits, the rest are combinations. */
enum r300_blitter_op /* bitmask */
{
    R300_STOP_QUERY         = 1 << 0,
    R300_SAVE_TEXTURES      = 1 << 1,
    R300_SAVE_FRAMEBUFFER   = 1 << 2,
    R300_IGNORE_RENDER_COND = 1 << 3,

    R300_CLEAR         = R300_STOP_QUERY,

    R300_CLEAR_SURFACE = R300_CLEAR | R300_SAVE_FRAMEBUFFER,

    R300_DECOMPRESS    = R300_STOP_QUERY | R300_IGNORE_RENDER_COND,

    R300_COPY          = R300_DECOMPRESS | R300_SAVE_FRAMEBUFFER |
                         R300_SAVE_TEXTURES,

    R300_BLIT          = R300_COPY,
};
 
/* Prepare the context for a blitter operation: optionally stop the running
 * query, hand the current state objects to the blitter so it can restore
 * them, and optionally suspend the skip_rendering flag. The flags in `op`
 * select the optional parts. */
static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op)
{
    if (r300->query_current && (op & R300_STOP_QUERY)) {
        r300->blitter_saved_query = r300->query_current;
        r300_stop_query(r300);
    }

    /* Every state the blitter may touch is saved here; the blitter puts
     * them back once it is done, which keeps the operation transparent. */
    util_blitter_save_blend(r300->blitter, r300->blend_state.state);
    util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state);
    util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref));
    util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state);
    util_blitter_save_fragment_shader(r300->blitter, r300->fs.state);
    util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
    util_blitter_save_viewport(r300->blitter, &r300->viewport);
    util_blitter_save_scissor(r300->blitter, r300->scissor_state.state);
    util_blitter_save_sample_mask(r300->blitter, *(unsigned*)r300->sample_mask.state);
    util_blitter_save_vertex_buffer_slot(r300->blitter, r300->vertex_buffer);
    util_blitter_save_vertex_elements(r300->blitter, r300->velems);

    if (op & R300_SAVE_FRAMEBUFFER) {
        util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
    }

    if (op & R300_SAVE_TEXTURES) {
        struct r300_textures_state* tex =
            (struct r300_textures_state*)r300->textures_state.state;

        util_blitter_save_fragment_sampler_states(
            r300->blitter, tex->sampler_state_count,
            (void**)tex->sampler_states);

        util_blitter_save_fragment_sampler_views(
            r300->blitter, tex->sampler_view_count,
            (struct pipe_sampler_view**)tex->sampler_views);
    }

    if (!(op & R300_IGNORE_RENDER_COND)) {
        /* Zero tells r300_blitter_end that nothing has to be restored. */
        r300->blitter_saved_skip_rendering = 0;
    } else {
        /* The flag is stored plus one so that a saved value is never 0. */
        r300->blitter_saved_skip_rendering = r300->skip_rendering + 1;
        r300->skip_rendering = FALSE;
    }
}
 
/* Undo what r300_blitter_begin did to the context itself: resume the query
 * it stopped and put back the skip_rendering flag it suspended. */
static void r300_blitter_end(struct r300_context *r300)
{
    if (r300->blitter_saved_query != NULL) {
        r300_resume_query(r300, r300->blitter_saved_query);
        r300->blitter_saved_query = NULL;
    }

    /* A non-zero value is the saved flag plus one. */
    if (r300->blitter_saved_skip_rendering != 0) {
        r300->skip_rendering = r300->blitter_saved_skip_rendering - 1;
    }
}
 
/* Pack the clear color `rgba` for `format` into the 32-bit value that is
 * written to zb_depthclearvalue for a CBZB clear. Formats whose blocks are
 * not 32 bits wide have their 16-bit packed value replicated into both
 * halves of the result. */
static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
                                          const float* rgba)
{
    union util_color uc;
    util_pack_color(rgba, format, &uc);

    if (util_format_get_blocksizebits(format) == 32)
        return uc.ui;
    else
        /* Widen before shifting: uc.us would otherwise be promoted to a
         * signed int and overflow for values >= 0x8000. */
        return (uint32_t)uc.us | ((uint32_t)uc.us << 16);
}
 
/* A CBZB clear is possible only when nothing but color is cleared, exactly
 * one colorbuffer is bound and that colorbuffer's surface allows it. */
static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
                                       unsigned clear_buffers)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;

    if (clear_buffers == PIPE_CLEAR_COLOR && fb->nr_cbufs == 1) {
        return r300_surface(fb->cbufs[0])->cbzb_allowed;
    }

    return FALSE;
}
 
static boolean r300_fast_zclear_allowed(struct r300_context *r300,
unsigned clear_buffers)
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
 
return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level] != 0;
}
 
static boolean r300_hiz_clear_allowed(struct r300_context *r300)
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
 
return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level] != 0;
}
 
/* Pack depth (and stencil, for the combined format) into the clear value
 * for one of the three supported zbuffer formats. Any other format trips
 * the assertion and yields 0. */
static uint32_t r300_depth_clear_value(enum pipe_format format,
                                       double depth, unsigned stencil)
{
    if (format == PIPE_FORMAT_Z16_UNORM ||
        format == PIPE_FORMAT_X8Z24_UNORM) {
        return util_pack_z(format, depth);
    }

    if (format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
        return util_pack_z_stencil(format, depth, stencil);
    }

    assert(0);
    return 0;
}
 
/* Convert depth, clamped to [0, 1], to an 8-bit value and replicate it
 * into all four bytes of the result. */
static uint32_t r300_hiz_clear_value(double depth)
{
    uint32_t value = (uint32_t)(CLAMP(depth, 0, 1) * 255.5);

    assert(value <= 255);

    /* Double up twice: byte -> 16 bits -> 32 bits. */
    value |= value << 8;
    value |= value << 16;
    return value;
}
 
static void r300_set_clear_color(struct r300_context *r300,
const union pipe_color_union *color)
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
union util_color uc;
 
memset(&uc, 0, sizeof(uc));
util_pack_color(color->f, fb->cbufs[0]->format, &uc);
 
if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT) {
/* (0,1,2,3) maps to (B,G,R,A) */
r300->color_clear_value_gb = uc.h[0] | ((uint32_t)uc.h[1] << 16);
r300->color_clear_value_ar = uc.h[2] | ((uint32_t)uc.h[3] << 16);
} else {
r300->color_clear_value = uc.ui;
}
}
 
DEBUG_GET_ONCE_BOOL_OPTION(hyperz, "RADEON_HYPERZ", FALSE)
 
/* Clear currently bound buffers.
 *
 * `buffers` is a mask of PIPE_CLEAR_* bits. The function first tries the
 * hardware fast paths (ZMASK clear, HiZ clear, CMASK clear, CBZB clear).
 * The ZMASK and CMASK paths remove their bits from `buffers`; whatever is
 * left afterwards is cleared with util_blitter_clear. If nothing is left,
 * the dirty clear atoms are emitted directly. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
const union pipe_color_union *color,
double depth,
unsigned stencil)
{
/* My notes about Zbuffer compression:
*
* 1) The zbuffer must be micro-tiled and whole microtiles must be
* written if compression is enabled. If microtiling is disabled,
* it locks up.
*
* 2) There is ZMASK RAM which contains a compressed zbuffer.
* Each dword of the Z Mask contains compression information
* for 16 4x4 pixel tiles, that is 2 bits for each tile.
* On chips with 2 Z pipes, every other dword maps to a different
* pipe. On newer chipsets, there is a new compression mode
* with 8x8 pixel tiles per 2 bits.
*
* 3) The FASTFILL bit has nothing to do with filling. It only tells hw
* it should look in the ZMASK RAM first before fetching from a real
* zbuffer.
*
* 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
* during zbuffer reads instead of the value that is actually stored
* in the zbuffer memory. A pixel is in a cleared state when its ZMASK
* is equal to 0. Therefore, if you clear ZMASK with zeros, you may
* leave the zbuffer memory uninitialized, but then you must enable
* compression, so that the ZMASK RAM is actually used.
*
* 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
* during zbuffer updates. A special decompressing operation should be
* used to fully decompress a zbuffer, which basically just stores all
* compressed tiles in ZMASK to the zbuffer memory.
*
* 6) For a 16-bit zbuffer, compression causes a hang with one or
* two samples and should not be used.
*
* 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
* to avoid needless decompression.
*
* 8) Fastfill must not be used if reading of compressed Z data is disabled
* and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
* i.e. it cannot be used to compress the zbuffer.
*
* 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
*
* - Marek
*/
 
struct r300_context* r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
/* The blitter clear uses the framebuffer size unless the CBZB path below
 * replaces it with the surface's CBZB dimensions. */
uint32_t width = fb->width;
uint32_t height = fb->height;
/* Depth clear value to put back after a CBZB clear has temporarily
 * overwritten hyperz->zb_depthclearvalue. */
uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
 
/* Use fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
boolean zmask_clear, hiz_clear;
 
/* If both depth and stencil are present, they must be cleared together. */
if (fb->zsbuf->texture->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
(buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) {
zmask_clear = FALSE;
hiz_clear = FALSE;
} else {
zmask_clear = r300_fast_zclear_allowed(r300, buffers);
hiz_clear = r300_hiz_clear_allowed(r300);
}
 
/* If we need Hyper-Z. */
if (zmask_clear || hiz_clear) {
/* Try to obtain the access to Hyper-Z buffers if we don't have one. */
if (!r300->hyperz_enabled &&
(r300->screen->caps.is_r500 || debug_get_option_hyperz())) {
r300->hyperz_enabled =
r300->rws->cs_request_feature(r300->cs,
RADEON_FID_R300_HYPERZ_ACCESS,
TRUE);
if (r300->hyperz_enabled) {
/* Need to emit HyperZ buffer regs for the first time. */
r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
}
 
/* Setup Hyper-Z clears. */
if (r300->hyperz_enabled) {
if (zmask_clear) {
/* Also update the saved value so that the CBZB restore at the end
 * of this function keeps the new depth clear value. */
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
 
r300_mark_atom_dirty(r300, &r300->zmask_clear);
r300_mark_atom_dirty(r300, &r300->gpu_flush);
/* Depth/stencil is handled by the ZMASK clear; the blitter must not
 * clear it again. */
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
 
if (hiz_clear) {
r300->hiz_clear_value = r300_hiz_clear_value(depth);
r300_mark_atom_dirty(r300, &r300->hiz_clear);
r300_mark_atom_dirty(r300, &r300->gpu_flush);
}
r300->num_z_clears++;
}
}
}
 
/* Use fast color clear for an AA colorbuffer.
* The CMASK is shared between all colorbuffers, so we use it
* if there is only one colorbuffer bound. */
if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 &&
r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) {
/* Try to obtain the access to the CMASK if we don't have one. */
if (!r300->cmask_access) {
r300->cmask_access =
r300->rws->cs_request_feature(r300->cs,
RADEON_FID_R300_CMASK_ACCESS,
TRUE);
}
 
/* Setup the clear. */
if (r300->cmask_access) {
/* Pair the resource with the CMASK to avoid other resources
* accessing it. */
if (!r300->screen->cmask_resource) {
pipe_mutex_lock(r300->screen->cmask_mutex);
/* Double checking (first unlocked, then locked). */
if (!r300->screen->cmask_resource) {
/* Don't reference this, so that the texture can be
* destroyed while set in cmask_resource.
* Then in texture_destroy, we set cmask_resource to NULL. */
r300->screen->cmask_resource = fb->cbufs[0]->texture;
}
pipe_mutex_unlock(r300->screen->cmask_mutex);
}
 
/* Only the resource that owns the CMASK may use the fast color clear. */
if (r300->screen->cmask_resource == fb->cbufs[0]->texture) {
r300_set_clear_color(r300, color);
r300_mark_atom_dirty(r300, &r300->cmask_clear);
r300_mark_atom_dirty(r300, &r300->gpu_flush);
buffers &= ~PIPE_CLEAR_COLOR;
}
}
}
/* Enable CBZB clear. */
else if (r300_cbzb_clear_allowed(r300, buffers)) {
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
 
/* The color is written through the depth clear value; the previous value
 * is restored from hyperz_dcv after the clear. */
hyperz->zb_depthclearvalue =
r300_depth_clear_cb_value(surf->base.format, color->f);
 
width = surf->cbzb_width;
height = surf->cbzb_height;
 
r300->cbzb_clear = TRUE;
r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
 
/* Clear. */
if (buffers) {
/* Clear using the blitter. */
r300_blitter_begin(r300, R300_CLEAR);
util_blitter_clear(r300->blitter,
width,
height,
buffers, color, depth, stencil);
r300_blitter_end(r300);
} else if (r300->zmask_clear.dirty ||
r300->hiz_clear.dirty ||
r300->cmask_clear.dirty) {
/* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
/* Calculate zmask_clear and hiz_clear atom sizes. */
unsigned dwords =
r300->gpu_flush.size +
(r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) +
(r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
(r300->cmask_clear.dirty ? r300->cmask_clear.size : 0) +
r300_get_num_cs_end_dwords(r300);
 
/* Reserve CS space. */
if (dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
}
 
/* Emit clear packets. */
r300_emit_gpu_flush(r300, r300->gpu_flush.size, r300->gpu_flush.state);
r300->gpu_flush.dirty = FALSE;
 
if (r300->zmask_clear.dirty) {
r300_emit_zmask_clear(r300, r300->zmask_clear.size,
r300->zmask_clear.state);
r300->zmask_clear.dirty = FALSE;
}
if (r300->hiz_clear.dirty) {
r300_emit_hiz_clear(r300, r300->hiz_clear.size,
r300->hiz_clear.state);
r300->hiz_clear.dirty = FALSE;
}
if (r300->cmask_clear.dirty) {
r300_emit_cmask_clear(r300, r300->cmask_clear.size,
r300->cmask_clear.state);
r300->cmask_clear.dirty = FALSE;
}
} else {
/* Nothing left for the blitter and no clear atom is dirty. */
assert(0);
}
 
/* Disable CBZB clear. */
if (r300->cbzb_clear) {
r300->cbzb_clear = FALSE;
hyperz->zb_depthclearvalue = hyperz_dcv;
r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
 
/* Enable fastfill and/or hiz.
*
* If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
* looks if zmask/hiz is in use and programs hardware accordingly. */
if (r300->zmask_in_use || r300->hiz_in_use) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
 
/* Fill the given rectangle of the color surface `dst` with `color`,
 * using the blitter. */
static void r300_clear_render_target(struct pipe_context *pipe,
                                     struct pipe_surface *dst,
                                     const union pipe_color_union *color,
                                     unsigned dstx, unsigned dsty,
                                     unsigned width, unsigned height)
{
    struct r300_context *ctx = r300_context(pipe);

    r300_blitter_begin(ctx, R300_CLEAR_SURFACE);
    util_blitter_clear_render_target(ctx->blitter, dst, color,
                                     dstx, dsty, width, height);
    r300_blitter_end(ctx);
}
 
/* Clear the given rectangle of the depth stencil surface `dst` using the
 * blitter. If ZMASK is in use for the bound zbuffer and `dst` belongs to
 * the same texture, the ZMASK is decompressed first. */
static void r300_clear_depth_stencil(struct pipe_context *pipe,
                                     struct pipe_surface *dst,
                                     unsigned clear_flags,
                                     double depth,
                                     unsigned stencil,
                                     unsigned dstx, unsigned dsty,
                                     unsigned width, unsigned height)
{
    struct r300_context *r300 = r300_context(pipe);
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;

    if (r300->zmask_in_use && !r300->locked_zbuffer &&
        fb->zsbuf->texture == dst->texture) {
        r300_decompress_zmask(r300);
    }

    /* XXX Do not decompress ZMask of the currently-set zbuffer. */
    r300_blitter_begin(r300, R300_CLEAR_SURFACE);
    util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags,
                                     depth, stencil,
                                     dstx, dsty, width, height);
    r300_blitter_end(r300);
}
 
/* Decompress the ZMASK of the bound zbuffer with a custom depth clear and
 * mark the ZMASK as no longer in use. Does nothing when the ZMASK is not
 * in use or a zbuffer is locked. */
void r300_decompress_zmask(struct r300_context *r300)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;

    if (r300->locked_zbuffer || !r300->zmask_in_use) {
        return;
    }

    /* Flag the decompression for the Hyper-Z state that gets emitted. */
    r300->zmask_decompress = TRUE;
    r300_mark_atom_dirty(r300, &r300->hyperz_state);

    r300_blitter_begin(r300, R300_DECOMPRESS);
    util_blitter_custom_clear_depth(r300->blitter, fb->width, fb->height, 0,
                                    r300->dsa_decompress_zmask);
    r300_blitter_end(r300);

    /* Done: drop both flags and have the Hyper-Z state emitted again. */
    r300->zmask_decompress = FALSE;
    r300->zmask_in_use = FALSE;
    r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
 
void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
{
struct pipe_framebuffer_state fb;
 
memset(&fb, 0, sizeof(fb));
fb.width = r300->locked_zbuffer->width;
fb.height = r300->locked_zbuffer->height;
fb.zsbuf = r300->locked_zbuffer;
 
r300->context.set_framebuffer_state(&r300->context, &fb);
r300_decompress_zmask(r300);
}
 
void r300_decompress_zmask_locked(struct r300_context *r300)
{
struct pipe_framebuffer_state saved_fb;
 
memset(&saved_fb, 0, sizeof(saved_fb));
util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
r300_decompress_zmask_locked_unsafe(r300);
r300->context.set_framebuffer_state(&r300->context, &saved_fb);
util_unreference_framebuffer_state(&saved_fb);
 
pipe_surface_reference(&r300->locked_zbuffer, NULL);
}
 
/* A format can be blitted when its layout is plain, S3TC or RGTC. */
bool r300_is_blit_supported(enum pipe_format format)
{
    unsigned layout = util_format_description(format)->layout;

    return layout == UTIL_FORMAT_LAYOUT_PLAIN ||
           layout == UTIL_FORMAT_LAYOUT_S3TC ||
           layout == UTIL_FORMAT_LAYOUT_RGTC;
}
 
/* Copy a block of pixels from one surface to another.
 *
 * Copies `src_box` of mipmap level `src_level` of `src` to the position
 * (dstx, dsty, dstz) of level `dst_level` of `dst`.
 *
 * Buffer-to-buffer copies and formats rejected by r300_is_blit_supported
 * go through util_resource_copy_region. Everything else is drawn with the
 * blitter, after the formats (and, for compressed formats, the dimensions
 * and coordinates) have been rewritten into something the hardware can
 * sample and render. If either resource has more than one sample, the
 * function returns without copying anything. */
static void r300_resource_copy_region(struct pipe_context *pipe,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct pipe_screen *screen = pipe->screen;
struct r300_context *r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
/* Level 0 dimensions handed to the custom surface and sampler view below;
 * the compressed path rescales them. */
unsigned src_width0 = r300_resource(src)->tex.width0;
unsigned src_height0 = r300_resource(src)->tex.height0;
unsigned dst_width0 = r300_resource(dst)->tex.width0;
unsigned dst_height0 = r300_resource(dst)->tex.height0;
unsigned layout;
struct pipe_box box, dstbox;
struct pipe_sampler_view src_templ, *src_view;
struct pipe_surface dst_templ, *dst_view;
 
/* Fallback for buffers. */
if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) ||
!r300_is_blit_supported(dst->format)) {
util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
return;
}
 
/* Can't read MSAA textures. */
if (src->nr_samples > 1 || dst->nr_samples > 1) {
return;
}
 
/* The code below changes the texture format so that the copy can be done
* on hardware. E.g. depth-stencil surfaces are copied as RGBA
* colorbuffers. */
 
util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
util_blitter_default_src_texture(&src_templ, src, src_level);
 
layout = util_format_description(dst_templ.format)->layout;
 
/* Handle non-renderable plain formats. */
if (layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(!screen->is_format_supported(screen, src_templ.format, src->target,
src->nr_samples,
PIPE_BIND_SAMPLER_VIEW) ||
!screen->is_format_supported(screen, dst_templ.format, dst->target,
dst->nr_samples,
PIPE_BIND_RENDER_TARGET))) {
/* Substitute a format with the same block size in bytes. */
switch (util_format_get_blocksize(dst_templ.format)) {
case 1:
dst_templ.format = PIPE_FORMAT_I8_UNORM;
break;
case 2:
dst_templ.format = PIPE_FORMAT_B4G4R4A4_UNORM;
break;
case 4:
dst_templ.format = PIPE_FORMAT_B8G8R8A8_UNORM;
break;
case 8:
dst_templ.format = PIPE_FORMAT_R16G16B16A16_UNORM;
break;
default:
/* Only a message is printed here; the format stays as it is and the
 * "Fallback for textures" check below decides what happens. */
debug_printf("r300: copy_region: Unhandled format: %s. Falling back to software.\n"
"r300: copy_region: Software fallback doesn't work for tiled textures.\n",
util_format_short_name(dst_templ.format));
}
src_templ.format = dst_templ.format;
}
 
/* Handle compressed formats. */
if (layout == UTIL_FORMAT_LAYOUT_S3TC ||
layout == UTIL_FORMAT_LAYOUT_RGTC) {
assert(src_templ.format == dst_templ.format);
 
/* Work on a private copy so that the caller's box is not modified. */
box = *src_box;
src_box = &box;
 
dst_width0 = align(dst_width0, 4);
dst_height0 = align(dst_height0, 4);
src_width0 = align(src_width0, 4);
src_height0 = align(src_height0, 4);
box.width = align(box.width, 4);
box.height = align(box.height, 4);
 
switch (util_format_get_blocksize(dst_templ.format)) {
case 8:
/* one 4x4 pixel block has 8 bytes.
* we set 1 pixel = 4 bytes ===> 1 block corresponds to 2 pixels. */
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
dst_width0 = dst_width0 / 2;
src_width0 = src_width0 / 2;
dstx /= 2;
box.x /= 2;
box.width /= 2;
break;
case 16:
/* one 4x4 pixel block has 16 bytes.
* we set 1 pixel = 4 bytes ===> 1 block corresponds to 4 pixels. */
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
break;
}
src_templ.format = dst_templ.format;
 
/* Heights and vertical coordinates are divided by 4 for both block sizes. */
dst_height0 = dst_height0 / 4;
src_height0 = src_height0 / 4;
dsty /= 4;
box.y /= 4;
box.height /= 4;
}
 
/* Fallback for textures. */
if (!screen->is_format_supported(screen, dst_templ.format,
dst->target, dst->nr_samples,
PIPE_BIND_RENDER_TARGET) ||
!screen->is_format_supported(screen, src_templ.format,
src->target, src->nr_samples,
PIPE_BIND_SAMPLER_VIEW)) {
assert(0 && "this shouldn't happen, update r300_is_blit_supported");
util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
return;
}
 
/* Decompress ZMASK. */
if (r300->zmask_in_use && !r300->locked_zbuffer) {
if (fb->zsbuf->texture == src ||
fb->zsbuf->texture == dst) {
r300_decompress_zmask(r300);
}
}
 
dst_view = r300_create_surface_custom(pipe, dst, &dst_templ, dst_width0, dst_height0);
src_view = r300_create_sampler_view_custom(pipe, src, &src_templ, src_width0, src_height0);
 
/* The destination box uses the absolute size of the source box. */
u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height),
abs(src_box->depth), &dstbox);
 
r300_blitter_begin(r300, R300_COPY);
util_blitter_blit_generic(r300->blitter, dst_view, &dstbox,
src_view, src_box, src_width0, src_height0,
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
FALSE);
r300_blitter_end(r300);
 
/* Release the temporary views. */
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
}
 
static boolean r300_is_simple_msaa_resolve(const struct pipe_blit_info *info)
{
unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
 
return info->dst.resource->format == info->src.resource->format &&
info->dst.resource->format == info->dst.format &&
info->src.resource->format == info->src.format &&
!info->scissor_enable &&
info->mask == PIPE_MASK_RGBA &&
dst_width == info->src.resource->width0 &&
dst_height == info->src.resource->height0 &&
info->dst.box.x == 0 &&
info->dst.box.y == 0 &&
info->dst.box.width == dst_width &&
info->dst.box.height == dst_height &&
info->src.box.x == 0 &&
info->src.box.y == 0 &&
info->src.box.width == dst_width &&
info->src.box.height == dst_height &&
(r300_resource(info->dst.resource)->tex.microtile != RADEON_LAYOUT_LINEAR ||
r300_resource(info->dst.resource)->tex.macrotile[info->dst.level] != RADEON_LAYOUT_LINEAR);
}
 
/* Resolve the multisampled resource "src" into one layer of one mip level
 * of "dst" by drawing with the AA-resolve state enabled.
 *
 * pipe       the context
 * dst        resolve destination
 * dst_level  destination mip level
 * dst_layer  destination layer (used as both first and last layer)
 * src        multisampled source
 * format     format used for both temporary surfaces
 *
 * If either temporary surface cannot be created, the function releases
 * whatever it got and returns without touching the hardware state. */
static void r300_simple_msaa_resolve(struct pipe_context *pipe,
                                     struct pipe_resource *dst,
                                     unsigned dst_level,
                                     unsigned dst_layer,
                                     struct pipe_resource *src,
                                     enum pipe_format format)
{
    struct r300_context *r300 = r300_context(pipe);
    struct r300_surface *srcsurf, *dstsurf;
    struct pipe_surface surf_tmpl;
    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;

    memset(&surf_tmpl, 0, sizeof(surf_tmpl));
    surf_tmpl.format = format;
    srcsurf = r300_surface(pipe->create_surface(pipe, src, &surf_tmpl));

    surf_tmpl.format = format;
    surf_tmpl.u.tex.level = dst_level;
    surf_tmpl.u.tex.first_layer =
    surf_tmpl.u.tex.last_layer = dst_layer;
    dstsurf = r300_surface(pipe->create_surface(pipe, dst, &surf_tmpl));

    /* Both surfaces are dereferenced below; do not continue with a NULL one.
     * NOTE(review): assumes r300_surface() maps a NULL pipe_surface to NULL
     * (the casts at the end of this function treat the two types as
     * pointer-compatible) - confirm. */
    if (!srcsurf || !dstsurf) {
        if (srcsurf)
            pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
        if (dstsurf)
            pipe_surface_reference((struct pipe_surface**)&dstsurf, NULL);
        return;
    }

    /* COLORPITCH should contain the tiling info of the resolve buffer.
     * The tiling of the AA buffer isn't programmable anyway. */
    srcsurf->pitch &= ~(R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3));
    srcsurf->pitch |= dstsurf->pitch & (R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3));

    /* Enable AA resolve. */
    aa->dest = dstsurf;
    r300->aa_state.size = 8;
    r300_mark_atom_dirty(r300, &r300->aa_state);

    /* Resolve the surface. */
    r300_blitter_begin(r300, R300_CLEAR_SURFACE);
    util_blitter_custom_color(r300->blitter, &srcsurf->base, NULL);
    r300_blitter_end(r300);

    /* Disable AA resolve. */
    aa->dest = NULL;
    r300->aa_state.size = 4;
    r300_mark_atom_dirty(r300, &r300->aa_state);

    pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
    pipe_surface_reference((struct pipe_surface**)&dstsurf, NULL);
}
 
/* Perform an MSAA resolve for the blit described by "info".
 *
 * When r300_is_simple_msaa_resolve() accepts the blit, the source is
 * resolved straight into the destination. Otherwise the source is first
 * resolved into a temporary single-sample 2D texture of the same format and
 * size, and that temporary is then blitted to the destination with the
 * original blit parameters.
 *
 * If the temporary texture cannot be allocated, the blit is dropped. */
static void r300_msaa_resolve(struct pipe_context *pipe,
                              const struct pipe_blit_info *info)
{
    struct r300_context *r300 = r300_context(pipe);
    struct pipe_screen *screen = pipe->screen;
    struct pipe_resource *tmp, templ;
    struct pipe_blit_info blit;

    assert(info->src.level == 0);
    assert(info->src.box.z == 0);
    assert(info->src.box.depth == 1);
    assert(info->dst.box.depth == 1);

    if (r300_is_simple_msaa_resolve(info)) {
        r300_simple_msaa_resolve(pipe, info->dst.resource, info->dst.level,
                                 info->dst.box.z, info->src.resource,
                                 info->src.format);
        return;
    }

    /* resolve into a temporary texture, then blit */
    memset(&templ, 0, sizeof(templ));
    templ.target = PIPE_TEXTURE_2D;
    templ.format = info->src.resource->format;
    templ.width0 = info->src.resource->width0;
    templ.height0 = info->src.resource->height0;
    templ.depth0 = 1;
    templ.array_size = 1;
    templ.usage = PIPE_USAGE_STATIC;
    templ.flags = R300_RESOURCE_FORCE_MICROTILING;

    tmp = screen->resource_create(screen, &templ);
    /* Bail out instead of handing a NULL resource to the resolve and blit
     * paths below. */
    if (!tmp)
        return;

    /* resolve */
    r300_simple_msaa_resolve(pipe, tmp, 0, 0, info->src.resource,
                             info->src.format);

    /* blit */
    blit = *info;
    blit.src.resource = tmp;
    blit.src.box.z = 0;

    r300_blitter_begin(r300, R300_BLIT);
    util_blitter_blit(r300->blitter, &blit);
    r300_blitter_end(r300);

    pipe_resource_reference(&tmp, NULL);
}
 
static void r300_blit(struct pipe_context *pipe,
const struct pipe_blit_info *blit)
{
struct r300_context *r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct pipe_blit_info info = *blit;
 
/* MSAA resolve. */
if (info.src.resource->nr_samples > 1 &&
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format)) {
r300_msaa_resolve(pipe, &info);
return;
}
 
/* Can't read MSAA textures. */
if (info.src.resource->nr_samples > 1) {
return;
}
 
/* Blit a combined depth-stencil resource as color.
* S8Z24 is the only supported stencil format. */
if ((info.mask & PIPE_MASK_S) &&
info.src.format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
info.dst.format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
if (info.dst.resource->nr_samples > 1) {
/* Cannot do that with MSAA buffers. */
info.mask &= ~PIPE_MASK_S;
if (!(info.mask & PIPE_MASK_Z)) {
return;
}
} else {
/* Single-sample buffer. */
info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM;
info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
if (info.mask & PIPE_MASK_Z) {
info.mask = PIPE_MASK_RGBA; /* depth+stencil */
} else {
info.mask = PIPE_MASK_B; /* stencil only */
}
}
}
 
/* Decompress ZMASK. */
if (r300->zmask_in_use && !r300->locked_zbuffer) {
if (fb->zsbuf->texture == info.src.resource ||
fb->zsbuf->texture == info.dst.resource) {
r300_decompress_zmask(r300);
}
}
 
r300_blitter_begin(r300, R300_BLIT);
util_blitter_blit(r300->blitter, &info);
r300_blitter_end(r300);
}
 
void r300_init_blit_functions(struct r300_context *r300)
{
r300->context.clear = r300_clear;
r300->context.clear_render_target = r300_clear_render_target;
r300->context.clear_depth_stencil = r300_clear_depth_stencil;
r300->context.resource_copy_region = r300_resource_copy_region;
r300->context.blit = r300_blit;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_cb.h
0,0 → 1,151
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/**
* This file contains macros for building command buffers in memory.
*
* Use NEW_CB for buffers with a varying size and it will also allocate
* the buffer.
* Use BEGIN_CB for arrays with a static size.
*
* Example:
*
* uint32_t cb[3];
* CB_LOCALS;
*
* BEGIN_CB(cb, 3);
* OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
* OUT_CB(blend_color_red_alpha);
* OUT_CB(blend_color_green_blue);
* END_CB;
*
* And later:
*
* CS_LOCALS;
* WRITE_CS_TABLE(cb, 3);
*
* Or using a little slower variant:
*
* CS_LOCALS;
* BEGIN_CS(cb, 3);
* OUT_CS_TABLE(cb, 3);
* END_CS;
*/
 
#ifndef R300_CB_H
#define R300_CB_H
 
#include "r300_reg.h"
 
/* Yes, I know macros are ugly. However, they are much prettier than the code
* that they neatly hide away, and don't have the cost of function setup, so
* we're going to use them. */
 
/**
* Command buffer setup.
*/
 
/* Debug builds additionally keep cs_count, the number of dwords that are
 * still expected to be written, so that END_CB can report a mismatch. */
#ifdef DEBUG

/* Declare the write cursor (cs_ptr) and the remaining-dword counter
 * (cs_count); the (void) casts silence unused-variable warnings. */
#define CB_LOCALS \
int cs_count = 0; \
uint32_t *cs_ptr = NULL; \
(void) cs_count; (void) cs_ptr

/* Start writing "size" dwords into the existing buffer "ptr", whose
 * elements must be uint32_t-sized. */
#define BEGIN_CB(ptr, size) do { \
assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
cs_count = (size); \
cs_ptr = (ptr); \
} while (0)

/* Like BEGIN_CB, but malloc a buffer of "size" dwords and store it in
 * "ptr" as well. The malloc result is not checked here. */
#define NEW_CB(ptr, size) \
do { \
assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
cs_count = (size); \
cs_ptr = (ptr) = malloc((size) * sizeof(uint32_t)); \
} while (0)

/* Warn if the number of dwords written differs from the announced size. */
#define END_CB do { \
if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
cs_count, __FUNCTION__, __FILE__, __LINE__); \
} while (0)

/* Account for x dwords having been written. */
#define CB_USED_DW(x) cs_count -= x

#else

/* Release builds only keep the write cursor; "size" is ignored by BEGIN_CB,
 * and END_CB / CB_USED_DW expand to nothing. */
#define CB_LOCALS \
uint32_t *cs_ptr = NULL; (void) cs_ptr

#define NEW_CB(ptr, size) \
cs_ptr = (ptr) = malloc((size) * sizeof(uint32_t))

#define BEGIN_CB(ptr, size) cs_ptr = (ptr)
#define END_CB
#define CB_USED_DW(x)

#endif
 
 
/**
* Storing pure DWORDs.
*/
 
/* Append one dword to the command buffer and advance the cursor. */
#define OUT_CB(value) do { \
    *cs_ptr++ = (value); \
    CB_USED_DW(1); \
} while (0)
 
/* Append "count" dwords read from "values" to the command buffer and
 * advance the cursor. The arguments are parenthesized so that an expression
 * such as "a + b" is scaled by sizeof(uint32_t) as a whole. "count" is
 * evaluated more than once, so it must not have side effects. */
#define OUT_CB_TABLE(values, count) do { \
    memcpy(cs_ptr, (values), (count) * sizeof(uint32_t)); \
    cs_ptr += (count); \
    CB_USED_DW(count); \
} while (0)
 
/* Append a float, converted with fui(), as one dword.
 * There is no trailing semicolon in the expansion: the caller supplies it,
 * which keeps "if (c) OUT_CB_32F(x); else ..." well-formed. */
#define OUT_CB_32F(value) \
    OUT_CB(fui(value))
 
/* Write a single register: a packet0 header followed by its value. */
#define OUT_CB_REG(reg, value) do { \
    assert(reg); \
    OUT_CB(CP_PACKET0(reg, 0)); \
    OUT_CB(value); \
} while (0)

/* Emit only the packet0 header for "count" consecutive registers starting
 * at "reg"; the caller writes the values afterwards.
 * Note: This expects count to be the number of registers,
 * not the actual packet0 count! */
#define OUT_CB_REG_SEQ(reg, count) do { \
    assert(reg); \
    OUT_CB(CP_PACKET0(reg, (count) - 1)); \
} while (0)

/* Same header as OUT_CB_REG_SEQ, with RADEON_ONE_REG_WR ORed in. */
#define OUT_CB_ONE_REG(reg, count) do { \
    assert(reg); \
    OUT_CB(CP_PACKET0(reg, (count) - 1) | RADEON_ONE_REG_WR); \
} while (0)

/* Emit a packet3 header. */
#define OUT_CB_PKT3(op, count) \
    OUT_CB(CP_PACKET3(op, count))
 
#endif /* R300_CB_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_chipset.c
0,0 → 1,175
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2011 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_chipset.h"
#include "../../winsys/radeon/drm/radeon_winsys.h"
 
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "os/os_process.h"
 
#include <stdio.h>
#include <errno.h>
 
/* r300_chipset: A file all to itself for deducing the various properties of
* Radeons. */
 
/* Clear zmask_ram and hiz_ram in "caps" when the current process name is one
 * of the blacklisted programs below. Nothing is changed when the process
 * name cannot be obtained. */
static void r300_apply_hyperz_blacklist(struct r300_capabilities* caps)
{
    static const char *list[] = {
        "X", /* the DDX or indirect rendering */
        "Xorg", /* (alternative name) */
        "check_gl_texture_size", /* compiz */
        "Compiz",
        "gnome-session-check-accelerated-helper",
        "gnome-shell",
        "kwin_opengl_test",
        "kwin",
        "firefox",
    };
    char proc_name[128];
    int idx;

    if (!os_get_process_name(proc_name, sizeof(proc_name)))
        return;

    for (idx = 0; idx < Elements(list); idx++) {
        if (strcmp(list[idx], proc_name) != 0)
            continue;

        /* Exact match: report no ZMASK and no HiZ RAM. */
        caps->zmask_ram = 0;
        caps->hiz_ram = 0;
        return;
    }
}
 
/* Fill "caps" from a PCI device ID.
 *
 * The chip family comes from the CHIPSET() table in pci_ids/r300_pci_ids.h;
 * an ID missing from that table prints a message and aborts. The family then
 * selects the vertex FPU count and the HiZ/ZMASK RAM sizes, from which the
 * remaining flags are derived. TCL can be turned off with the RADEON_NO_TCL
 * debug option, and the HyperZ blacklist is applied last. */
void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps)
{
    switch (pci_id) {
#define CHIPSET(pci_id, name, chipfamily) \
    case pci_id: \
        caps->family = CHIP_##chipfamily; \
        break;
#include "pci_ids/r300_pci_ids.h"
#undef CHIPSET

    default:
        fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...",
                pci_id);
        abort();
    }

    /* Start from "nothing there"; each family adds what it has. */
    caps->high_second_pipe = FALSE;
    caps->num_vert_fpus = 0;
    caps->hiz_ram = 0;
    caps->zmask_ram = 0;

    switch (caps->family) {
    case CHIP_R300:
    case CHIP_R350:
        caps->high_second_pipe = TRUE;
        caps->num_vert_fpus = 4;
        caps->hiz_ram = R300_HIZ_LIMIT;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        break;

    case CHIP_RV350:
    case CHIP_RV370:
        caps->high_second_pipe = TRUE;
        caps->num_vert_fpus = 2;
        caps->zmask_ram = RV3xx_ZMASK_SIZE;
        break;

    case CHIP_RV380:
        caps->high_second_pipe = TRUE;
        caps->num_vert_fpus = 2;
        caps->hiz_ram = R300_HIZ_LIMIT;
        caps->zmask_ram = RV3xx_ZMASK_SIZE;
        break;

    case CHIP_RS400:
    case CHIP_RS600:
    case CHIP_RS690:
    case CHIP_RS740:
        /* Defaults only. */
        break;

    case CHIP_RC410:
    case CHIP_RS480:
        caps->zmask_ram = RV3xx_ZMASK_SIZE;
        break;

    case CHIP_R420:
    case CHIP_R423:
    case CHIP_R430:
    case CHIP_R480:
    case CHIP_R481:
    case CHIP_RV410:
        caps->num_vert_fpus = 6;
        caps->hiz_ram = R300_HIZ_LIMIT;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        break;

    case CHIP_R520:
        caps->num_vert_fpus = 8;
        caps->hiz_ram = R300_HIZ_LIMIT;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        break;

    case CHIP_RV515:
        caps->num_vert_fpus = 2;
        caps->hiz_ram = R300_HIZ_LIMIT;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        break;

    case CHIP_RV530:
        caps->num_vert_fpus = 5;
        caps->hiz_ram = RV530_HIZ_LIMIT;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        break;

    case CHIP_R580:
    case CHIP_RV560:
    case CHIP_RV570:
        caps->num_vert_fpus = 8;
        caps->hiz_ram = RV530_HIZ_LIMIT;
        caps->zmask_ram = PIPE_ZMASK_SIZE;
        break;
    }

    /* Generation flags, derived from the ordering of the family values. */
    caps->is_rv350 = caps->family >= CHIP_RV350;
    caps->is_r500 = caps->family >= CHIP_RV515;
    caps->is_r400 = caps->family >= CHIP_R420 && !caps->is_r500;

    /* Everything that follows from the flags above. */
    caps->num_tex_units = 16;
    caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4;
    caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
    caps->has_us_format = caps->family == CHIP_R520;

    /* TCL needs vertex FPUs; the debug option is only consulted when the
     * hardware has them. */
    caps->has_tcl = caps->num_vert_fpus > 0 &&
                    !debug_get_bool_option("RADEON_NO_TCL", FALSE);

    r300_apply_hyperz_blacklist(caps);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_chipset.h
0,0 → 1,93
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_CHIPSET_H
#define R300_CHIPSET_H
 
#include "pipe/p_compiler.h"
 
/* These are sizes in dwords; r300_parse_chipset() stores them in
 * r300_capabilities::hiz_ram. */
#define R300_HIZ_LIMIT 10240
#define RV530_HIZ_LIMIT 15360

/* rv3xx have only one pipe.
 * These values end up in r300_capabilities::zmask_ram.
 * NOTE(review): presumably PIPE_ZMASK_SIZE is a per-pipe size while
 * RV3xx_ZMASK_SIZE is the size for the single-pipe chips - confirm. */
#define PIPE_ZMASK_SIZE 4096
#define RV3xx_ZMASK_SIZE 5120
 
/* The size of a compressed tile. Each compressed tile takes 2 bits
 * in the ZMASK RAM, so there are always 16 tiles per dword.
 * r300_parse_chipset() selects 8x8 for RV350 and newer, 4x4 otherwise. */
enum r300_zmask_compression {
R300_ZCOMP_4X4 = 4,
R300_ZCOMP_8X8 = 8
};
 
/* Structure containing all the possible information about a specific Radeon
 * in the R3xx, R4xx, and R5xx families. It is filled in by
 * r300_parse_chipset(). */
struct r300_capabilities {
/* Chipset family (one of the CHIP_* values). */
int family;
/* The number of vertex floating-point units. */
unsigned num_vert_fpus;
/* The number of texture units. */
unsigned num_tex_units;
/* Whether or not TCL is physically present. r300_parse_chipset() also
 * clears this when the RADEON_NO_TCL debug option is set. */
boolean has_tcl;
/* Some chipsets do not have HiZ RAM - others have varying amounts. */
int hiz_ram;
/* Some chipsets have ZMASK RAM per pipe, some don't. */
int zmask_ram;
/* Compression mode for ZMASK. */
enum r300_zmask_compression z_compress;
/* Whether or not this is RV350 or newer, including all r400 and r500
 * chipsets. The differences compared to the oldest r300 chips are:
 * - Blend LTE/GTE thresholds
 * - Better MACRO_SWITCH in texture tiling
 * - Half float vertex
 * - More HyperZ optimizations */
boolean is_rv350;
/* Whether or not this is R400. The differences compared to their rv350
 * cousins are:
 * - Extended fragment shader registers
 * - 3DC texture compression (RGTC2) */
boolean is_r400;
/* Whether or not this is an RV515 or newer; R500s have many differences
 * that require extra consideration, compared to their rv350 cousins:
 * - Extra bit of width and height on texture sizes
 * - Blend color is split across two registers
 * - Universal Shader (US) block used for fragment shaders
 * - FP16 blending and multisampling
 * - Full RGTC texture compression
 * - 24-bit depth textures
 * - Stencil back-face reference value
 * - Ability to render up to 2^24 - 1 vertices with signed index offset */
boolean is_r500;
/* Whether or not the second pixel pipe is accessed with the high bit. */
boolean high_second_pipe;
/* DXTC texture swizzling. */
boolean dxtc_swizzle;
/* Whether R500_US_FORMAT0_0 exists (R520-only and depends on DRM). */
boolean has_us_format;
};
 
/* Parse a PCI ID and fill an r300_capabilities struct with information. */
void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps);
 
#endif /* R300_CHIPSET_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_context.c
0,0 → 1,507
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "draw/draw_context.h"
 
#include "util/u_memory.h"
#include "util/u_sampler.h"
#include "util/u_simple_list.h"
#include "util/u_upload_mgr.h"
#include "os/os_time.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
 
#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
#include "compiler/radeon_regalloc.h"
 
/* Drop every reference the context still holds: framebuffer surfaces, bound
 * sampler views, the dummy texkill sampler view, the manually-created vertex
 * buffers and the DSA state used for ZMASK decompression.
 *
 * This is also reached from the failure path of r300_create_context(), which
 * can jump to destruction before the atoms are allocated and before the
 * state functions are installed. Each piece is therefore released only if it
 * exists, instead of dereferencing a NULL state pointer or calling a NULL
 * function pointer. */
static void r300_release_referenced_objects(struct r300_context *r300)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_textures_state *textures =
        (struct r300_textures_state*)r300->textures_state.state;
    unsigned i;

    /* Framebuffer state. */
    if (fb)
        util_unreference_framebuffer_state(fb);

    /* Textures. */
    if (textures) {
        for (i = 0; i < textures->sampler_view_count; i++)
            pipe_sampler_view_reference(
                (struct pipe_sampler_view**)&textures->sampler_views[i], NULL);
    }

    /* The special dummy texture for texkill. */
    if (r300->texkill_sampler) {
        pipe_sampler_view_reference(
            (struct pipe_sampler_view**)&r300->texkill_sampler,
            NULL);
    }

    /* Manually-created vertex buffers. */
    pipe_resource_reference(&r300->dummy_vb.buffer, NULL);
    pb_reference(&r300->vbo, NULL);

    /* The DSA state only exists once it has been created through the
     * context's own create hook, so a non-NULL state implies that the
     * delete hook is installed as well. */
    if (r300->dsa_decompress_zmask) {
        r300->context.delete_depth_stencil_alpha_state(&r300->context,
                                                       r300->dsa_decompress_zmask);
    }
}
 
/* pipe_context::destroy entry point: tear down everything that
 * r300_create_context() may have set up, then free the context itself.
 * Optional members are only destroyed when they were created. */
static void r300_destroy_context(struct pipe_context* context)
{
    struct r300_context* r300 = r300_context(context);

    /* Give back the features requested on the command stream. */
    if (r300->cs) {
        if (r300->hyperz_enabled) {
            r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE);
        }
        if (r300->cmask_access) {
            r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE);
        }
    }

    if (r300->blitter) {
        util_blitter_destroy(r300->blitter);
    }
    if (r300->draw) {
        draw_destroy(r300->draw);
    }
    if (r300->uploader) {
        u_upload_destroy(r300->uploader);
    }

    /* XXX: This function assumes r300->query_list was initialized */
    r300_release_referenced_objects(r300);

    if (r300->cs) {
        r300->rws->cs_destroy(r300->cs);
    }

    rc_destroy_regalloc_state(&r300->fs_regalloc_state);

    /* XXX: No way to tell if this was initialized or not? */
    util_slab_destroy(&r300->pool_transfers);

    /* Free the structs allocated in r300_setup_atoms(). aa_state is the
     * first one allocated there, so it serves as the "atoms exist" flag. */
    if (r300->aa_state.state) {
        FREE(r300->aa_state.state);
        FREE(r300->blend_color_state.state);
        FREE(r300->clip_state.state);
        FREE(r300->fb_state.state);
        FREE(r300->gpu_flush.state);
        FREE(r300->hyperz_state.state);
        FREE(r300->invariant_state.state);
        FREE(r300->rs_block_state.state);
        FREE(r300->sample_mask.state);
        FREE(r300->scissor_state.state);
        FREE(r300->textures_state.state);
        FREE(r300->vap_invariant_state.state);
        FREE(r300->viewport_state.state);
        FREE(r300->ztop_state.state);
        FREE(r300->fs_constants.state);
        FREE(r300->vs_constants.state);
        /* Only allocated for the software TCL path. */
        if (!r300->screen->caps.has_tcl) {
            FREE(r300->vertex_stream_state.state);
        }
    }

    FREE(r300);
}
 
/* Flush callback registered on the command stream; "data" is the
 * r300_context that was passed at registration time. */
static void r300_flush_callback(void *data, unsigned flags)
{
    struct r300_context *r300 = (struct r300_context *)data;

    r300_flush(&r300->context, flags, NULL);
}
 
/* Initialize the atom r300-><atomname>: record its name as a string, clear
 * its state pointer, set its size, bind r300_emit_<atomname> as the emit
 * function and mark it clean. Expects a variable named "r300" in scope. */
#define R300_INIT_ATOM(atomname, atomsize) \
do { \
r300->atomname.name = #atomname; \
r300->atomname.state = NULL; \
r300->atomname.size = atomsize; \
r300->atomname.emit = r300_emit_##atomname; \
r300->atomname.dirty = FALSE; \
} while (0)

/* Allocate zero-initialized storage of type "struct statetype" for the
 * atom's state. On allocation failure this executes "return FALSE" in the
 * enclosing function, so it can only be used in functions returning
 * boolean. */
#define R300_ALLOC_ATOM(atomname, statetype) \
do { \
r300->atomname.state = CALLOC_STRUCT(statetype); \
if (r300->atomname.state == NULL) \
return FALSE; \
} while (0)
 
static boolean r300_setup_atoms(struct r300_context* r300)
{
boolean is_rv350 = r300->screen->caps.is_rv350;
boolean is_r500 = r300->screen->caps.is_r500;
boolean has_tcl = r300->screen->caps.has_tcl;
boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6;
 
/* Create the actual atom list.
*
* Some atoms never change size, others change every emit - those have
* the size of 0 here.
*
* NOTE: The framebuffer state is split into these atoms:
* - gpu_flush (unpipelined regs)
* - aa_state (unpipelined regs)
* - fb_state (unpipelined regs)
* - hyperz_state (unpipelined regs followed by pipelined ones)
* - fb_state_pipelined (pipelined regs)
* The motivation behind this is to be able to emit a strict
* subset of the regs, and to have reasonable register ordering. */
/* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */
R300_INIT_ATOM(gpu_flush, 9);
R300_INIT_ATOM(aa_state, 4);
R300_INIT_ATOM(fb_state, 0);
R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8);
/* ZB (unpipelined), SC. */
R300_INIT_ATOM(ztop_state, 2);
/* ZB, FG. */
R300_INIT_ATOM(dsa_state, is_r500 ? (drm_2_6_0 ? 10 : 8) : 6);
/* RB3D. */
R300_INIT_ATOM(blend_state, 8);
R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2);
/* SC. */
R300_INIT_ATOM(sample_mask, 2);
R300_INIT_ATOM(scissor_state, 3);
/* GB, FG, GA, SU, SC, RB3D. */
R300_INIT_ATOM(invariant_state, 14 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0));
/* VAP. */
R300_INIT_ATOM(viewport_state, 9);
R300_INIT_ATOM(pvs_flush, 2);
R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9);
R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
R300_INIT_ATOM(clip_state, has_tcl ? 3 + (6 * 4) : 0);
/* VAP, RS, GA, GB, SU, SC. */
R300_INIT_ATOM(rs_block_state, 0);
R300_INIT_ATOM(rs_state, 0);
/* SC, US. */
R300_INIT_ATOM(fb_state_pipelined, 8);
/* US. */
R300_INIT_ATOM(fs, 0);
R300_INIT_ATOM(fs_rc_constant_state, 0);
R300_INIT_ATOM(fs_constants, 0);
/* TX. */
R300_INIT_ATOM(texture_cache_inval, 2);
R300_INIT_ATOM(textures_state, 0);
/* Clear commands */
R300_INIT_ATOM(hiz_clear, r300->screen->caps.hiz_ram > 0 ? 4 : 0);
R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 4 : 0);
R300_INIT_ATOM(cmask_clear, 4);
/* ZB (unpipelined), SU. */
R300_INIT_ATOM(query_start, 4);
 
/* Replace emission functions for r500. */
if (is_r500) {
r300->fs.emit = r500_emit_fs;
r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state;
r300->fs_constants.emit = r500_emit_fs_constants;
}
 
/* Some non-CSO atoms need explicit space to store the state locally. */
R300_ALLOC_ATOM(aa_state, r300_aa_state);
R300_ALLOC_ATOM(blend_color_state, r300_blend_color_state);
R300_ALLOC_ATOM(clip_state, r300_clip_state);
R300_ALLOC_ATOM(hyperz_state, r300_hyperz_state);
R300_ALLOC_ATOM(invariant_state, r300_invariant_state);
R300_ALLOC_ATOM(textures_state, r300_textures_state);
R300_ALLOC_ATOM(vap_invariant_state, r300_vap_invariant_state);
R300_ALLOC_ATOM(viewport_state, r300_viewport_state);
R300_ALLOC_ATOM(ztop_state, r300_ztop_state);
R300_ALLOC_ATOM(fb_state, pipe_framebuffer_state);
R300_ALLOC_ATOM(gpu_flush, pipe_framebuffer_state);
r300->sample_mask.state = malloc(4);
R300_ALLOC_ATOM(scissor_state, pipe_scissor_state);
R300_ALLOC_ATOM(rs_block_state, r300_rs_block);
R300_ALLOC_ATOM(fs_constants, r300_constant_buffer);
R300_ALLOC_ATOM(vs_constants, r300_constant_buffer);
if (!r300->screen->caps.has_tcl) {
R300_ALLOC_ATOM(vertex_stream_state, r300_vertex_stream_state);
}
 
/* Some non-CSO atoms don't use the state pointer. */
r300->fb_state_pipelined.allow_null_state = TRUE;
r300->fs_rc_constant_state.allow_null_state = TRUE;
r300->pvs_flush.allow_null_state = TRUE;
r300->query_start.allow_null_state = TRUE;
r300->texture_cache_inval.allow_null_state = TRUE;
 
/* Some states must be marked as dirty here to properly set up
* hardware in the first command stream. */
r300_mark_atom_dirty(r300, &r300->invariant_state);
r300_mark_atom_dirty(r300, &r300->pvs_flush);
r300_mark_atom_dirty(r300, &r300->vap_invariant_state);
r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
r300_mark_atom_dirty(r300, &r300->textures_state);
 
return TRUE;
}
 
/* Not every state tracker calls every driver function before the first draw
* call and we must initialize the command buffers somehow. */
static void r300_init_states(struct pipe_context *pipe)
{
struct r300_context *r300 = r300_context(pipe);
struct pipe_blend_color bc = {{0}};
struct pipe_clip_state cs = {{{0}}};
struct pipe_scissor_state ss = {0};
struct r300_gpu_flush *gpuflush =
(struct r300_gpu_flush*)r300->gpu_flush.state;
struct r300_vap_invariant_state *vap_invariant =
(struct r300_vap_invariant_state*)r300->vap_invariant_state.state;
struct r300_invariant_state *invariant =
(struct r300_invariant_state*)r300->invariant_state.state;
 
CB_LOCALS;
 
pipe->set_blend_color(pipe, &bc);
pipe->set_clip_state(pipe, &cs);
pipe->set_scissor_states(pipe, 0, 1, &ss);
pipe->set_sample_mask(pipe, ~0);
 
/* Initialize the GPU flush. */
{
BEGIN_CB(gpuflush->cb_flush_clean, 6);
 
/* Flush and free renderbuffer caches. */
OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT,
R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT,
R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
 
/* Wait until the GPU is idle.
* This fixes random pixels sometimes appearing probably caused
* by incomplete rendering. */
OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
END_CB;
}
 
/* Initialize the VAP invariant state. */
{
BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size);
OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
OUT_CB_32F(1.0);
OUT_CB_32F(1.0);
OUT_CB_32F(1.0);
OUT_CB_32F(1.0);
OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
 
if (r300->screen->caps.is_r500) {
OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0);
}
END_CB;
}
 
/* Initialize the invariant state. */
{
BEGIN_CB(invariant->cb, r300->invariant_state.size);
OUT_CB_REG(R300_GB_SELECT, 0);
OUT_CB_REG(R300_FG_FOG_BLEND, 0);
OUT_CB_REG(R300_GA_OFFSET, 0);
OUT_CB_REG(R300_SU_TEX_WRAP, 0);
OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
 
if (r300->screen->caps.is_rv350) {
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
}
 
if (r300->screen->caps.is_r500) {
OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0);
OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0);
}
END_CB;
}
 
/* Initialize the hyperz state. */
{
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size);
OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT,
R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
OUT_CB_REG(R300_ZB_BW_CNTL, 0);
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
 
if (r300->screen->caps.is_r500 ||
(r300->screen->caps.is_rv350 &&
r300->screen->info.drm_minor >= 6)) {
OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0);
}
END_CB;
}
}
 
/* Create an r300 pipe_context for "screen".
 *
 * screen  the r300 screen the context belongs to
 * priv    opaque pointer stored in pipe_context::priv
 *
 * Returns the new context, or NULL on failure. After the context structure
 * itself has been allocated, every failure goes through
 * r300_destroy_context(), which releases whatever was set up so far. */
struct pipe_context* r300_create_context(struct pipe_screen* screen,
                                         void *priv)
{
    struct r300_context* r300 = CALLOC_STRUCT(r300_context);
    struct r300_screen* r300screen = r300_screen(screen);
    struct radeon_winsys *rws = r300screen->rws;

    if (!r300)
        return NULL;

    r300->rws = rws;
    r300->screen = r300screen;

    r300->context.screen = screen;
    r300->context.priv = priv;

    r300->context.destroy = r300_destroy_context;

    util_slab_create(&r300->pool_transfers,
                     sizeof(struct pipe_transfer), 64,
                     UTIL_SLAB_SINGLETHREADED);

    r300->cs = rws->cs_create(rws, RING_GFX, NULL);
    if (r300->cs == NULL)
        goto fail;

    if (!r300screen->caps.has_tcl) {
        /* Create a Draw. This is used for SW TCL. */
        r300->draw = draw_create(&r300->context);
        if (r300->draw == NULL)
            goto fail;
        /* Enable our renderer. */
        draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
        /* Disable converting points/lines to triangles. */
        draw_wide_line_threshold(r300->draw, 10000000.f);
        draw_wide_point_threshold(r300->draw, 10000000.f);
        draw_wide_point_sprites(r300->draw, FALSE);
        draw_enable_line_stipple(r300->draw, TRUE);
        draw_enable_point_sprites(r300->draw, FALSE);
    }

    if (!r300_setup_atoms(r300))
        goto fail;

    r300_init_blit_functions(r300);
    r300_init_flush_functions(r300);
    r300_init_query_functions(r300);
    r300_init_state_functions(r300);
    r300_init_resource_functions(r300);
    r300_init_render_functions(r300);
    r300_init_states(&r300->context);

    r300->context.create_video_decoder = vl_create_decoder;
    r300->context.create_video_buffer = vl_video_buffer_create;

    r300->uploader = u_upload_create(&r300->context, 256 * 1024, 4,
                                     PIPE_BIND_CUSTOM);
    /* Treat a failed uploader creation like the other creation failures
     * in this function. */
    if (r300->uploader == NULL)
        goto fail;

    r300->blitter = util_blitter_create(&r300->context);
    if (r300->blitter == NULL)
        goto fail;
    r300->blitter->draw_rectangle = r300_blitter_draw_rectangle;

    rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300);

    /* The KIL opcode needs the first texture unit to be enabled
     * on r3xx-r4xx. In order to calm down the CS checker, we bind this
     * dummy texture there. */
    if (!r300->screen->caps.is_r500) {
        struct pipe_resource *tex;
        struct pipe_resource rtempl = {{0}};
        struct pipe_sampler_view vtempl = {{0}};

        rtempl.target = PIPE_TEXTURE_2D;
        rtempl.format = PIPE_FORMAT_I8_UNORM;
        rtempl.usage = PIPE_USAGE_IMMUTABLE;
        rtempl.width0 = 1;
        rtempl.height0 = 1;
        rtempl.depth0 = 1;
        tex = screen->resource_create(screen, &rtempl);
        /* tex is dereferenced right below (tex->format). */
        if (tex == NULL)
            goto fail;

        u_sampler_view_default_template(&vtempl, tex, tex->format);

        r300->texkill_sampler = (struct r300_sampler_view*)
            r300->context.create_sampler_view(&r300->context, tex, &vtempl);

        /* The sampler view holds its own reference to the texture. */
        pipe_resource_reference(&tex, NULL);
    }

    if (r300screen->caps.has_tcl) {
        struct pipe_resource vb;
        memset(&vb, 0, sizeof(vb));
        vb.target = PIPE_BUFFER;
        vb.format = PIPE_FORMAT_R8_UNORM;
        vb.usage = PIPE_USAGE_STATIC;
        vb.width0 = sizeof(float) * 16;
        vb.height0 = 1;
        vb.depth0 = 1;

        r300->dummy_vb.buffer = screen->resource_create(screen, &vb);
        r300->context.set_vertex_buffers(&r300->context, 0, 1, &r300->dummy_vb);
    }

    {
        /* DSA state with only depth writes enabled, kept for ZMASK
         * decompression. */
        struct pipe_depth_stencil_alpha_state dsa;
        memset(&dsa, 0, sizeof(dsa));
        dsa.depth.writemask = 1;

        r300->dsa_decompress_zmask =
            r300->context.create_depth_stencil_alpha_state(&r300->context,
                                                           &dsa);
    }

    r300->hyperz_time_of_last_flush = os_time_get();

    /* Register allocator state */
    rc_init_regalloc_state(&r300->fs_regalloc_state);

    /* Print driver info: always in debug builds, otherwise only when the
     * DBG_INFO debug flag is set. */
#ifdef DEBUG
    {
#else
    if (DBG_ON(r300, DBG_INFO)) {
#endif
        fprintf(stderr,
                "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
                "r300: GART size: %d MB, VRAM size: %d MB\n"
                "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n",
                r300->screen->info.drm_major,
                r300->screen->info.drm_minor,
                r300->screen->info.drm_patchlevel,
                screen->get_name(screen),
                r300->screen->info.pci_id,
                r300->screen->info.r300_num_gb_pipes,
                r300->screen->info.r300_num_z_pipes,
                r300->screen->info.gart_size >> 20,
                r300->screen->info.vram_size >> 20,
                "YES", /* XXX really? */
                r300->screen->caps.zmask_ram ? "YES" : "NO",
                r300->screen->caps.hiz_ram ? "YES" : "NO");
    }

    return &r300->context;

fail:
    r300_destroy_context(&r300->context);
    return NULL;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_context.h
0,0 → 1,785
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_CONTEXT_H
#define R300_CONTEXT_H
 
#define R300_BUFFER_ALIGNMENT 64
 
#include "draw/draw_vertex.h"
 
#include "util/u_blitter.h"
 
#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
 
#include "r300_defines.h"
#include "r300_screen.h"
#include "compiler/radeon_regalloc.h"
#include "../../winsys/radeon/drm/radeon_winsys.h"
 
struct u_upload_mgr;
struct r300_context;
struct r300_fragment_shader;
struct r300_vertex_shader;
struct r300_stencilref_context;
 
/* Colormask swizzle variants.
 * The final enumerator is not a swizzle but the number of variants; it sizes
 * the first dimension of r300_blend_state::cb_clamp. */
enum colormask_swizzle {
COLORMASK_BGRA,
COLORMASK_RGBA,
COLORMASK_RRRR,
COLORMASK_AAAA,
COLORMASK_GRRG,
COLORMASK_ARRA,
COLORMASK_BGRX,
COLORMASK_RGBX,
COLORMASK_NUM_SWIZZLES
};
 
/* One unit of emittable state tracked by the context.
 * The atoms are declared back to back inside struct r300_context, and
 * foreach_atom / r300_mark_atom_dirty walk and compare them by pointer, so
 * they are effectively treated as elements of one array. */
struct r300_atom {
/* Name, for debugging. */
const char* name;
/* Opaque state. */
void* state;
/* Emit the state to the context. */
void (*emit)(struct r300_context*, unsigned, void*);
/* Upper bound on number of dwords to emit. */
unsigned size;
/* Whether this atom should be emitted. */
boolean dirty;
/* Whether this atom may be emitted with state == NULL. */
boolean allow_null_state;
};
 
struct r300_aa_state {
struct r300_surface *dest;
 
uint32_t aa_config;
};
 
/* Blend state: the Gallium blend state plus several 8-dword buffers.
 * NOTE(review): "cb" presumably stands for "command buffer", as in
 * r300_rs_state; the variant names suggest when each one is chosen, but the
 * selection logic is not visible here - confirm against the emit code. */
struct r300_blend_state {
struct pipe_blend_state state;

/* One buffer per colormask swizzle (see enum colormask_swizzle). */
uint32_t cb_clamp[COLORMASK_NUM_SWIZZLES][8];
uint32_t cb_noclamp[8];
uint32_t cb_noclamp_noalpha[8];
uint32_t cb_no_readwrite[8];
};
 
struct r300_blend_color_state {
struct pipe_blend_color state;
uint32_t cb[3];
};
 
struct r300_clip_state {
uint32_t cb[29];
};
 
struct r300_dsa_state {
struct pipe_depth_stencil_alpha_state dsa;
 
/* This is actually a command buffer with named dwords. */
uint32_t cb_begin;
uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */
uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */
uint32_t cb_reg;
uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
uint32_t cb_reg1;
uint32_t alpha_value; /* R500_FG_ALPHA_VALUE: 0x4be0 */
 
/* Same, but without ZB reads and writes. */
uint32_t cb_zb_no_readwrite[8]; /* ZB not bound */
 
/* Emitted separately: */
uint32_t alpha_function;
 
/* Whether a two-sided stencil is enabled. */
boolean two_sided;
/* Whether a fallback should be used for a two-sided stencil ref value. */
boolean two_sided_stencil_ref;
};
 
struct r300_hyperz_state {
int flush;
/* This is actually a command buffer with named dwords. */
uint32_t cb_flush_begin;
uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */
uint32_t cb_begin;
uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */
uint32_t cb_reg1;
uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */
uint32_t cb_reg2;
uint32_t sc_hyperz; /* R300_SC_HYPERZ */
uint32_t cb_reg3;
uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */
};
 
struct r300_gpu_flush {
uint32_t cb_flush_clean[6];
};
 
#define RS_STATE_MAIN_SIZE 27
 
struct r300_rs_state {
/* Original rasterizer state. */
struct pipe_rasterizer_state rs;
/* Draw-specific rasterizer state. */
struct pipe_rasterizer_state rs_draw;
 
/* Command buffers. */
uint32_t cb_main[RS_STATE_MAIN_SIZE];
uint32_t cb_poly_offset_zb16[5];
uint32_t cb_poly_offset_zb24[5];
 
/* The index to cb_main where the cull_mode register value resides. */
unsigned cull_mode_index;
 
/* Whether polygon offset is enabled. */
boolean polygon_offset_enable;
 
/* This is emitted in the draw function. */
uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */
};
 
struct r300_rs_block {
uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */
uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */
uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */
uint32_t gb_enable;
 
uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */
uint32_t count; /* R300_RS_COUNT */
uint32_t inst_count; /* R300_RS_INST_COUNT */
uint32_t inst[8]; /* R300_RS_INST_[0-7] */
};
 
struct r300_sampler_state {
struct pipe_sampler_state state;
 
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
 
/* Min/max LOD must be clamped to [0, last_level], thus
* it's dependent on a currently bound texture */
unsigned min_lod, max_lod;
};
 
struct r300_texture_format_state {
uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */
uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */
uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */
uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */
uint32_t us_format0; /* R500_US_FORMAT0_0: 0x4640 (through 15) */
};
 
struct r300_sampler_view {
struct pipe_sampler_view base;
 
/* For resource_copy_region. */
unsigned width0_override;
unsigned height0_override;
 
/* Swizzles in the UTIL_FORMAT_SWIZZLE_* representation,
* derived from base. */
unsigned char swizzle[4];
 
/* Copy of r300_texture::texture_format_state with format-specific bits
* added. */
struct r300_texture_format_state format;
 
/* The texture cache region for this texture. */
uint32_t texcache_region;
};
 
struct r300_texture_sampler_state {
struct r300_texture_format_state format;
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
};
 
struct r300_textures_state {
/* Textures. */
struct r300_sampler_view *sampler_views[16];
int sampler_view_count;
/* Sampler states. */
struct r300_sampler_state *sampler_states[16];
int sampler_state_count;
 
/* This is the merge of the texture and sampler states. */
unsigned count;
uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */
struct r300_texture_sampler_state regs[16];
};
 
/* Vertex stream (PSC) register values. */
struct r300_vertex_stream_state {
/* R300_VAP_PROG_STREAM_CNTL_[0-7] */
uint32_t vap_prog_stream_cntl[8];
/* R300_VAP_PROG_STREAM_CNTL_EXT_[0-7] */
uint32_t vap_prog_stream_cntl_ext[8];

/* NOTE(review): presumably the number of entries in use in the arrays
 * above - confirm against the code that fills this struct. */
unsigned count;
};
 
struct r300_invariant_state {
uint32_t cb[24];
};
 
struct r300_vap_invariant_state {
uint32_t cb[11];
};
 
struct r300_viewport_state {
float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */
float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */
float yscale; /* R300_VAP_VPORT_YSCALE: 0x20a0 */
float yoffset; /* R300_VAP_VPORT_YOFFSET: 0x20a4 */
float zscale; /* R300_VAP_VPORT_ZSCALE: 0x20a8 */
float zoffset; /* R300_VAP_VPORT_ZOFFSET: 0x20ac */
uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */
};
 
struct r300_ztop_state {
uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */
};
 
/* The next several objects are not pure Radeon state; they inherit from
* various Gallium classes. */
 
struct r300_constant_buffer {
/* Buffer of constants */
uint32_t *ptr;
/* Remapping table. */
unsigned *remap_table;
/* const buffer base */
uint32_t buffer_base;
};
 
/* Query object.
*
* This is not a subclass of pipe_query because pipe_query is never
* actually fully defined. So, rather than have it as a member, and do
* subclass-style casting, we treat pipe_query as an opaque, and just
* trust that our state tracker does not ever mess up query objects.
*/
struct r300_query {
/* The kind of query. Currently only OQ is supported. */
unsigned type;
/* The number of pipes where query results are stored. */
unsigned num_pipes;
/* How many results have been written, in dwords. It's incremented
* after end_query and flush. */
unsigned num_results;
/* if begin has been emitted */
boolean begin_emitted;
 
/* The buffer where query results are stored. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
};
 
struct r300_surface {
struct pipe_surface base;
 
/* Winsys buffer backing the texture. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
 
enum radeon_bo_domain domain;
 
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t pitch_zmask; /* ZMASK_PITCH */
uint32_t pitch_hiz; /* HIZ_PITCH */
uint32_t pitch_cmask; /* CMASK_PITCH */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
 
/* Parameters dedicated to the CBZB clear. */
uint32_t cbzb_width; /* Aligned width. */
uint32_t cbzb_height; /* Half of the height. */
uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
uint32_t cbzb_pitch; /* DEPTHPITCH. */
uint32_t cbzb_format; /* ZB_FORMAT. */
 
/* Whether the CBZB clear is allowed on the surface. */
boolean cbzb_allowed;
 
unsigned colormask_swizzle;
};
 
struct r300_texture_desc {
/* Width, height, and depth.
* Most of the time, these are equal to pipe_texture::width0, height0,
* and depth0. However, NPOT 3D textures must have dimensions aligned
* to POT, and this is the only case when these variables differ from
* pipe_texture. */
unsigned width0, height0, depth0;
 
/* Buffer tiling.
* Macrotiling is specified per-level because small mipmaps cannot
* be macrotiled. */
enum radeon_bo_layout microtile;
enum radeon_bo_layout macrotile[R300_MAX_TEXTURE_LEVELS];
 
/* Offsets into the buffer. */
unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS];
 
/* Strides for each mip-level. */
unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS];
 
/* Size of one zslice or face or 2D image based on the texture target. */
unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS];
 
/* Total size of this texture, in bytes,
* derived from the texture properties. */
unsigned size_in_bytes;
 
/**
* If non-zero, override the natural texture layout with
* a custom stride (in bytes).
*
* \note Mipmapping fails for textures with a non-natural layout!
*
* \sa r300_texture_get_stride
*/
unsigned stride_in_bytes_override;
 
/* Whether this texture has non-power-of-two dimensions.
* It can be either a regular texture or a rectangle one. */
boolean is_npot;
 
/* This flag says that hardware must use the stride for addressing
* instead of the width. */
boolean uses_stride_addressing;
 
/* Whether CBZB fast color clear is allowed on the miplevel. */
boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS];
 
/* Zbuffer compression info for each miplevel. */
boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS];
/* If zero, then disable Z compression/HiZ. */
unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS];
unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS];
/* Zmask/HiZ strides for each miplevel. */
unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
 
/* CMASK info for AA buffers (no mipmapping). */
unsigned cmask_dwords;
unsigned cmask_stride_in_pixels;
};
 
struct r300_resource
{
struct u_resource b;
 
/* Winsys buffer backing this resource. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
enum radeon_bo_domain domain;
 
/* Constant buffers and SWTCL vertex and index buffers are in user
* memory. */
uint8_t *malloced_buffer;
 
/* Texture description (addressing, layout, special features). */
struct r300_texture_desc tex;
 
/* This is the level tiling flags were last time set for.
* It's used to prevent redundant tiling-flags changes from happening.*/
unsigned surface_level;
};
 
struct r300_vertex_element_state {
unsigned count;
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
unsigned format_size[PIPE_MAX_ATTRIBS];
 
/* The size of the vertex, in dwords. */
unsigned vertex_size_dwords;
 
struct r300_vertex_stream_state vertex_stream;
};
 
enum r300_hiz_func {
HIZ_FUNC_NONE,
 
/* The function, when determined, is set in stone
* until the next HiZ clear. */
 
/* MAX is written to the HiZ buffer.
* Used for LESS, LEQUAL. */
HIZ_FUNC_MAX,
 
/* MIN is written to the HiZ buffer.
* Used for GREATER, GEQUAL. */
HIZ_FUNC_MIN,
};
 
/* For deferred fragment shader state validation. */
enum r300_fs_validity_status {
FRAGMENT_SHADER_VALID, /* No need to change/validate the FS. */
FRAGMENT_SHADER_MAYBE_DIRTY,/* Validate the FS if external state was changed. */
FRAGMENT_SHADER_DIRTY /* Always validate the FS (if the FS was changed) */
};
 
/* Driver context.
 * The Gallium pipe_context is embedded as the first member so that the
 * r300_context() wrapper can cast a pipe_context pointer to this struct. */
struct r300_context {
/* Parent class */
struct pipe_context context;

/* The interface to the windowing system, etc. */
struct radeon_winsys *rws;
/* The command stream. */
struct radeon_winsys_cs *cs;
/* Screen. */
struct r300_screen *screen;

/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
/* Vertex buffer for SW TCL. */
struct pb_buffer *vbo;
struct radeon_winsys_cs_handle *vbo_cs;
/* Offset and size into the SW TCL VBO. */
size_t draw_vbo_offset;

/* Accelerated blit support. */
struct blitter_context* blitter;
/* Stencil two-sided reference value fallback. */
struct r300_stencilref_context *stencilref_fallback;

/* The KIL opcode needs the first texture unit to be enabled
* on r3xx-r4xx. In order to calm down the CS checker, we bind this
* dummy texture there. */
struct r300_sampler_view *texkill_sampler;

/* When no vertex buffer is set, this one is used instead to prevent
* hardlocks. */
struct pipe_vertex_buffer dummy_vb;

/* The currently active query. */
struct r300_query *query_current;
/* The saved query for blitter operations. */
struct r300_query *blitter_saved_query;
/* Query list. */
struct r300_query query_list;

/* Various CSO state objects. */

/* Each atom is emitted in the order it appears here, which can affect
* performance and stability if not handled with care. */
/* foreach_atom iterates by pointer from gpu_flush up to and including
 * query_start, so every atom must stay between those two members and no
 * non-atom member may be placed in between. */
/* GPU flush. */
struct r300_atom gpu_flush;
/* Clears must be emitted immediately after the flush. */
/* HiZ clear */
struct r300_atom hiz_clear;
/* zmask clear */
struct r300_atom zmask_clear;
/* cmask clear */
struct r300_atom cmask_clear;
/* Anti-aliasing (MSAA) state. */
struct r300_atom aa_state;
/* Framebuffer state. */
struct r300_atom fb_state;
/* HyperZ state (various SC/ZB bits). */
struct r300_atom hyperz_state;
/* ZTOP state. */
struct r300_atom ztop_state;
/* Depth, stencil, and alpha state. */
struct r300_atom dsa_state;
/* Blend state. */
struct r300_atom blend_state;
/* Blend color state. */
struct r300_atom blend_color_state;
/* Scissor state. */
struct r300_atom scissor_state;
/* Sample mask. */
struct r300_atom sample_mask;
/* Invariant state. This must be emitted to get the engine started. */
struct r300_atom invariant_state;
/* Viewport state. */
struct r300_atom viewport_state;
/* PVS flush. */
struct r300_atom pvs_flush;
/* VAP invariant state. */
struct r300_atom vap_invariant_state;
/* Vertex stream formatting state. */
struct r300_atom vertex_stream_state;
/* Vertex shader. */
struct r300_atom vs_state;
/* User clip planes. */
struct r300_atom clip_state;
/* RS block state + VAP (vertex shader) output mapping state. */
struct r300_atom rs_block_state;
/* Rasterizer state. */
struct r300_atom rs_state;
/* Framebuffer state (pipelined regs). */
struct r300_atom fb_state_pipelined;
/* Fragment shader. */
struct r300_atom fs;
/* Fragment shader RC_CONSTANT_STATE variables. */
struct r300_atom fs_rc_constant_state;
/* Fragment shader constant buffer. */
struct r300_atom fs_constants;
/* Vertex shader constant buffer. */
struct r300_atom vs_constants;
/* Texture cache invalidate. */
struct r300_atom texture_cache_inval;
/* Textures state. */
struct r300_atom textures_state;
/* Occlusion query. */
struct r300_atom query_start;

/* The pointers to the first and the last atom. */
/* first_dirty is NULL when nothing has been marked dirty; last_dirty points
 * one past the last dirty atom (see r300_mark_atom_dirty). */
struct r300_atom *first_dirty, *last_dirty;

/* Vertex elements for Gallium. */
struct r300_vertex_element_state *velems;

/* Vertex info for Draw. */
struct vertex_info vertex_info;

struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;

/* Stream locations for SWTCL. */
int stream_loc_notcl[16];

/* Flag indicating whether or not the HW is dirty. */
uint32_t dirty_hw;
/* Whether polygon offset is enabled. */
boolean polygon_offset_enabled;
/* Z buffer bit depth. */
uint32_t zbuffer_bpp;
/* Whether rendering is conditional and should be skipped. */
boolean skip_rendering;
/* The flag above saved by blitter. */
unsigned char blitter_saved_skip_rendering;
/* Point sprites texcoord index, 1 bit per texcoord */
int sprite_coord_enable;
/* Whether two-sided color selection is enabled (AKA light_twoside). */
boolean two_sided_color;
boolean flatshade;
/* Whether fast color clear is enabled. */
boolean cbzb_clear;
/* Whether fragment shader needs to be validated. */
enum r300_fs_validity_status fs_status;
/* Framebuffer multi-write. */
boolean fb_multiwrite;
unsigned num_samples;
boolean msaa_enable;
boolean alpha_to_one;
boolean alpha_to_coverage;

/* Opaque depth/stencil/alpha state object. The context setup code creates
 * it from a zeroed template with only depth.writemask set. */
void *dsa_decompress_zmask;

struct pipe_index_buffer index_buffer;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_buffers;
/* Upload manager, created during context setup with u_upload_create(). */
struct u_upload_mgr *uploader;

struct util_slab_mempool pool_transfers;

/* Stat counter. */
uint64_t flush_counter;

/* const tracking for VS */
int vs_const_base;

/* Vertex array state info */
boolean vertex_arrays_dirty;
boolean vertex_arrays_indexed;
int vertex_arrays_offset;
int vertex_arrays_instance_id;
boolean instancing_enabled;

/* Hyper-Z stats. */
boolean hyperz_enabled; /* Whether it owns Hyper-Z access. */
int64_t hyperz_time_of_last_flush; /* Time of the last flush with Z clear. */
unsigned num_z_clears; /* Since the last flush. */

/* ZMask state. */
boolean zmask_in_use; /* Whether ZMASK is enabled. */
boolean zmask_decompress; /* Whether ZMASK is being decompressed. */
struct pipe_surface *locked_zbuffer; /* Unbound zbuffer which still has data in ZMASK. */

/* HiZ state. */
boolean hiz_in_use; /* Whether HIZ is enabled. */
enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */
uint32_t hiz_clear_value; /* HiZ clear value. */

/* CMASK state. */
boolean cmask_access;
boolean cmask_in_use;
uint32_t color_clear_value; /* RGBA8 or RGBA1010102 */
uint32_t color_clear_value_ar; /* RGBA16F */
uint32_t color_clear_value_gb; /* RGBA16F */

/* Compiler state. */
struct rc_regalloc_state fs_regalloc_state; /* Register allocator info for
* fragment shaders. */
};
 
/* Iterate `atom` over every atom of the context, from gpu_flush up to and
 * including query_start. Relies on the atoms being declared contiguously in
 * struct r300_context. Arguments are parenthesized so that any pointer
 * expression can be passed safely. */
#define foreach_atom(r300, atom) \
    for ((atom) = &(r300)->gpu_flush; (atom) != (&(r300)->query_start)+1; (atom)++)

/* Iterate `atom` over the half-open range [first_dirty, last_dirty).
 * The loop body does not run when both pointers are NULL. */
#define foreach_dirty_atom(r300, atom) \
    for ((atom) = (r300)->first_dirty; (atom) != (r300)->last_dirty; (atom)++)
 
/* Convenience cast wrappers. */
/* Reinterpret an opaque Gallium query handle as the driver's query object. */
static INLINE struct r300_query* r300_query(struct pipe_query* q)
{
    struct r300_query *query = (struct r300_query*)q;

    return query;
}
 
/* Downcast a Gallium surface to the driver surface that embeds it as its
 * first member. */
static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
{
    struct r300_surface *surface = (struct r300_surface*)surf;

    return surface;
}
 
/* Reinterpret a Gallium resource pointer as the driver's resource object. */
static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
{
    struct r300_resource *resource = (struct r300_resource*)tex;

    return resource;
}
 
/* Downcast a Gallium context to the driver context that embeds it as its
 * first member. */
static INLINE struct r300_context* r300_context(struct pipe_context* context)
{
    struct r300_context *r300 = (struct r300_context*)context;

    return r300;
}
 
/* Return the fragment shader stored as the opaque state of the context's
 * "fs" atom. */
static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
{
    void *fs_state = r300->fs.state;

    return (struct r300_fragment_shader*)fs_state;
}
 
/* Flag an atom as dirty and widen the context's dirty range to include it.
 *
 * The dirty range is the half-open pointer interval
 * [first_dirty, last_dirty); a NULL first_dirty means the range is empty. */
static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
                                        struct r300_atom *atom)
{
    struct r300_atom *one_past = atom + 1;

    atom->dirty = TRUE;

    /* Empty range: this atom becomes the whole range. */
    if (!r300->first_dirty) {
        r300->first_dirty = atom;
        r300->last_dirty = one_past;
        return;
    }

    /* Otherwise extend the range on whichever side the atom falls. */
    if (atom < r300->first_dirty) {
        r300->first_dirty = atom;
    } else if (one_past > r300->last_dirty) {
        r300->last_dirty = one_past;
    }
}
 
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
 
/* Context initialization. */
struct draw_stage* r300_draw_stage(struct r300_context* r300);
void r300_init_blit_functions(struct r300_context *r300);
void r300_init_flush_functions(struct r300_context* r300);
void r300_init_query_functions(struct r300_context* r300);
void r300_init_render_functions(struct r300_context *r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
 
/* r300_blit.c */
void r300_decompress_zmask(struct r300_context *r300);
void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
void r300_decompress_zmask_locked(struct r300_context *r300);
bool r300_is_blit_supported(enum pipe_format format);
 
/* r300_flush.c */
void r300_flush(struct pipe_context *pipe,
unsigned flags,
struct pipe_fence_handle **fence);
 
/* r300_hyperz.c */
void r300_update_hyperz_state(struct r300_context* r300);
 
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
struct r300_query *query);
void r300_stop_query(struct r300_context *r300);
 
/* r300_render_translate.c */
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_index_buffer *ib,
struct pipe_resource **out_index_buffer,
unsigned *index_size, unsigned index_offset,
unsigned *start, unsigned count);
 
/* r300_render_stencilref.c */
void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
 
/* r300_render.c */
void r500_emit_index_bias(struct r300_context *r300, int index_bias);
void r300_blitter_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2,
float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib);
 
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
R300_CHANGED_HYPERZ_FLAG,
R300_CHANGED_MULTIWRITE,
R300_CHANGED_CMASK_ENABLE,
};
 
void r300_mark_fb_state_dirty(struct r300_context *r300,
enum r300_fb_state_change change);
void r300_mark_fs_code_dirty(struct r300_context *r300);
 
struct pipe_sampler_view *
r300_create_sampler_view_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_sampler_view *templ,
unsigned width0_override,
unsigned height0_override);
 
/* r300_state_derived.c */
void r300_update_derived_state(struct r300_context* r300);
 
/* r300_debug.c */
void r500_dump_rs_block(struct r300_rs_block *rs);
 
 
/* Report whether the given debug flags are enabled, by forwarding the query
 * to the screen that owns this context. */
static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
{
    struct r300_screen *owner = ctx->screen;

    return SCREEN_DBG_ON(owner, flags);
}
 
/* printf-style debug output to stderr, emitted only when CTX_DBG_ON reports
 * the given flags as enabled for this context. */
static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
                           const char * fmt, ...)
{
    va_list args;

    if (!CTX_DBG_ON(ctx, flags))
        return;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
 
#define DBG_ON CTX_DBG_ON
#define DBG CTX_DBG
 
#endif /* R300_CONTEXT_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_cs.h
0,0 → 1,127
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/**
* This file contains macros for immediate command submission.
*/
 
#ifndef R300_CS_H
#define R300_CS_H
 
#include "r300_reg.h"
#include "r300_context.h"
 
/* Yes, I know macros are ugly. However, they are much prettier than the code
* that they neatly hide away, and don't have the cost of function setup, so
* we're going to use them. */
 
/**
* Command submission setup.
*/
 
/* Declare the locals that the other CS macros in this file rely on:
 *   cs_copy   - the context's command stream,
 *   cs_winsys - the context's winsys,
 *   cs_count  - dword counter used by BEGIN_CS/END_CS in DEBUG builds.
 * The (void) casts silence unused-variable warnings. Because the expansion
 * ends with statements, in C89 code this must be the last thing in the
 * declaration part of a block. */
#define CS_LOCALS(context) \
struct radeon_winsys_cs *cs_copy = (context)->cs; \
struct radeon_winsys *cs_winsys = (context)->rws; \
int cs_count = 0; (void) cs_count; (void) cs_winsys;
 
#ifdef DEBUG

/* Start an emission block that is expected to write `size` dwords.
 * Asserts that they fit in what is left of the command stream and arms the
 * cs_count counter. `size` is parenthesized so that any expression (e.g. a
 * conditional) can be passed safely. */
#define BEGIN_CS(size) do { \
    assert((size) <= (RADEON_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
    cs_count = (size); \
} while (0)

/* Close an emission block. Warns when the number of dwords written since
 * BEGIN_CS differs from the announced size, then resets the counter. */
#define END_CS do { \
    if (cs_count != 0) \
        debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
                     cs_count, __FUNCTION__, __FILE__, __LINE__); \
    cs_count = 0; \
} while (0)

/* Account for x dwords written inside the current emission block. */
#define CS_USED_DW(x) cs_count -= (x)

#else

/* Non-debug builds: no accounting at all. The macro arguments are not
 * evaluated, so they must not carry side effects. */
#define BEGIN_CS(size)
#define END_CS
#define CS_USED_DW(x)

#endif
 
/**
* Writing pure DWORDs.
*/
 
/* Append one dword to the command stream and account for it. */
#define OUT_CS(value) do { \
cs_copy->buf[cs_copy->cdw++] = (value); \
CS_USED_DW(1); \
} while (0)

/* Append a float, passed through fui() before being written. */
#define OUT_CS_32F(value) \
OUT_CS(fui(value))

/* Write a single register: a packet0 header with count 0, then the value
 * (two dwords in total). */
#define OUT_CS_REG(register, value) do { \
OUT_CS(CP_PACKET0(register, 0)); \
OUT_CS(value); \
} while (0)

/* Note: This expects count to be the number of registers,
* not the actual packet0 count! */
#define OUT_CS_REG_SEQ(register, count) \
OUT_CS(CP_PACKET0((register), ((count) - 1)))

/* Same header as OUT_CS_REG_SEQ, with RADEON_ONE_REG_WR or'ed in. */
#define OUT_CS_ONE_REG(register, count) \
OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR)

/* Emit a packet3 header; unlike the two macros above, count is passed to
 * CP_PACKET3 unmodified. */
#define OUT_CS_PKT3(op, count) \
OUT_CS(CP_PACKET3(op, count))

/* Copy `count` dwords from `values` into the command stream and account
 * for them. */
#define OUT_CS_TABLE(values, count) do { \
memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
cs_copy->cdw += (count); \
CS_USED_DW(count); \
} while (0)
 
 
/**
* Writing relocations.
*/
 
/* Emit a relocation for resource `r` through the winsys.
 * Asserts that both `r` and its cs_buf handle are non-NULL, and accounts for
 * two dwords in the current emission block. */
#define OUT_CS_RELOC(r) do { \
assert((r)); \
assert((r)->cs_buf); \
cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \
CS_USED_DW(2); \
} while (0)
 
 
/**
* Command buffer emission.
*/
 
/* Copy `count` dwords from `values` straight into the command stream.
 * Unlike OUT_CS_TABLE this does not touch cs_count; it asserts that cs_count
 * is 0, i.e. that no BEGIN_CS/END_CS block is open. */
#define WRITE_CS_TABLE(values, count) do { \
assert(cs_count == 0); \
memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
cs_copy->cdw += (count); \
} while (0)
 
#endif /* R300_CS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_debug.c
0,0 → 1,143
/*
* Copyright 2009 Nicolai Haehnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_context.h"
 
#include "util/u_debug.h"
 
#include <stdio.h>
 
/* Flags accepted in the RADEON_DEBUG environment variable (parsed by
 * r300_init_debug). Each entry is { name, flag bit, help text }. */
static const struct debug_named_value debug_options[] = {
    { "info", DBG_INFO, "Print hardware info (printed by default on debug builds)"},
    { "fp", DBG_FP, "Log fragment program compilation" },
    { "vp", DBG_VP, "Log vertex program compilation" },
    { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" },
    { "draw", DBG_DRAW, "Log draw calls" },
    { "swtcl", DBG_SWTCL, "Log SWTCL-specific info" },
    { "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" },
    { "psc", DBG_PSC, "Log vertex stream registers" },
    { "tex", DBG_TEX, "Log basic info about textures" },
    { "texalloc", DBG_TEXALLOC, "Log texture mipmap tree info" },
    { "rs", DBG_RS, "Log rasterizer" },
    { "fb", DBG_FB, "Log framebuffer" },
    { "cbzb", DBG_CBZB, "Log fast color clear info" },
    { "hyperz", DBG_HYPERZ, "Log HyperZ info" },
    { "scissor", DBG_SCISSOR, "Log scissor info" },
    { "msaa", DBG_MSAA, "Log MSAA resources"},
    { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
    { "notiling", DBG_NO_TILING, "Disable tiling" },
    { "noimmd", DBG_NO_IMMD, "Disable immediate mode" },
    { "noopt", DBG_NO_OPT, "Disable shader optimizations" },
    { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" },
    { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" },
    { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
    { "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },

    /* must be last */
    DEBUG_NAMED_VALUE_END
};
 
/* Initialize screen->debug from the RADEON_DEBUG environment variable,
 * using the debug_options table; the default passed to the parser is 0. */
void r300_init_debug(struct r300_screen * screen)
{
    static const char env_name[] = "RADEON_DEBUG";

    screen->debug = debug_get_flags_option(env_name, debug_options, 0);
}
 
/* Print a human-readable dump of an RS (rasterizer) block to stderr.
 *
 * Decodes rs->count (texcoord/color counts), rs->inst_count and every
 * instruction in rs->inst[], following each instruction's "ip" index into
 * rs->ip[] to show the texture component pointers or the color format.
 *
 * Fixes over the previous version:
 *  - the four 6-bit texture component pointers are now actually walked
 *    (tex_ptr was never shifted, so the first component was printed four
 *    times);
 *  - the instruction count (up to 16) and the ip index (up to 15) are
 *    checked against the sizes of rs->inst[] and rs->ip[] before indexing.
 */
void r500_dump_rs_block(struct r300_rs_block *rs)
{
    const unsigned max_inst = sizeof(rs->inst) / sizeof(rs->inst[0]);
    const unsigned max_ip = sizeof(rs->ip) / sizeof(rs->ip[0]);
    unsigned count, ip, it_count, ic_count, i, j;
    unsigned tex_ptr;
    unsigned col_ptr, col_fmt;

    count = rs->inst_count & 0xf;
    count++;

    it_count = rs->count & 0x7f;
    ic_count = (rs->count >> 7) & 0xf;

    fprintf(stderr, "RS Block: %d texcoords (linear), %d colors (perspective)\n",
            it_count, ic_count);
    fprintf(stderr, "%d instructions\n", count);

    /* The register field can encode more instructions than the struct
     * stores; never read past the end of rs->inst[]. */
    if (count > max_inst)
        count = max_inst;

    for (i = 0; i < count; i++) {
        if (rs->inst[i] & 0x10) {
            ip = rs->inst[i] & 0xf;
            fprintf(stderr, "texture: ip %d to psf %d\n",
                    ip, (rs->inst[i] >> 5) & 0x7f);

            if (ip < max_ip) {
                /* 24 bits = four 6-bit component pointers. */
                tex_ptr = rs->ip[ip] & 0xffffff;
                fprintf(stderr, " : ");

                for (j = 0; j < 4; j++) {
                    unsigned comp = tex_ptr & 0x3f;

                    if (j != 0)
                        fprintf(stderr, "/");

                    if (comp == 63) {
                        fprintf(stderr, "1.0");
                    } else if (comp == 62) {
                        fprintf(stderr, "0.0");
                    } else {
                        fprintf(stderr, "[%d]", comp);
                    }

                    /* Advance to the next component. */
                    tex_ptr >>= 6;
                }
                fprintf(stderr, "\n");
            } else {
                fprintf(stderr, " : (ip out of range)\n");
            }
        }

        if (rs->inst[i] & 0x10000) {
            ip = (rs->inst[i] >> 12) & 0xf;
            fprintf(stderr, "color: ip %d to psf %d\n",
                    ip, (rs->inst[i] >> 18) & 0x7f);

            if (ip >= max_ip) {
                fprintf(stderr, " : (ip out of range)\n");
                continue;
            }

            col_ptr = (rs->ip[ip] >> 24) & 0x7;
            col_fmt = (rs->ip[ip] >> 27) & 0xf;
            fprintf(stderr, " : offset %d ", col_ptr);

            /* Unlisted format codes print nothing. */
            switch (col_fmt) {
                case 0:
                    fprintf(stderr, "(R/G/B/A)");
                    break;
                case 1:
                    fprintf(stderr, "(R/G/B/0)");
                    break;
                case 2:
                    fprintf(stderr, "(R/G/B/1)");
                    break;
                case 4:
                    fprintf(stderr, "(0/0/0/A)");
                    break;
                case 5:
                    fprintf(stderr, "(0/0/0/0)");
                    break;
                case 6:
                    fprintf(stderr, "(0/0/0/1)");
                    break;
                case 8:
                    fprintf(stderr, "(1/1/1/A)");
                    break;
                case 9:
                    fprintf(stderr, "(1/1/1/0)");
                    break;
                case 10:
                    fprintf(stderr, "(1/1/1/1)");
                    break;
            }
            fprintf(stderr, "\n");
        }
    }
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_defines.h
0,0 → 1,36
/*
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/* Driver-wide constants for r300; guarded against multiple inclusion. */
#ifndef R300_DEFINES_H
#define R300_DEFINES_H

#include "pipe/p_defines.h"

/* Driver limits.
 * NOTE(review): units are not visible here - presumably a mipmap level
 * count and a size in bytes; confirm against the users. */
#define R300_MAX_TEXTURE_LEVELS 13
#define R300_MAX_DRAW_VBO_SIZE (1024 * 1024)

/* Driver-private resource flags, allocated upwards from
 * PIPE_RESOURCE_FLAG_DRV_PRIV. */
#define R300_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R300_RESOURCE_FORCE_MICROTILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)

/* NOTE(review): presumably a sentinel for "no hardware format" - confirm. */
#define R300_INVALID_FORMAT 0xffff

#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_emit.c
0,0 → 1,1439
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/* r300_emit: Functions for emitting state. */
 
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_mm.h"
 
#include "r300_context.h"
#include "r300_cb.h"
#include "r300_cs.h"
#include "r300_emit.h"
#include "r300_fs.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
#include "r300_vs.h"
 
/* Write one of the pre-built blend command tables of `state`.
 *
 * The table is picked from the format of the first colorbuffer; when the
 * framebuffer has no colorbuffers the cb_no_readwrite table is used. */
void r300_emit_blend_state(struct r300_context* r300,
                           unsigned size, void* state)
{
    struct r300_blend_state *bs = state;
    struct pipe_framebuffer_state *framebuffer =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    CS_LOCALS(r300);

    if (!framebuffer->nr_cbufs) {
        WRITE_CS_TABLE(bs->cb_no_readwrite, size);
        return;
    }

    switch (framebuffer->cbufs[0]->format) {
    case PIPE_FORMAT_R16G16B16A16_FLOAT:
        WRITE_CS_TABLE(bs->cb_noclamp, size);
        break;
    case PIPE_FORMAT_R16G16B16X16_FLOAT:
        WRITE_CS_TABLE(bs->cb_noclamp_noalpha, size);
        break;
    default: {
        /* Every other format indexes the clamped tables by the surface's
         * colormask swizzle. */
        unsigned swizzle =
            r300_surface(framebuffer->cbufs[0])->colormask_swizzle;
        WRITE_CS_TABLE(bs->cb_clamp[swizzle], size);
        break;
    }
    }
}
 
/* Write the pre-built blend color command table (`size` dwords). */
void r300_emit_blend_color_state(struct r300_context* r300,
                                 unsigned size, void* state)
{
    struct r300_blend_color_state *blend_color = state;
    CS_LOCALS(r300);

    WRITE_CS_TABLE(blend_color->cb, size);
}
 
/* Write the pre-built clip state command table (`size` dwords). */
void r300_emit_clip_state(struct r300_context* r300,
                          unsigned size, void* state)
{
    struct r300_clip_state *clip_state = state;
    CS_LOCALS(r300);

    WRITE_CS_TABLE(clip_state->cb, size);
}
 
/* Emit the depth/stencil/alpha state.
 *
 * FG_ALPHA_FUNC is assembled here because it depends on the bound
 * framebuffer and on the MSAA state; the remaining size-2 dwords come from
 * a pre-built table chosen by whether a zsbuf is bound. */
void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
{
    struct r300_dsa_state *dsa_state = state;
    struct pipe_framebuffer_state *framebuffer =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    uint32_t fg_alpha_func = dsa_state->alpha_function;
    CS_LOCALS(r300);

    /* On R500 with alpha test enabled, pick the alpha ref value between
     * 8-bit (FG_ALPHA_FUNC.AM_VAL) and 16-bit (FG_ALPHA_VALUE). */
    if (r300->screen->caps.is_r500 &&
        (fg_alpha_func & R300_FG_ALPHA_FUNC_ENABLE)) {
        boolean fp16_target =
            framebuffer->nr_cbufs &&
            (framebuffer->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
             framebuffer->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT);

        if (fp16_target)
            fg_alpha_func |= R500_FG_ALPHA_FUNC_FP16_ENABLE;
        else
            fg_alpha_func |= R500_FG_ALPHA_FUNC_8BIT;
    }

    /* Alpha-to-coverage: 3/6 is always selected, it improves precision
     * even for 2x and 4x MSAA. */
    if (r300->alpha_to_coverage && r300->msaa_enable)
        fg_alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE |
                         R300_FG_ALPHA_FUNC_CFG_3_OF_6;

    BEGIN_CS(size);
    OUT_CS_REG(R300_FG_ALPHA_FUNC, fg_alpha_func);
    OUT_CS_TABLE(framebuffer->zsbuf ? &dsa_state->cb_begin
                                    : dsa_state->cb_zb_no_readwrite,
                 size-2);
    END_CS;
}
 
/* Fill vec[0..3] with the current value of a compiler state constant.
 *
 * constant->u.State[0] selects the factor and, for the texture factors,
 * constant->u.State[1] is the sampler view index. The fourth component is
 * always 1; an unknown selector is reported on stderr and yields
 * (0, 0, 0, 1), which should be a relatively safe RGBA or STRQ value. */
static void get_rc_constant_state(
    float vec[4],
    struct r300_context * r300,
    struct rc_constant * constant)
{
    struct r300_textures_state *tex_state = r300->textures_state.state;
    struct r300_resource *res;
    unsigned c;

    assert(constant->Type == RC_CONSTANT_STATE);

    switch (constant->u.State[0]) {
    case RC_STATE_R300_TEXRECT_FACTOR:
        /* Factor for converting rectangle coords to normalized coords.
         * Should only show up on non-r500. */
        res = r300_resource(tex_state->sampler_views[constant->u.State[1]]->base.texture);
        vec[0] = 1.0 / res->tex.width0;
        vec[1] = 1.0 / res->tex.height0;
        vec[2] = 0;
        break;

    case RC_STATE_R300_TEXSCALE_FACTOR:
        res = r300_resource(tex_state->sampler_views[constant->u.State[1]]->base.texture);
        /* A small number is added to the texture size to work around
         * rounding errors in hw. */
        vec[0] = res->b.b.width0 / (res->tex.width0 + 0.001f);
        vec[1] = res->b.b.height0 / (res->tex.height0 + 0.001f);
        vec[2] = res->b.b.depth0 / (res->tex.depth0 + 0.001f);
        break;

    case RC_STATE_R300_VIEWPORT_SCALE:
        for (c = 0; c < 3; c++)
            vec[c] = r300->viewport.scale[c];
        break;

    case RC_STATE_R300_VIEWPORT_OFFSET:
        for (c = 0; c < 3; c++)
            vec[c] = r300->viewport.translate[c];
        break;

    default:
        fprintf(stderr, "r300: Implementation error: "
                "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
        vec[0] = 0;
        vec[1] = 0;
        vec[2] = 0;
        break;
    }

    /* Common to every case above. */
    vec[3] = 1;
}
 
/* Convert a normal single-precision float into the 7.16 format
 * used by the R300 fragment shader.
 *
 * Layout of the result: bit 23 is the sign, bits 22..16 the exponent
 * (bias 63), bits 15..0 the top 16 bits of the IEEE mantissa.
 *
 * Zero (of either sign) packs to 0. Magnitudes too small for the 7-bit
 * exponent are flushed to 0 and magnitudes too large saturate to the
 * largest encodable value of the same sign, so an out-of-range exponent
 * can never spill into the sign bit. NaN and infinity get no special
 * treatment.
 */
uint32_t pack_float24(float f)
{
    union {
        float fl;
        uint32_t u;
    } u;
    int exponent;
    uint32_t float24 = 0;

    if (f == 0.0)
        return 0;

    u.fl = f;

    /* Handle -ve: only the sign of frexpf()'s result is needed, the
     * mantissa bits themselves are taken from the IEEE encoding below. */
    if (frexpf(f, &exponent) < 0)
        float24 |= (1 << 23);

    /* Handle exponent, bias of 63. frexpf() normalizes to [0.5, 1), so its
     * exponent is one larger than the IEEE one, hence +62 rather than +63. */
    exponent += 62;

    /* Underflow: shifting a negative exponent would be undefined. */
    if (exponent < 0)
        return 0;

    /* Overflow: saturate instead of corrupting the sign bit. */
    if (exponent > 0x7f)
        return float24 | 0x7fffff;

    float24 |= ((uint32_t)exponent << 16);
    /* Kill 7 LSB of mantissa */
    float24 |= (u.u & 0x7FFFFF) >> 7;

    return float24;
}
 
/* Write the current fragment shader's pre-built code table.
 * The table length comes from the shader itself; `size` and `state` are
 * not used. */
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *frag = r300_fs(r300);
    CS_LOCALS(r300);

    WRITE_CS_TABLE(frag->shader->cb_code, frag->shader->cb_code_size);
}
 
/* Upload the external fragment shader constants, starting at
 * R300_PFS_PARAM_0_X.
 *
 * Each constant is four floats, each packed with pack_float24(). When the
 * constant buffer has a remap table, constant i is read from slot
 * remap_table[i]; otherwise from slot i. Emits nothing when the shader has
 * no external constants. */
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *frag = r300_fs(r300);
    struct r300_constant_buffer *cbuf = state;
    unsigned num_consts = frag->shader->externals_count;
    unsigned c, chan;
    CS_LOCALS(r300);

    if (!num_consts)
        return;

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, num_consts * 4);
    for (c = 0; c < num_consts; c++) {
        float *vec = cbuf->remap_table
            ? (float*)&cbuf->ptr[cbuf->remap_table[c]*4]
            : (float*)&cbuf->ptr[c*4];

        for (chan = 0; chan < 4; chan++)
            OUT_CS(pack_float24(vec[chan]));
    }
    END_CS;
}
 
/* Upload the state-dependent (RC_CONSTANT_STATE) fragment shader constants.
 *
 * Scans the constants that follow the externals; each state constant is
 * evaluated with get_rc_constant_state() and written, packed with
 * pack_float24(), to its own register slot (16 bytes per constant index).
 * Emits nothing when the shader's rc_state_count is zero. */
void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *frag = r300_fs(r300);
    struct rc_constant_list *list = &frag->shader->code.constants;
    unsigned begin = frag->shader->externals_count;
    unsigned end = list->Count;
    unsigned idx, chan;
    CS_LOCALS(r300);

    if (frag->shader->rc_state_count == 0)
        return;

    BEGIN_CS(size);
    for (idx = begin; idx < end; ++idx) {
        float value[4];

        if (list->Constants[idx].Type != RC_CONSTANT_STATE)
            continue;

        get_rc_constant_state(value, r300, &list->Constants[idx]);

        OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + idx * 16, 4);
        for (chan = 0; chan < 4; chan++)
            OUT_CS(pack_float24(value[chan]));
    }
    END_CS;
}
 
/* R500 variant: write the current fragment shader's pre-built code table.
 * The table length comes from the shader itself; `size` and `state` are
 * not used. */
void r500_emit_fs(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *frag = r300_fs(r300);
    CS_LOCALS(r300);

    WRITE_CS_TABLE(frag->shader->cb_code, frag->shader->cb_code_size);
}
 
/* R500 variant: upload the external fragment shader constants through the
 * GA_US_VECTOR_INDEX / GA_US_VECTOR_DATA register pair.
 *
 * Constants are copied as raw dwords (no 24-bit packing). With a remap
 * table, constant i is taken from slot remap_table[i]; without one the
 * buffer is copied in a single table write. Emits nothing when the shader
 * has no external constants. */
void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *frag = r300_fs(r300);
    struct r300_constant_buffer *cbuf = state;
    unsigned num_consts = frag->shader->externals_count;
    unsigned c;
    CS_LOCALS(r300);

    if (!num_consts)
        return;

    BEGIN_CS(size);
    OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
    OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, num_consts * 4);
    if (!cbuf->remap_table) {
        OUT_CS_TABLE(cbuf->ptr, num_consts * 4);
    } else {
        for (c = 0; c < num_consts; c++) {
            uint32_t *src = &cbuf->ptr[cbuf->remap_table[c]*4];
            OUT_CS_TABLE(src, 4);
        }
    }
    END_CS;
}
 
/* R500 variant: upload the state-dependent (RC_CONSTANT_STATE) fragment
 * shader constants.
 *
 * Scans the constants that follow the externals; each state constant is
 * evaluated with get_rc_constant_state() and written as four raw floats at
 * its own vector index. Emits nothing when rc_state_count is zero. */
void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *frag = r300_fs(r300);
    struct rc_constant_list *list = &frag->shader->code.constants;
    unsigned begin = frag->shader->externals_count;
    unsigned end = list->Count;
    unsigned idx;
    CS_LOCALS(r300);

    if (frag->shader->rc_state_count == 0)
        return;

    BEGIN_CS(size);
    for (idx = begin; idx < end; ++idx) {
        float value[4];

        if (list->Constants[idx].Type != RC_CONSTANT_STATE)
            continue;

        get_rc_constant_state(value, r300, &list->Constants[idx]);

        OUT_CS_REG(R500_GA_US_VECTOR_INDEX,
                   R500_GA_US_VECTOR_INDEX_TYPE_CONST |
                   (idx & R500_GA_US_VECTOR_INDEX_MASK));
        OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
        OUT_CS_TABLE(value, 4);
    }
    END_CS;
}
 
/* Program the scissor rectangle to cover the framebuffer (or the CBZB clear
 * area while a CBZB clear is active) and then emit the pre-built
 * flush-and-clean table.
 *
 * On non-R500 chips both corners are biased by 1440; on R500 they are not. */
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
{
    struct r300_gpu_flush *flush = state;
    struct pipe_framebuffer_state *framebuffer =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    uint32_t width = framebuffer->width;
    uint32_t height = framebuffer->height;
    uint32_t bias = r300->screen->caps.is_r500 ? 0 : 1440;
    CS_LOCALS(r300);

    if (r300->cbzb_clear) {
        struct r300_surface *cbuf0 = r300_surface(framebuffer->cbufs[0]);

        width = cbuf0->cbzb_width;
        height = cbuf0->cbzb_height;
    }

    DBG(r300, DBG_SCISSOR,
        "r300: Scissor width: %i, height: %i, CBZB clear: %s\n",
        width, height, r300->cbzb_clear ? "YES" : "NO");

    BEGIN_CS(size);

    /* Set up scissors.
     * By writing to the SC registers, SC & US assert idle. */
    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
    OUT_CS((bias << R300_SCISSORS_X_SHIFT) |
           (bias << R300_SCISSORS_Y_SHIFT));
    OUT_CS(((width + bias - 1) << R300_SCISSORS_X_SHIFT) |
           ((height + bias - 1) << R300_SCISSORS_Y_SHIFT));

    /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
    OUT_CS_TABLE(flush->cb_flush_clean, 6);
    END_CS;
}
 
/* Emit GB_AA_CONFIG and the AA resolve registers.
 *
 * With a resolve destination set, its offset and pitch are programmed and
 * resolve mode is enabled; otherwise RB3D_AARESOLVE_CTL is written as 0. */
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
{
    struct r300_aa_state *aa_state = state;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG(R300_GB_AA_CONFIG, aa_state->aa_config);

    if (!aa_state->dest) {
        OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0);
    } else {
        OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 3);
        OUT_CS(aa_state->dest->offset);
        OUT_CS(aa_state->dest->pitch & R300_RB3D_AARESOLVE_PITCH_MASK);
        OUT_CS(R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
               R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE);
        OUT_CS_RELOC(aa_state->dest);
    }

    END_CS;
}
 
/* Emit the unpipelined framebuffer state: RB3D_CCTL, the offset and pitch
 * of every colorbuffer (plus CMASK registers for colorbuffer 0 when CMASK
 * is in use), and either the ZB part of a CBZB clear or the bound zbuffer
 * with its optional HyperZ registers.
 *
 * `state` is the pipe_framebuffer_state to emit, `size` the number of
 * dwords reserved for it. */
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
{
struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
struct r300_surface* surf;
unsigned i;
uint32_t rb3d_cctl = 0;

CS_LOCALS(r300);

BEGIN_CS(size);

/* Assemble RB3D_CCTL from the chip generation, the multiwrite mode and
 * the CMASK usage. */
if (r300->screen->caps.is_r500) {
rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE;
}
/* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */
if (fb->nr_cbufs && r300->fb_multiwrite) {
rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs);
}
if (r300->cmask_in_use) {
rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE |
R300_RB3D_CCTL_CMASK_ENABLE;
}

OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl);

/* Set up colorbuffers. */
for (i = 0; i < fb->nr_cbufs; i++) {
surf = r300_surface(fb->cbufs[i]);

/* Each register write that references the buffer is followed by a
 * relocation for the same surface. */
OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset);
OUT_CS_RELOC(surf);

OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch);
OUT_CS_RELOC(surf);

/* CMASK registers are only written for the first colorbuffer. */
if (r300->cmask_in_use && i == 0) {
OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0);
OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask);
OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value);
/* The AR/GB clear value registers are only written on R500 when the
 * DRM minor version is at least 29. */
if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) {
OUT_CS_REG_SEQ(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
OUT_CS(r300->color_clear_value_ar);
OUT_CS(r300->color_clear_value_gb);
}
}
}

/* Set up the ZB part of the CBZB clear. */
if (r300->cbzb_clear) {
/* The ZB registers are pointed at colorbuffer 0 using its cbzb_*
 * parameters. */
surf = r300_surface(fb->cbufs[0]);

OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);

OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset);
OUT_CS_RELOC(surf);

OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch);
OUT_CS_RELOC(surf);

DBG(r300, DBG_CBZB,
"CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,
surf->cbzb_pitch);
}
/* Set up a zbuffer. */
else if (fb->zsbuf) {
surf = r300_surface(fb->zsbuf);

OUT_CS_REG(R300_ZB_FORMAT, surf->format);

OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset);
OUT_CS_RELOC(surf);

OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch);
OUT_CS_RELOC(surf);

if (r300->hyperz_enabled) {
/* HiZ RAM. */
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);
/* Z Mask RAM. (compressed zbuffer) */
OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask);
}
}

END_CS;
}
 
/* Write the HyperZ command table.
 *
 * When the state's `flush` flag is set the table is written from
 * cb_flush_begin with the full `size`; otherwise from cb_begin with
 * size - 2 dwords. */
void r300_emit_hyperz_state(struct r300_context *r300,
                            unsigned size, void *state)
{
    struct r300_hyperz_state *hyperz = state;
    CS_LOCALS(r300);

    if (!hyperz->flush) {
        WRITE_CS_TABLE(&hyperz->cb_begin, size - 2);
    } else {
        WRITE_CS_TABLE(&hyperz->cb_flush_begin, size);
    }
}
 
void r300_emit_hyperz_end(struct r300_context *r300)
{
struct r300_hyperz_state z =
*(struct r300_hyperz_state*)r300->hyperz_state.state;
 
z.flush = 1;
z.zb_bw_cntl = 0;
z.zb_depthclearvalue = 0;
z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
z.gb_z_peq_config = 0;
 
r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);
}
 
/* Pack eight 4-bit values into one dword, first argument in the lowest
 * nibble. */
#define R300_NIBBLES(x0, y0, x1, y1, x2, y2, d0y, d0x) \
    (((x0) & 0xf) | (((y0) & 0xf) << 4) | \
    (((x1) & 0xf) << 8) | (((y1) & 0xf) << 12) | \
    (((x2) & 0xf) << 16) | (((y2) & 0xf) << 20) | \
    (((d0y) & 0xf) << 24) | (((d0x) & 0xf) << 28))

/* Build the value of one multisample position register.
 *
 * `p` holds twelve values: the (X,Y) positions of samples 0..5.
 *
 * index == 0 (MSPOS0): positions of samples 0,1,2 followed by a (Y,X) pair
 * with the minimum distance from the pixel edge:
 *     X0, Y0, X1, Y1, X2, Y2, D0_Y, D0_X
 * There is a quirk for D0_X: distances below eight are used as-is, but "7"
 * is written for a distance of "8"; the hardware converts 7 into 8
 * internally.
 *
 * index != 0 (MSPOS1): positions of samples 3,4,5 followed by the minimum
 * over all twelve values (not sure if X or Y):
 *     X3, Y3, X4, Y4, X5, Y5, D1
 *
 * All minimum searches start from 11. */
static unsigned r300_get_mspos(int index, unsigned *p)
{
    unsigned k;

    if (index != 0) {
        unsigned nearest = 11;

        for (k = 0; k < 12; k++)
            nearest = p[k] < nearest ? p[k] : nearest;

        return R300_NIBBLES(p[6], p[7], p[8], p[9], p[10], p[11], nearest, 0);
    } else {
        unsigned min_x = 11;
        unsigned min_y = 11;

        /* Even slots are X coordinates, odd slots are Y coordinates. */
        for (k = 0; k < 12; k += 2) {
            if (p[k] < min_x)
                min_x = p[k];
            if (p[k + 1] < min_y)
                min_y = p[k + 1];
        }

        if (min_x == 8)
            min_x = 7;

        return R300_NIBBLES(p[0], p[1], p[2], p[3], p[4], p[5], min_y, min_x);
    }
}
 
/* Emit the pipelined part of the framebuffer state: the four US_OUT_FMT
 * registers and the two multisample position registers. */
void r300_emit_fb_state_pipelined(struct r300_context *r300,
                                  unsigned size, void *state)
{
    /* The sample coordinates are in the range [0,11], because
     * GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision.
     *
     * Some sample coordinates reach to neighboring pixels and should not
     * be used. (e.g. Y=11)
     *
     * The unused samples must be set to the positions of other valid
     * samples. */
    static unsigned sample_locs_1x[12] = {
        6,6, 6,6, 6,6, 6,6, 6,6, 6,6
    };
    static unsigned sample_locs_2x[12] = {
        3,9, 9,3, 9,3, 9,3, 9,3, 9,3
    };
    static unsigned sample_locs_4x[12] = {
        4,4, 8,8, 2,10, 10,2, 10,2, 10,2
    };
    static unsigned sample_locs_6x[12] = {
        3,1, 7,3, 11,5, 1,7, 5,9, 9,10
    };

    struct pipe_framebuffer_state *framebuffer =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    unsigned slot, used_cbufs = framebuffer->nr_cbufs;
    unsigned *sample_locs;
    unsigned mspos0, mspos1;
    CS_LOCALS(r300);

    /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be
     * marked as UNUSED in the US block. */
    if (r300->fb_multiwrite) {
        used_cbufs = MIN2(used_cbufs, 1);
    }

    BEGIN_CS(size);

    /* Colorbuffer format in the US block.
     * (must be written after unpipelined regs) */
    OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
    for (slot = 0; slot < used_cbufs; slot++) {
        OUT_CS(r300_surface(framebuffer->cbufs[slot])->format);
    }
    /* Slot 0 gets a C4_8 format when no colorbuffer occupies it. */
    for (; slot < 1; slot++) {
        OUT_CS(R300_US_OUT_FMT_C4_8 |
               R300_C0_SEL_B | R300_C1_SEL_G |
               R300_C2_SEL_R | R300_C3_SEL_A);
    }
    /* Every remaining slot is marked unused. */
    for (; slot < 4; slot++) {
        OUT_CS(R300_US_OUT_FMT_UNUSED);
    }

    /* Set sample positions. It depends on the framebuffer sample count.
     * These are pipelined regs and as such cannot be moved to the AA state.
     * Any sample count other than 2, 4 or 6 uses the 1x table. */
    switch (r300->num_samples) {
    case 2:
        sample_locs = sample_locs_2x;
        break;
    case 4:
        sample_locs = sample_locs_4x;
        break;
    case 6:
        sample_locs = sample_locs_6x;
        break;
    default:
        sample_locs = sample_locs_1x;
        break;
    }
    mspos0 = r300_get_mspos(0, sample_locs);
    mspos1 = r300_get_mspos(1, sample_locs);

    OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
    OUT_CS(mspos0);
    OUT_CS(mspos1);
    END_CS;
}
 
/* Start the current query, if there is one: select all pipes, write 0 to
 * ZB_ZPASS_DATA and mark the query as begun. RV530 uses a different
 * pipe-select register than the other chips. */
void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state)
{
    struct r300_query *active = r300->query_current;
    CS_LOCALS(r300);

    if (!active)
        return;

    BEGIN_CS(size);
    if (r300->screen->caps.family != CHIP_RV530) {
        OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
    } else {
        OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
    }
    OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
    END_CS;

    active->begin_emitted = TRUE;
}
 
/* End a query on chips that report results per fragment pipe: for each of
 * the gb_pipes pipes, enable writes to that pipe only and point ZPASS_ADDR
 * at that pipe's result slot, then re-enable all pipes.
 *
 * Aborts if the reported pipe count is not 1..4. */
static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
struct r300_query *query)
{
struct r300_capabilities* caps = &r300->screen->caps;
uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes;
CS_LOCALS(r300);

assert(gb_pipes);

/* Six dwords are reserved per pipe plus two for the final reset. */
BEGIN_CS(6 * gb_pipes + 2);
/* I'm not so sure I like this switch, but it's hard to be elegant
* when there's so many special cases...
*
* So here's the basic idea. For each pipe, enable writes to it only,
* then put out the relocation for ZPASS_ADDR, taking into account a
* 4-byte offset for each pipe. RV380 and older are special; they have
* only two pipes, and the second pipe's enable is on bit 3, not bit 1,
* so there's a chipset cap for that.
*
* The cases deliberately fall through, so entering at case N emits
* pipes N-1 down to 0. */
switch (gb_pipes) {
case 4:
/* pipe 3 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4);
OUT_CS_RELOC(r300->query_current);
/* fall through */
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4);
OUT_CS_RELOC(r300->query_current);
/* fall through */
case 2:
/* pipe 1 only */
/* As mentioned above, accommodate RV380 and older. */
OUT_CS_REG(R300_SU_REG_DEST,
1 << (caps->high_second_pipe ? 3 : 1));
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);
OUT_CS_RELOC(r300->query_current);
/* fall through */
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);
OUT_CS_RELOC(r300->query_current);
break;
default:
fprintf(stderr, "r300: Implementation error: Chipset reports %d"
" pixel pipes!\n", gb_pipes);
abort();
}

/* And, finally, reset it to normal... */
OUT_CS_REG(R300_SU_REG_DEST, 0xF);
END_CS;
}
 
/* End a query on RV530 with a single result slot: select pipe 0, point
 * ZPASS_ADDR at slot query->num_results (4 bytes per slot), then select
 * all pipes again. */
static void rv530_emit_query_end_single_z(struct r300_context *r300,
struct r300_query *query)
{
CS_LOCALS(r300);

BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4);
OUT_CS_RELOC(r300->query_current);
/* Restore the destination to all pipes. */
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
 
/* End a query on RV530 with two result slots: for pipe 0 and then pipe 1,
 * select that pipe and point ZPASS_ADDR at its own slot (num_results + 0
 * and num_results + 1, 4 bytes per slot), then select all pipes again. */
static void rv530_emit_query_end_double_z(struct r300_context *r300,
struct r300_query *query)
{
CS_LOCALS(r300);

BEGIN_CS(14);
/* Pipe 0. */
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);
OUT_CS_RELOC(r300->query_current);
/* Pipe 1. */
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);
OUT_CS_RELOC(r300->query_current);
/* Restore the destination to all pipes. */
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
 
/* End the current query, if one exists and its begin was emitted.
 *
 * Dispatches to the chip-specific end sequence, clears begin_emitted and
 * advances num_results by the query's pipe count. When the result buffer
 * is nearly full, num_results is rewound to the middle of the buffer and a
 * message is printed. */
void r300_emit_query_end(struct r300_context* r300)
{
    struct r300_query *active = r300->query_current;

    if (!active || active->begin_emitted == FALSE)
        return;

    if (r300->screen->caps.family != CHIP_RV530) {
        r300_emit_query_end_frag_pipes(r300, active);
    } else if (r300->screen->info.r300_num_z_pipes == 2) {
        rv530_emit_query_end_double_z(r300, active);
    } else {
        rv530_emit_query_end_single_z(r300, active);
    }

    active->begin_emitted = FALSE;
    active->num_results += active->num_pipes;

    /* XXX grab all the results and reset the counter. */
    if (active->num_results >= active->buf->size / 4 - 4) {
        active->num_results = (active->buf->size / 4) / 2;
        fprintf(stderr, "r300: Rewinding OQBO...\n");
    }
}
 
/* Write `state` as a raw command table of `size` dwords. */
void r300_emit_invariant_state(struct r300_context *r300,
unsigned size, void *state)
{
CS_LOCALS(r300);
WRITE_CS_TABLE(state, size);
}
 
/* Emit the rasterizer state: the main pre-built table and, when polygon
 * offset is enabled, the 5-dword polygon offset table for the current
 * zbuffer depth (the zb16 table for 16 bpp, the zb24 table otherwise). */
void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state)
{
    struct r300_rs_state *raster = state;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_TABLE(raster->cb_main, RS_STATE_MAIN_SIZE);
    if (raster->polygon_offset_enable) {
        if (r300->zbuffer_bpp != 16) {
            OUT_CS_TABLE(raster->cb_poly_offset_zb24, 5);
        } else {
            OUT_CS_TABLE(raster->cb_poly_offset_zb16, 5);
        }
    }
    END_CS;
}
 
void r300_emit_rs_block_state(struct r300_context* r300,
unsigned size, void* state)
{
struct r300_rs_block* rs = (struct r300_rs_block*)state;
unsigned i;
/* It's the same for both INST and IP tables */
unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
CS_LOCALS(r300);
 
if (DBG_ON(r300, DBG_RS_BLOCK)) {
r500_dump_rs_block(rs);
 
fprintf(stderr, "r300: RS emit:\n");
 
for (i = 0; i < count; i++)
fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]);
 
for (i = 0; i < count; i++)
fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]);
 
fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n",
rs->count, rs->inst_count);
}
 
BEGIN_CS(size);
OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
OUT_CS(rs->vap_vtx_state_cntl);
OUT_CS(rs->vap_vsm_vtx_assm);
OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
OUT_CS(rs->vap_out_vtx_fmt[0]);
OUT_CS(rs->vap_out_vtx_fmt[1]);
OUT_CS_REG_SEQ(R300_GB_ENABLE, 1);
OUT_CS(rs->gb_enable);
 
if (r300->screen->caps.is_r500) {
OUT_CS_REG_SEQ(R500_RS_IP_0, count);
} else {
OUT_CS_REG_SEQ(R300_RS_IP_0, count);
}
OUT_CS_TABLE(rs->ip, count);
 
OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
OUT_CS(rs->count);
OUT_CS(rs->inst_count);
 
if (r300->screen->caps.is_r500) {
OUT_CS_REG_SEQ(R500_RS_INST_0, count);
} else {
OUT_CS_REG_SEQ(R300_RS_INST_0, count);
}
OUT_CS_TABLE(rs->inst, count);
END_CS;
}
 
/* Write SC_SCREENDOOR: the low 6 bits of the sample mask pointed to by
 * `state`, replicated four times at 6-bit intervals. */
void r300_emit_sample_mask(struct r300_context *r300,
                           unsigned size, void *state)
{
    unsigned mask = *(unsigned*)state & 0x3f;
    unsigned screendoor = 0;
    unsigned copy;
    CS_LOCALS(r300);

    for (copy = 0; copy < 4; copy++)
        screendoor |= mask << (6 * copy);

    BEGIN_CS(size);
    OUT_CS_REG(R300_SC_SCREENDOOR, screendoor);
    END_CS;
}
 
/* Program clip rectangle 0 from a pipe_scissor_state.
 *
 * The bottom-right corner is written as max - 1. On non-R500 chips every
 * coordinate is additionally biased by 1440. */
void r300_emit_scissor_state(struct r300_context* r300,
                             unsigned size, void* state)
{
    struct pipe_scissor_state *rect = state;
    int bias = r300->screen->caps.is_r500 ? 0 : 1440;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_SC_CLIPRECT_TL_0, 2);
    OUT_CS(((rect->minx + bias) << R300_CLIPRECT_X_SHIFT) |
           ((rect->miny + bias) << R300_CLIPRECT_Y_SHIFT));
    OUT_CS(((rect->maxx + bias - 1) << R300_CLIPRECT_X_SHIFT) |
           ((rect->maxy + bias - 1) << R300_CLIPRECT_Y_SHIFT));
    END_CS;
}
 
/* Emit TX_ENABLE followed by the filter, border color, format and offset
 * registers of every texture unit whose bit is set in tx_enable. The
 * US_FORMAT0 register is written as well when the chip has that cap. */
void r300_emit_textures_state(struct r300_context *r300,
                              unsigned size, void *state)
{
    struct r300_textures_state *textures = state;
    boolean has_us_format = r300->screen->caps.has_us_format;
    unsigned unit;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG(R300_TX_ENABLE, textures->tx_enable);

    for (unit = 0; unit < textures->count; unit++) {
        struct r300_texture_sampler_state *regs;
        struct r300_resource *res;
        unsigned reg_off = unit * 4;    /* per-unit register stride */

        if (!((1 << unit) & textures->tx_enable))
            continue;

        regs = &textures->regs[unit];
        res = r300_resource(textures->sampler_views[unit]->base.texture);

        OUT_CS_REG(R300_TX_FILTER0_0 + reg_off, regs->filter0);
        OUT_CS_REG(R300_TX_FILTER1_0 + reg_off, regs->filter1);
        OUT_CS_REG(R300_TX_BORDER_COLOR_0 + reg_off, regs->border_color);

        OUT_CS_REG(R300_TX_FORMAT0_0 + reg_off, regs->format.format0);
        OUT_CS_REG(R300_TX_FORMAT1_0 + reg_off, regs->format.format1);
        OUT_CS_REG(R300_TX_FORMAT2_0 + reg_off, regs->format.format2);

        OUT_CS_REG(R300_TX_OFFSET_0 + reg_off, regs->format.tile_config);
        OUT_CS_RELOC(res);

        if (has_us_format) {
            OUT_CS_REG(R500_US_FORMAT0_0 + reg_off, regs->format.us_format0);
        }
    }
    END_CS;
}
 
void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
boolean indexed, int instance_id)
{
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
struct r300_resource *buf;
int i;
unsigned vertex_array_count = r300->velems->count;
unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
struct pipe_vertex_buffer *vb1, *vb2;
unsigned *hw_format_size = r300->velems->format_size;
unsigned size1, size2, offset1, offset2, stride1, stride2;
CS_LOCALS(r300);
 
BEGIN_CS(2 + packet_size + vertex_array_count * 2);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
 
if (instance_id == -1) {
/* Non-instanced arrays. This ignores instance_divisor and instance_id. */
for (i = 0; i < vertex_array_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
vb2 = &vbuf[velem[i+1].vertex_buffer_index];
size1 = hw_format_size[i];
size2 = hw_format_size[i+1];
 
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
}
 
if (vertex_array_count & 1) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
size1 = hw_format_size[i];
 
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
}
 
for (i = 0; i < vertex_array_count; i++) {
buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer);
OUT_CS_RELOC(buf);
}
} else {
/* Instanced arrays. */
for (i = 0; i < vertex_array_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
vb2 = &vbuf[velem[i+1].vertex_buffer_index];
size1 = hw_format_size[i];
size2 = hw_format_size[i+1];
 
if (velem[i].instance_divisor) {
stride1 = 0;
offset1 = vb1->buffer_offset + velem[i].src_offset +
(instance_id / velem[i].instance_divisor) * vb1->stride;
} else {
stride1 = vb1->stride;
offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
}
if (velem[i+1].instance_divisor) {
stride2 = 0;
offset2 = vb2->buffer_offset + velem[i+1].src_offset +
(instance_id / velem[i+1].instance_divisor) * vb2->stride;
} else {
stride2 = vb2->stride;
offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride;
}
 
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2));
OUT_CS(offset1);
OUT_CS(offset2);
}
 
if (vertex_array_count & 1) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
size1 = hw_format_size[i];
 
if (velem[i].instance_divisor) {
stride1 = 0;
offset1 = vb1->buffer_offset + velem[i].src_offset +
(instance_id / velem[i].instance_divisor) * vb1->stride;
} else {
stride1 = vb1->stride;
offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
}
 
OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
OUT_CS(offset1);
}
 
for (i = 0; i < vertex_array_count; i++) {
buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer);
OUT_CS_RELOC(buf);
}
}
END_CS;
}
 
/* Emit the single vertex array used by the SWTCL path: one 3D_LOAD_VBPNTR
 * packet pointing at r300->vbo_cs, with r300->vertex_info.size used as both
 * the size and the stride.
 *
 * indexed - when FALSE, R300_VC_FORCE_PREFETCH is set in the count dword. */
void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
{
CS_LOCALS(r300);

DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, "
"vertex size %d\n", r300->vbo,
r300->vertex_info.size);
/* Set the pointer to our vertex buffer. The emitted values are this:
* PACKET3 [3D_LOAD_VBPNTR]
* COUNT [1]
* FORMAT [size | stride << 8]
* OFFSET [offset into BO]
* VBPNTR [relocated BO]
*/
BEGIN_CS(7);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
OUT_CS(r300->draw_vbo_offset);
OUT_CS(0);

/* The relocation is written through the winsys directly instead of
 * OUT_CS_RELOC, so the two dwords are accounted for by hand.
 * NOTE(review): cs_winsys and cs_copy are presumably declared by
 * CS_LOCALS - confirm. */
assert(r300->vbo_cs);
cs_winsys->cs_write_reloc(cs_copy, r300->vbo_cs);
CS_USED_DW(2);
END_CS;
}
 
/* Emit the VAP_PROG_STREAM_CNTL and VAP_PROG_STREAM_CNTL_EXT register
 * sequences (streams->count dwords each). With DBG_PSC enabled the values
 * are also printed to stderr first. */
void r300_emit_vertex_stream_state(struct r300_context* r300,
                                   unsigned size, void* state)
{
    struct r300_vertex_stream_state *psc = state;
    unsigned n;
    CS_LOCALS(r300);

    if (DBG_ON(r300, DBG_PSC)) {
        fprintf(stderr, "r300: PSC emit:\n");

        for (n = 0; n < psc->count; n++) {
            fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", n,
                    psc->vap_prog_stream_cntl[n]);
        }

        for (n = 0; n < psc->count; n++) {
            fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", n,
                    psc->vap_prog_stream_cntl_ext[n]);
        }
    }

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, psc->count);
    OUT_CS_TABLE(psc->vap_prog_stream_cntl, psc->count);
    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, psc->count);
    OUT_CS_TABLE(psc->vap_prog_stream_cntl_ext, psc->count);
    END_CS;
}
 
/* Write 0 to VAP_PVS_STATE_FLUSH_REG. `state` is not used. */
void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)
{
CS_LOCALS(r300);

BEGIN_CS(size);
OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
END_CS;
}
 
/* Write `state` as a raw command table of `size` dwords. */
void r300_emit_vap_invariant_state(struct r300_context *r300,
unsigned size, void *state)
{
CS_LOCALS(r300);
WRITE_CS_TABLE(state, size);
}
 
/* Emit the vertex shader: code control registers, the program body, VAP_CNTL
 * and the flow control registers.
 *
 * The slot and controller counts in VAP_CNTL are derived from the vertex
 * memory size (128 on R500, 72 otherwise) divided by the number of inputs,
 * outputs and temporaries the program uses, capped at 10 and 5. */
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
{
    struct r300_vertex_shader *shader = state;
    struct r300_vertex_program_code *prog = &shader->code;
    struct r300_screen *screen = r300->screen;
    unsigned num_insts = prog->length / 4;
    unsigned last_inst = num_insts - 1;

    unsigned vtx_mem = screen->caps.is_r500 ? 128 : 72;
    unsigned num_inputs = MAX2(util_bitcount(prog->InputsRead), 1);
    unsigned num_outputs = MAX2(util_bitcount(prog->OutputsWritten), 1);
    unsigned num_temps = MAX2(prog->num_temporaries, 1);

    unsigned num_slots = MIN3(vtx_mem / num_inputs,
                              vtx_mem / num_outputs, 10);
    unsigned num_cntlrs = MIN2(vtx_mem / num_temps, 5);

    CS_LOCALS(r300);

    BEGIN_CS(size);

    /* R300_VAP_PVS_CODE_CNTL_0
     * R300_VAP_PVS_CONST_CNTL
     * R300_VAP_PVS_CODE_CNTL_1
     * See the r5xx docs for instructions on how to use these. */
    OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |
               R300_PVS_XYZW_VALID_INST(last_inst) |
               R300_PVS_LAST_INST(last_inst));
    OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, last_inst);

    /* Upload the program body starting at vector index 0. */
    OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
    OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, prog->length);
    OUT_CS_TABLE(prog->body.d, prog->length);

    OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(num_slots) |
               R300_PVS_NUM_CNTLRS(num_cntlrs) |
               R300_PVS_NUM_FPUS(screen->caps.num_vert_fpus) |
               R300_PVS_VF_MAX_VTX_NUM(12) |
               (screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));

    /* Emit flow control instructions. Even if there are no fc instructions,
     * we still need to write the registers to make sure they are cleared. */
    OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, prog->fc_ops);
    if (!screen->caps.is_r500) {
        OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS);
        OUT_CS_TABLE(prog->fc_op_addrs.r300, R300_VS_MAX_FC_OPS);
    } else {
        OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2);
        OUT_CS_TABLE(prog->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2);
    }
    OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS);
    OUT_CS_TABLE(prog->fc_loop_index, R300_VS_MAX_FC_OPS);

    END_CS;
}
 
/* Emit the vertex shader constants.
 *
 * Writes R300_VAP_PVS_CONST_CNTL, then uploads through the
 * VECTOR_INDX_REG / UPLOAD_DATA register pair:
 *   1. the external constants taken from the constant buffer passed in
 *      "state" (gathered through remap_table when one is present), and
 *   2. the shader's immediates, placed right behind the externals.
 *
 * r300:  context whose command stream receives the packets
 * size:  dword count handed to BEGIN_CS
 * state: the struct r300_constant_buffer to upload from */
void r300_emit_vs_constants(struct r300_context* r300,
                            unsigned size, void *state)
{
    struct r300_vertex_shader *vs =
        (struct r300_vertex_shader*)r300->vs_state.state;
    struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
    const unsigned num_externals = vs->externals_count;
    /* Immediates occupy the constant slots [first_imm, end_imm). */
    const int first_imm = vs->externals_count;
    const int end_imm = vs->code.constants.Count;
    const int num_imms = vs->immediates_count;
    /* Upload address of the first constant of this buffer. */
    const unsigned upload_base =
        (r300->screen->caps.is_r500 ?
         R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base;
    unsigned slot;
    CS_LOCALS(r300);

    BEGIN_CS(size);

    OUT_CS_REG(R300_VAP_PVS_CONST_CNTL,
               R300_PVS_CONST_BASE_OFFSET(buf->buffer_base) |
               R300_PVS_MAX_CONST_ADDR(MAX2(end_imm - 1, 0)));

    /* External constants. */
    if (num_externals) {
        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, upload_base);
        OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, num_externals * 4);

        if (!buf->remap_table) {
            /* No remapping: the buffer is uploaded as one contiguous run. */
            OUT_CS_TABLE(buf->ptr, num_externals * 4);
        } else {
            /* Remapped: fetch each 4-dword vector from its source slot. */
            for (slot = 0; slot < num_externals; slot++) {
                uint32_t *vec = buf->ptr + buf->remap_table[slot] * 4;
                OUT_CS_TABLE(vec, 4);
            }
        }
    }

    /* Immediates. */
    if (num_imms) {
        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, upload_base + first_imm);
        OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, num_imms * 4);

        for (slot = first_imm; slot < end_imm; slot++) {
            const float *imm = vs->code.constants.Constants[slot].u.Immediate;
            OUT_CS_TABLE(imm, 4);
        }
    }

    END_CS;
}
 
/* Emit the viewport transform.
 *
 * Fills the six consecutive registers starting at R300_SE_VPORT_XSCALE
 * from the six values starting at the state's xscale member, then writes
 * the VTE control word.
 *
 * state: the struct r300_viewport_state to emit */
void r300_emit_viewport_state(struct r300_context* r300,
                              unsigned size, void* state)
{
    struct r300_viewport_state* vp = (struct r300_viewport_state*)state;
    CS_LOCALS(r300);

    BEGIN_CS(size);

    OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
    OUT_CS_TABLE(&vp->xscale, 6);

    OUT_CS_REG(R300_VAP_VTE_CNTL, vp->vte_control);

    END_CS;
}
 
/* Emit a 3D_CLEAR_HIZ packet for the currently bound depth buffer.
 *
 * The packet carries a zero start value, the HiZ dword count of the bound
 * mipmap level and r300->hiz_clear_value.  Afterwards HiZ is flagged as
 * in use, the HiZ function is reset to HIZ_FUNC_NONE and the Hyper-Z atom
 * is marked dirty so it gets re-emitted.
 *
 * The "state" argument is not used. */
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_resource *zstex = r300_resource(fb->zsbuf->texture);
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2);
    OUT_CS(0);
    OUT_CS(zstex->tex.hiz_dwords[fb->zsbuf->u.tex.level]);
    OUT_CS(r300->hiz_clear_value);
    END_CS;

    /* The HiZ RAM of the current zbuffer now holds valid data. */
    r300->hiz_in_use = TRUE;
    r300->hiz_func = HIZ_FUNC_NONE;
    r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
 
/* Emit a 3D_CLEAR_ZMASK packet for the currently bound depth buffer.
 *
 * The packet carries a zero start value, the ZMASK dword count of the
 * bound mipmap level and a zero clear value.  Afterwards the ZMASK is
 * flagged as in use and the Hyper-Z atom is marked dirty.
 *
 * The "state" argument is not used. */
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_resource *zstex = r300_resource(fb->zsbuf->texture);
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
    OUT_CS(0);
    OUT_CS(zstex->tex.zmask_dwords[fb->zsbuf->u.tex.level]);
    OUT_CS(0);
    END_CS;

    /* The ZMASK RAM of the current zbuffer now holds valid data. */
    r300->zmask_in_use = TRUE;
    r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
 
/* Emit a 3D_CLEAR_CMASK packet for the first bound color buffer.
 *
 * The packet carries a zero start value, the CMASK dword count of the
 * texture behind cbufs[0] and a zero clear value.  Afterwards the CMASK
 * is flagged as in use and the framebuffer state is marked dirty with
 * R300_CHANGED_CMASK_ENABLE.
 *
 * The "state" argument is not used. */
void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_resource *cbtex = r300_resource(fb->cbufs[0]->texture);
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2);
    OUT_CS(0);
    OUT_CS(cbtex->tex.cmask_dwords);
    OUT_CS(0);
    END_CS;

    /* The CMASK RAM of the first color buffer now holds valid data. */
    r300->cmask_in_use = TRUE;
    r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE);
}
 
/* Emit the ZTOP state: a single write of z_buffer_top to R300_ZB_ZTOP.
 *
 * state: the struct r300_ztop_state to emit */
void r300_emit_ztop_state(struct r300_context* r300,
                          unsigned size, void* state)
{
    struct r300_ztop_state* zt = (struct r300_ztop_state*)state;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG(R300_ZB_ZTOP, zt->z_buffer_top);
    END_CS;
}
 
/* Emit a texture cache invalidation: a single write of 0 to
 * R300_TX_INVALTAGS. The "state" argument is not used. */
void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state)
{
CS_LOCALS(r300);

BEGIN_CS(size);
/* NOTE(review): presumably the write itself triggers the invalidation and
 * the written value is irrelevant -- confirm against the register docs. */
OUT_CS_REG(R300_TX_INVALTAGS, 0);
END_CS;
}
 
/* Register every buffer the upcoming commands reference with the command
 * stream and ask the winsys to validate the resulting buffer list.
 *
 * Relocations are added for: the color and depth buffers (when the
 * framebuffer atom is dirty), the AA resolve destination (when the AA atom
 * is dirty), the enabled textures (when the texture atom is dirty), the
 * current occlusion query buffer, the SWTCL vertex buffer, the HWTCL vertex
 * buffers (only if requested and the vertex arrays are dirty) and the
 * index buffer.
 *
 * r300:                        the context
 * do_validate_vertex_buffers:  whether to add the HWTCL vertex buffers
 * index_buffer:                index buffer to add, or NULL
 *
 * Returns TRUE once validation succeeds.  If the first validation fails,
 * the whole list is rebuilt and validated one more time; FALSE is returned
 * if that second attempt fails as well. */
boolean r300_emit_buffer_validate(struct r300_context *r300,
                                  boolean do_validate_vertex_buffers,
                                  struct pipe_resource *index_buffer)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
    struct r300_textures_state *texstate =
        (struct r300_textures_state*)r300->textures_state.state;
    struct r300_resource *tex;
    unsigned attempt, i;

    /* Two passes at most: the initial one and a single retry. */
    for (attempt = 0; attempt < 2; attempt++) {
        if (r300->fb_state.dirty) {
            /* Color buffers. */
            for (i = 0; i < fb->nr_cbufs; i++) {
                tex = r300_resource(fb->cbufs[i]->texture);
                assert(tex && tex->buf && "cbuf is marked, but NULL!");
                r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
                                        RADEON_USAGE_READWRITE,
                                        r300_surface(fb->cbufs[i])->domain);
            }

            /* Depth buffer. */
            if (fb->zsbuf) {
                tex = r300_resource(fb->zsbuf->texture);
                assert(tex && tex->buf && "zsbuf is marked, but NULL!");
                r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
                                        RADEON_USAGE_READWRITE,
                                        r300_surface(fb->zsbuf)->domain);
            }
        }

        /* AA resolve destination. */
        if (r300->aa_state.dirty && aa->dest) {
            r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf,
                                    RADEON_USAGE_WRITE,
                                    aa->dest->domain);
        }

        /* Textures of the enabled units. */
        if (r300->textures_state.dirty) {
            for (i = 0; i < texstate->count; i++) {
                if (texstate->tx_enable & (1 << i)) {
                    tex = r300_resource(
                        texstate->sampler_views[i]->base.texture);
                    r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
                                            RADEON_USAGE_READ,
                                            tex->domain);
                }
            }
        }

        /* Occlusion query buffer. */
        if (r300->query_current) {
            r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
                                    RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
        }

        /* Vertex buffer of the SWTCL path. */
        if (r300->vbo_cs) {
            r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs,
                                    RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
        }

        /* Vertex buffers of the HWTCL path; unbound slots are skipped. */
        if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
            for (i = 0; i < r300->nr_vertex_buffers; i++) {
                struct pipe_resource *vb = r300->vertex_buffer[i].buffer;

                if (vb) {
                    r300->rws->cs_add_reloc(r300->cs,
                                            r300_resource(vb)->cs_buf,
                                            RADEON_USAGE_READ,
                                            r300_resource(vb)->domain);
                }
            }
        }

        /* Index buffer of the HWTCL path. */
        if (index_buffer) {
            r300->rws->cs_add_reloc(r300->cs,
                                    r300_resource(index_buffer)->cs_buf,
                                    RADEON_USAGE_READ,
                                    r300_resource(index_buffer)->domain);
        }

        /* Validate (flush is called inside cs_validate on failure). */
        if (r300->rws->cs_validate(r300->cs)) {
            return TRUE;
        }
    }

    /* Still failing after the retry; give up rather than loop forever. */
    return FALSE;
}
 
/* Return the number of dwords needed to emit all dirty atoms: the sum of
 * the sizes of every atom in the dirty range whose dirty flag is set,
 * plus 32 dwords of slack. */
unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
{
    struct r300_atom* atom;
    /* Start from the safety margin ("let's reserve some more, just in
     * case") and add the atoms on top of it. */
    unsigned total = 32;

    foreach_dirty_atom(r300, atom) {
        total += atom->dirty ? atom->size : 0;
    }

    return total;
}
 
/* Return the number of dwords to keep free for what is emitted in flush,
 * at the end of a command stream. */
unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
{
    unsigned total;

    total = 26;                                           /* emit_query_end */
    total += r300->hyperz_state.size + 2;   /* emit_hyperz_end + zcache flush */
    total += r300->screen->caps.is_r500 ? 2 : 0;         /* emit_index_bias */
    total += r300->screen->info.drm_minor >= 6 ? 3 : 0;  /* MSPOS */

    return total;
}
 
/* Emit all dirty state.
 *
 * Walks the dirty atom range, calls the emit callback of every atom whose
 * dirty flag is set (passing the atom's size and state) and clears the
 * flag.  The dirty range is then reset and dirty_hw is incremented. */
void r300_emit_dirty_state(struct r300_context* r300)
{
    struct r300_atom *atom;

    foreach_dirty_atom(r300, atom) {
        if (!atom->dirty) {
            continue;
        }

        atom->emit(r300, atom->size, atom->state);
        atom->dirty = FALSE;
    }

    /* Nothing is dirty any more. */
    r300->first_dirty = NULL;
    r300->last_dirty = NULL;

    r300->dirty_hw++;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_emit.h
0,0 → 1,132
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_EMIT_H
#define R300_EMIT_H

#include "r300_context.h"

/* Only named here; this header does not need their definitions. */
struct rX00_fragment_program_code;
struct r300_vertex_program_code;

uint32_t pack_float24(float f);

void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
boolean indexed, int instance_id);

/* Most functions below share the (r300, size, state) signature. That is the
 * shape r300_emit_dirty_state() uses when it invokes an atom's callback:
 * atom->emit(r300, atom->size, atom->state). "size" is the dword count the
 * function hands to BEGIN_CS; "state" is the atom's state pointer. */

void r300_emit_blend_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_blend_color_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_clip_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_hyperz_state(struct r300_context *r300,
unsigned size, void *state);

/* Called by the flush code right before the command stream is submitted. */
void r300_emit_hyperz_end(struct r300_context *r300);

void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);

void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);

void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state);

void r500_emit_fs(struct r300_context* r300, unsigned size, void *state);

void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);

void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state);

void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state);

void r300_emit_fb_state_pipelined(struct r300_context *r300,
unsigned size, void *state);

void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);

void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);

void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);

/* Called by the flush code right before the command stream is submitted. */
void r300_emit_query_end(struct r300_context* r300);

void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state);

void r300_emit_rs_block_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_sample_mask(struct r300_context *r300,
unsigned size, void *state);

void r300_emit_scissor_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_textures_state(struct r300_context *r300,
unsigned size, void *state);

void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed);

void r300_emit_vap_invariant_state(struct r300_context *r300,
unsigned size, void *state);

void r300_emit_vertex_stream_state(struct r300_context* r300,
unsigned size, void* state);

/* Uploads the external vertex shader constants from the constant buffer
 * passed as "state", followed by the shader's immediates. */
void r300_emit_vs_constants(struct r300_context* r300,
unsigned size, void *state);

void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state);

void r300_emit_viewport_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_ztop_state(struct r300_context* r300,
unsigned size, void* state);

void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state);

void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state);

void r300_emit_invariant_state(struct r300_context *r300,
unsigned size, void *state);

/* Each of these emits the matching 3D_CLEAR_* packet for the bound
 * depth buffer (HIZ, ZMASK) or first color buffer (CMASK) and marks the
 * corresponding RAM as in use. */
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state);
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state);
void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state);

/* Dword budget helpers: the first returns the summed size of all dirty atoms
 * plus 32 dwords of slack, the second the dwords emitted at flush time. */
unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
unsigned r300_get_num_cs_end_dwords(struct r300_context *r300);

/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300);

/* Adds relocations for all buffers in use and validates the command stream.
 * Returns FALSE if validation still fails after one retry. */
boolean r300_emit_buffer_validate(struct r300_context *r300,
boolean do_validate_vertex_buffers,
struct pipe_resource *index_buffer);

#endif /* R300_EMIT_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_flush.c
0,0 → 1,152
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "draw/draw_context.h"
#include "draw/draw_private.h"
 
#include "util/u_simple_list.h"
#include "util/u_upload_mgr.h"
 
#include "os/os_time.h"
 
#include "r300_context.h"
#include "r300_cs.h"
#include "r300_emit.h"
 
 
/* Finish the current command stream, submit it and prepare the context for
 * the next one.
 *
 * Emits the end-of-stream packets (Hyper-Z end, query end, a zero index
 * bias on R500 and the MSPOS registers on DRM minor >= 6), flushes the CS
 * with the given winsys flags and then marks every atom that has a state
 * (or may legitimately have none) dirty, so the whole state is re-emitted
 * into the next command stream. */
static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags)
{
    struct r300_atom *atom;

    r300_emit_hyperz_end(r300);
    r300_emit_query_end(r300);
    if (r300->screen->caps.is_r500) {
        r500_emit_index_bias(r300, 0);
    }

    /* The DDX doesn't set these regs. */
    if (r300->screen->info.drm_minor >= 6) {
        CS_LOCALS(r300);
        OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
        OUT_CS(0x66666666);
        /* One hex digit shorter than the value above, as in the original.
         * NOTE(review): presumably the second register has fewer fields --
         * confirm against the register docs. */
        OUT_CS(0x6666666);
    }

    r300->flush_counter++;
    r300->rws->cs_flush(r300->cs, flags, 0);
    r300->dirty_hw = 0;

    /* New kitchen sink, baby: everything has to be emitted again. */
    foreach_atom(r300, atom) {
        if (!atom->state && !atom->allow_null_state) {
            continue;
        }
        r300_mark_atom_dirty(r300, atom);
    }
    r300->vertex_arrays_dirty = TRUE;

    if (r300->screen->caps.has_tcl) {
        return;
    }

    /* SWTCL: the HWTCL atoms must not stay marked. */
    r300->vs_state.dirty = FALSE;
    r300->vs_constants.dirty = FALSE;
    r300->clip_state.dirty = FALSE;
}
 
/* Flush the command stream of a context.
 *
 * pipe:  the context to flush
 * flags: RADEON_FLUSH_* flags passed on to the winsys;
 *        RADEON_FLUSH_KEEP_TILING_FLAGS is added on DRM minor >= 12
 * fence: if non-NULL, receives a newly created fence for this flush
 *
 * After the flush the Hyper-Z bookkeeping is updated: Hyper-Z access is
 * kept while Z clears keep happening and is given up once no Z clear has
 * been seen for 2 seconds. */
void r300_flush(struct pipe_context *pipe,
                unsigned flags,
                struct pipe_fence_handle **fence)
{
    struct r300_context *r300 = r300_context(pipe);
    struct pb_buffer **rfence = (struct pb_buffer**)fence;

    if (r300->screen->info.drm_minor >= 12) {
        flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
    }

    if (rfence) {
        /* Create a fence, which is a dummy BO. */
        *rfence = r300->rws->buffer_create(r300->rws, 1, 1, TRUE,
                                           RADEON_DOMAIN_GTT);
        /* Add the fence as a dummy relocation. */
        r300->rws->cs_add_reloc(r300->cs,
                                r300->rws->buffer_get_cs_handle(*rfence),
                                RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
    }

    if (r300->dirty_hw) {
        r300_flush_and_cleanup(r300, flags);
    } else {
        if (rfence) {
            /* We have to create a fence object, but the command stream is empty
             * and we cannot emit an empty CS. Let's write to some reg. */
            CS_LOCALS(r300);
            OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
            r300->rws->cs_flush(r300->cs, flags, 0);
        } else {
            /* Even if hw is not dirty, we should at least reset the CS in case
             * the space checking failed for the first draw operation. */
            r300->rws->cs_flush(r300->cs, flags, 0);
        }
    }

    /* Update Hyper-Z status. */
    if (r300->hyperz_enabled) {
        /* If there was a Z clear, keep Hyper-Z access. */
        if (r300->num_z_clears) {
            r300->hyperz_time_of_last_flush = os_time_get();
            r300->num_z_clears = 0;
        } else if (os_time_get() - r300->hyperz_time_of_last_flush > 2000000) {
            /* If there hasn't been a Z clear for 2 seconds, revoke Hyper-Z
             * access. The elapsed time is "now - last flush"; subtracting in
             * the other order never yields the elapsed time, so the timeout
             * could not work as described.
             * NOTE(review): assumes os_time_get() counts microseconds, which
             * is what 2000000 == 2 seconds implies -- confirm. */
            r300->hiz_in_use = FALSE;

            /* Decompress the Z buffer. */
            if (r300->zmask_in_use) {
                if (r300->locked_zbuffer) {
                    r300_decompress_zmask_locked(r300);
                } else {
                    r300_decompress_zmask(r300);
                }

                /* Submit the decompression before giving up access. */
                r300_flush_and_cleanup(r300, flags);
            }

            /* Revoke Hyper-Z access, so that some other process can take it. */
            r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS,
                                          FALSE);
            r300->hyperz_enabled = FALSE;
        }
    }
}
 
/* pipe_context::flush entry point.
 *
 * Translates the Gallium flush flags into winsys flags (only
 * PIPE_FLUSH_END_OF_FRAME is carried over, as RADEON_FLUSH_END_OF_FRAME)
 * and forwards to r300_flush(), which takes its arguments in a different
 * order. */
static void r300_flush_wrapped(struct pipe_context *pipe,
                               struct pipe_fence_handle **fence,
                               unsigned flags)
{
    unsigned winsys_flags = 0;

    if (flags & PIPE_FLUSH_END_OF_FRAME) {
        winsys_flags = RADEON_FLUSH_END_OF_FRAME;
    }

    r300_flush(pipe, winsys_flags, fence);
}
 
/* Install the flush entry point of this driver into the context's
 * pipe_context function table. */
void r300_init_flush_functions(struct r300_context* r300)
{
r300->context.flush = r300_flush_wrapped;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_fs.c
0,0 → 1,630
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Joakim Sindholt <opensource@zhasha.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
 
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_ureg.h"
 
#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_screen.h"
#include "r300_fs.h"
#include "r300_reg.h"
#include "r300_texture.h"
#include "r300_tgsi_to_rc.h"
 
#include "compiler/radeon_compiler.h"
 
/* Convert info about FS input semantics to r300_shader_semantics.
 *
 * After resetting fs_inputs, every input declared in the TGSI shader info
 * is recorded, by its input number, in the slot matching its semantic
 * name and index.  Inputs with an unknown semantic name are reported on
 * stderr and otherwise ignored.
 *
 * info:      scanned TGSI shader info to read from
 * fs_inputs: semantics table to fill in */
void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
                                struct r300_shader_semantics* fs_inputs)
{
    unsigned index;
    int i;

    r300_shader_semantics_reset(fs_inputs);

    for (i = 0; i < info->num_inputs; i++) {
        const unsigned semantic = info->input_semantic_name[i];

        index = info->input_semantic_index[i];

        if (semantic == TGSI_SEMANTIC_COLOR) {
            assert(index < ATTR_COLOR_COUNT);
            fs_inputs->color[index] = i;
        } else if (semantic == TGSI_SEMANTIC_GENERIC) {
            assert(index < ATTR_GENERIC_COUNT);
            fs_inputs->generic[index] = i;
        } else if (semantic == TGSI_SEMANTIC_FOG) {
            /* Fog, position and face exist only once. */
            assert(index == 0);
            fs_inputs->fog = i;
        } else if (semantic == TGSI_SEMANTIC_POSITION) {
            assert(index == 0);
            fs_inputs->wpos = i;
        } else if (semantic == TGSI_SEMANTIC_FACE) {
            assert(index == 0);
            fs_inputs->face = i;
        } else {
            fprintf(stderr, "r300: FP: Unknown input semantic: %i\n",
                    info->input_semantic_name[i]);
        }
    }
}
 
/* Tell the compiler which shader outputs are the color outputs and which
 * one is the depth output.
 *
 * Every slot is first set to shader->info.num_outputs, a value no real
 * output index can have, meaning "not present".  The outputs are then
 * scanned in declaration order: each COLOR output takes the next free
 * OutputColor[] slot and a POSITION output becomes OutputDepth.
 *
 * compiler: receives OutputColor[0..3] and OutputDepth
 * shader:   provides the scanned TGSI info of the shader */
static void find_output_registers(struct r300_fragment_program_compiler * compiler,
                                  struct r300_fragment_shader_code *shader)
{
    /* Number of OutputColor[] slots this function manages. */
    const unsigned max_colorbufs = 4;
    unsigned i, colorbuf_count = 0;

    /* Mark the outputs as not present initially */
    for (i = 0; i < max_colorbufs; i++) {
        compiler->OutputColor[i] = shader->info.num_outputs;
    }
    compiler->OutputDepth = shader->info.num_outputs;

    /* Now see where they really are. */
    for (i = 0; i < shader->info.num_outputs; ++i) {
        switch (shader->info.output_semantic_name[i]) {
        case TGSI_SEMANTIC_COLOR:
            /* Never write past the slots initialized above, even if a
             * shader declares more color outputs than there are slots;
             * the surplus outputs are ignored. */
            if (colorbuf_count < max_colorbufs) {
                compiler->OutputColor[colorbuf_count] = i;
                colorbuf_count++;
            }
            break;
        case TGSI_SEMANTIC_POSITION:
            compiler->OutputDepth = i;
            break;
        }
    }
}
 
/* Assign consecutive hardware registers, starting at 0, to the fragment
 * shader inputs that are in use.
 *
 * The inputs are visited in a fixed order: colors, face, generics, fog,
 * position.  For each one that is not ATTR_UNUSED the "allocate" callback
 * is invoked with the input number and the next hardware register.
 *
 * c:        compiler; its UserData points to the r300_shader_semantics
 * allocate: callback receiving (mydata, input, hwreg)
 * mydata:   opaque pointer passed through to the callback */
static void allocate_hardware_inputs(
    struct r300_fragment_program_compiler * c,
    void (*allocate)(void * data, unsigned input, unsigned hwreg),
    void * mydata)
{
    struct r300_shader_semantics* sem =
        (struct r300_shader_semantics*)c->UserData;
    int next_reg = 0;
    int n;

    for (n = 0; n < ATTR_COLOR_COUNT; n++) {
        if (sem->color[n] == ATTR_UNUSED) {
            continue;
        }
        allocate(mydata, sem->color[n], next_reg);
        next_reg++;
    }

    if (sem->face != ATTR_UNUSED) {
        allocate(mydata, sem->face, next_reg);
        next_reg++;
    }

    for (n = 0; n < ATTR_GENERIC_COUNT; n++) {
        if (sem->generic[n] == ATTR_UNUSED) {
            continue;
        }
        allocate(mydata, sem->generic[n], next_reg);
        next_reg++;
    }

    if (sem->fog != ATTR_UNUSED) {
        allocate(mydata, sem->fog, next_reg);
        next_reg++;
    }

    if (sem->wpos != ATTR_UNUSED) {
        allocate(mydata, sem->wpos, next_reg);
        next_reg++;
    }
}
 
/* Collect the non-shader state the fragment program compiler depends on.
 *
 * Sets alpha_to_one and, for every texture unit that has both a sampler
 * state and a sampler view bound, the per-unit fields derived from them:
 * shadow comparison, coordinate normalization, SNORM conversion, texture
 * swizzle and the NPOT wrap-mode handling.  Units lacking a sampler state
 * or a view are left untouched.
 *
 * r300:  context to read the bound texture state from
 * state: external state to fill in; fields are only ever set here, never
 *        cleared */
static void get_external_state(
    struct r300_context* r300,
    struct r300_fragment_program_external_state* state)
{
    struct r300_textures_state *texstate = r300->textures_state.state;
    unsigned unit;

    state->alpha_to_one = r300->alpha_to_one && r300->msaa_enable;

    for (unit = 0; unit < texstate->sampler_state_count; unit++) {
        struct r300_sampler_state *sampler = texstate->sampler_states[unit];
        struct r300_sampler_view *view = texstate->sampler_views[unit];
        struct r300_resource *tex;
        int is_snorm_1ch;

        if (!sampler || !view) {
            continue;
        }

        is_snorm_1ch = view->base.format == PIPE_FORMAT_RGTC1_SNORM ||
                       view->base.format == PIPE_FORMAT_LATC1_SNORM;

        if (sampler->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
            state->unit[unit].compare_mode_enabled = 1;

            /* Fortunately, no need to translate this. */
            state->unit[unit].texture_compare_func =
                sampler->state.compare_func;
        }

        state->unit[unit].non_normalized_coords =
            !sampler->state.normalized_coords;
        state->unit[unit].convert_unorm_to_snorm = is_snorm_1ch;

        /* Pass texture swizzling to the compiler, some lowering passes
         * need it. */
        if (is_snorm_1ch) {
            unsigned char composed[4];

            /* Format swizzle combined with the view's swizzle. */
            util_format_compose_swizzles(
                util_format_description(view->base.format)->swizzle,
                view->swizzle,
                composed);

            state->unit[unit].texture_swizzle =
                RC_MAKE_SWIZZLE(composed[0], composed[1],
                                composed[2], composed[3]);
        } else if (state->unit[unit].compare_mode_enabled) {
            state->unit[unit].texture_swizzle =
                RC_MAKE_SWIZZLE(view->swizzle[0], view->swizzle[1],
                                view->swizzle[2], view->swizzle[3]);
        }

        tex = r300_resource(view->base.texture);
        if (!tex->tex.is_npot) {
            continue;
        }

        /* XXX this should probably take into account STR, not just S. */
        switch (sampler->state.wrap_s) {
        case PIPE_TEX_WRAP_REPEAT:
            state->unit[unit].wrap_mode = RC_WRAP_REPEAT;
            break;

        case PIPE_TEX_WRAP_MIRROR_REPEAT:
            state->unit[unit].wrap_mode = RC_WRAP_MIRRORED_REPEAT;
            break;

        case PIPE_TEX_WRAP_MIRROR_CLAMP:
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
            state->unit[unit].wrap_mode = RC_WRAP_MIRRORED_CLAMP;
            break;

        default:
            state->unit[unit].wrap_mode = RC_WRAP_NONE;
        }

        if (tex->b.b.target == PIPE_TEXTURE_3D) {
            state->unit[unit].clamp_and_scale_before_fetch = TRUE;
        }
    }
}
 
static void r300_translate_fragment_shader(
struct r300_context* r300,
struct r300_fragment_shader_code* shader,
const struct tgsi_token *tokens);
 
static void r300_dummy_fragment_shader(
struct r300_context* r300,
struct r300_fragment_shader_code* shader)
{
struct pipe_shader_state state;
struct ureg_program *ureg;
struct ureg_dst out;
struct ureg_src imm;
 
/* Make a simple fragment shader which outputs (0, 0, 0, 1) */
ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
imm = ureg_imm4f(ureg, 0, 0, 0, 1);
 
ureg_MOV(ureg, out, imm);
ureg_END(ureg);
 
state.tokens = ureg_finalize(ureg);
 
shader->dummy = TRUE;
r300_translate_fragment_shader(r300, shader, state.tokens);
 
ureg_destroy(ureg);
}
 
/* Record the register writes that load a compiled fragment shader (its
 * code and its immediates) into shader->cb_code.
 *
 * For each of the two paths (R500, and R300/R400) the function first
 * computes cb_code_size, the exact number of dwords that will be recorded,
 * and then records them. The size formula and the OUT_CB* sequence below it
 * therefore have to be kept in sync.
 * NOTE(review): the dword counts in the comments below assume OUT_CB_REG
 * records 2 dwords and OUT_CB_REG_SEQ / OUT_CB_ONE_REG record 1 header
 * dword -- confirm against the macro definitions. */
static void r300_emit_fs_code_to_buffer(
struct r300_context *r300,
struct r300_fragment_shader_code *shader)
{
struct rX00_fragment_program_code *generic_code = &shader->code;
unsigned imm_count = shader->immediates_count;
/* Constant slots [imm_first, imm_end) are scanned for immediates. */
unsigned imm_first = shader->externals_count;
unsigned imm_end = generic_code->constants.Count;
struct rc_constant *constants = generic_code->constants.Constants;
unsigned i;
CB_LOCALS;

if (r300->screen->caps.is_r500) {
struct r500_fragment_program_code *code = &generic_code->code.r500;

/* 19 fixed dwords: the 6 single register writes before the instruction
 * upload, the VECTOR_INDEX write and the VECTOR_DATA header, plus the
 * 2 register writes at the end of the function. Then 6 dwords per
 * instruction, 7 per immediate and 2 per integer constant. */
shader->cb_code_size = 19 +
((code->inst_end + 1) * 6) +
imm_count * 7 +
code->int_constant_count * 2;

NEW_CB(shader->cb_code, shader->cb_code_size);
OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
/* The integer constant registers are 4 bytes apart. */
for(i = 0; i < code->int_constant_count; i++){
OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4),
code->int_constants[i]);
}
OUT_CB_REG(R500_US_CODE_RANGE,
R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
OUT_CB_REG(R500_US_CODE_OFFSET, 0);
OUT_CB_REG(R500_US_CODE_ADDR,
R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end));

/* Upload instructions 0..inst_end, six dwords each. */
OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6);
for (i = 0; i <= code->inst_end; i++) {
OUT_CB(code->inst[i].inst0);
OUT_CB(code->inst[i].inst1);
OUT_CB(code->inst[i].inst2);
OUT_CB(code->inst[i].inst3);
OUT_CB(code->inst[i].inst4);
OUT_CB(code->inst[i].inst5);
}

/* Emit immediates. Each one is uploaded on its own, addressed by its
 * constant index: 2 + 1 + 4 = 7 dwords per immediate. */
if (imm_count) {
for(i = imm_first; i < imm_end; ++i) {
if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
const float *data = constants[i].u.Immediate;

OUT_CB_REG(R500_GA_US_VECTOR_INDEX,
R500_GA_US_VECTOR_INDEX_TYPE_CONST |
(i & R500_GA_US_VECTOR_INDEX_MASK));
OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
OUT_CB_TABLE(data, 4);
}
}
}
} else { /* r300 */
struct r300_fragment_program_code *code = &generic_code->code.r300;
unsigned int alu_length = code->alu.length;
/* Number of 64-instruction ALU banks needed.
 * NOTE(review): alu_length - 1 wraps around if alu.length is 0;
 * presumably the compiler always produces at least one ALU
 * instruction -- confirm. */
unsigned int alu_iterations = ((alu_length - 1) / 64) + 1;
unsigned int tex_length = code->tex.length;
/* Number of 32-instruction TEX banks needed (0 if there are none). */
unsigned int tex_iterations =
tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0;
unsigned int iterations =
alu_iterations > tex_iterations ? alu_iterations : tex_iterations;
unsigned int bank = 0;

/* 15 fixed dwords: US_CONFIG, US_PIXSIZE and US_CODE_OFFSET, the
 * 4-register CODE_ADDR sequence and the 2 register writes at the end
 * of the function. */
shader->cb_code_size = 15 +
/* R400_US_CODE_BANK */
(r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) +
/* R400_US_CODE_EXT */
(r300->screen->caps.is_r400 ? 2 : 0) +
/* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */
(code->r390_mode ? (5 * alu_iterations) : 4) +
/* R400_US_ALU_EXT_ADDR_[0-63] */
(code->r390_mode ? (code->alu.length) : 0) +
/* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */
code->alu.length * 4 +
/* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */
(code->tex.length > 0 ? code->tex.length + tex_iterations : 0) +
imm_count * 5;

NEW_CB(shader->cb_code, shader->cb_code_size);

OUT_CB_REG(R300_US_CONFIG, code->config);
OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);

if (code->r390_mode) {
OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext);
} else if (r300->screen->caps.is_r400) {
/* This register appears to affect shaders even if r390_mode is
* disabled, so it needs to be set to 0 for shaders that
* don't use r390_mode. */
OUT_CB_REG(R400_US_CODE_EXT, 0);
}

OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
OUT_CB_TABLE(code->code_addr, 4);

/* Upload the code one bank at a time: up to 64 ALU and 32 TEX
 * instructions per pass. Without r390_mode the loop body runs exactly
 * once. */
do {
unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64);
unsigned int bank_alu_offset = bank * 64;
unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32);
unsigned int bank_tex_offset = bank * 32;

if (r300->screen->caps.is_r400) {
OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ?
(bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);/* 2 dwords */
}

if (bank_alu_length > 0) {
OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst);

OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr);

OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst);

OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr);

if (code->r390_mode) {
OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr);
}
}

if (bank_tex_length > 0) {
OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length);
OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length);
}

/* What remains to be uploaded in the following banks. */
alu_length -= bank_alu_length;
tex_length -= bank_tex_length;
bank++;
} while(code->r390_mode && (alu_length > 0 || tex_length > 0));

/* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders
* will be rendered incorrectly. */
if (r300->screen->caps.is_r400) {
OUT_CB_REG(R400_US_CODE_BANK,
code->r390_mode ? R400_R390_MODE_ENABLE : 0);
}

/* Emit immediates. Each constant takes four registers, 16 bytes per
 * constant slot, and every component goes through pack_float24():
 * 1 + 4 = 5 dwords per immediate. */
if (imm_count) {
for(i = imm_first; i < imm_end; ++i) {
if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
const float *data = constants[i].u.Immediate;

OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
OUT_CB(pack_float24(data[0]));
OUT_CB(pack_float24(data[1]));
OUT_CB(pack_float24(data[2]));
OUT_CB(pack_float24(data[3]));
}
}
}
}

/* Common tail for both paths (4 dwords). */
OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src);
OUT_CB_REG(R300_US_W_FMT, shader->us_out_w);
END_CB;
}
 
static void r300_translate_fragment_shader(
struct r300_context* r300,
struct r300_fragment_shader_code* shader,
const struct tgsi_token *tokens)
{
struct r300_fragment_program_compiler compiler;
struct tgsi_to_rc ttr;
int wpos, face;
unsigned i;
 
tgsi_scan_shader(tokens, &shader->info);
r300_shader_read_fs_inputs(&shader->info, &shader->inputs);
 
wpos = shader->inputs.wpos;
face = shader->inputs.face;
 
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base, &r300->fs_regalloc_state);
DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0;
DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0;
 
compiler.code = &shader->code;
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.is_r400 = r300->screen->caps.is_r400;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.has_presub = TRUE;
compiler.Base.has_omod = TRUE;
compiler.Base.max_temp_regs =
compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts =
(compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64;
compiler.Base.max_tex_insts =
(compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;
 
find_output_registers(&compiler, shader);
 
shader->write_all = FALSE;
for (i = 0; i < shader->info.num_properties; i++) {
if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
shader->write_all = TRUE;
}
}
 
if (compiler.Base.Debug & RC_DBG_LOG) {
DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
}
 
/* Translate TGSI to our internal representation */
ttr.compiler = &compiler.Base;
ttr.info = &shader->info;
ttr.use_half_swizzles = TRUE;
 
r300_tgsi_to_rc(&ttr, tokens);
 
if (ttr.error) {
fprintf(stderr, "r300 FP: Cannot translate a shader. "
"Using a dummy shader instead.\n");
r300_dummy_fragment_shader(r300, shader);
return;
}
 
if (!r300->screen->caps.is_r500 ||
compiler.Base.Program.Constants.Count > 200) {
compiler.Base.remove_unused_constants = TRUE;
}
 
/**
* Transform the program to support WPOS.
*
* Introduce a small fragment at the start of the program that will be
* the only code that directly reads the WPOS input.
* All other code pieces that reference that input will be rewritten
* to read from a newly allocated temporary. */
if (wpos != ATTR_UNUSED) {
/* Moving the input to some other reg is not really necessary. */
rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
}
 
if (face != ATTR_UNUSED) {
rc_transform_fragment_face(&compiler.Base, face);
}
 
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
 
if (compiler.Base.Error) {
fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
" instead.\n", compiler.Base.ErrorMsg);
 
if (shader->dummy) {
fprintf(stderr, "r300 FP: Cannot compile the dummy shader! "
"Giving up...\n");
abort();
}
 
rc_destroy(&compiler.Base);
r300_dummy_fragment_shader(r300, shader);
return;
}
 
/* Shaders with zero instructions are invalid,
* use the dummy shader instead. */
if (shader->code.code.r500.inst_end == -1) {
rc_destroy(&compiler.Base);
r300_dummy_fragment_shader(r300, shader);
return;
}
 
/* Initialize numbers of constants for each type. */
shader->externals_count = 0;
for (i = 0;
i < shader->code.constants.Count &&
shader->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) {
shader->externals_count = i+1;
}
shader->immediates_count = 0;
shader->rc_state_count = 0;
 
for (i = shader->externals_count; i < shader->code.constants.Count; i++) {
switch (shader->code.constants.Constants[i].Type) {
case RC_CONSTANT_IMMEDIATE:
++shader->immediates_count;
break;
case RC_CONSTANT_STATE:
++shader->rc_state_count;
break;
default:
assert(0);
}
}
 
/* Setup shader depth output. */
if (shader->code.writes_depth) {
shader->fg_depth_src = R300_FG_DEPTH_SRC_SHADER;
shader->us_out_w = R300_W_FMT_W24 | R300_W_SRC_US;
} else {
shader->fg_depth_src = R300_FG_DEPTH_SRC_SCAN;
shader->us_out_w = R300_W_FMT_W0 | R300_W_SRC_US;
}
 
/* And, finally... */
rc_destroy(&compiler.Base);
 
/* Build the command buffer. */
r300_emit_fs_code_to_buffer(r300, shader);
}
 
/**
 * Make sure the bound fragment shader has a variant compiled for the
 * current external (texture-compare) state and select it.
 *
 * Variants live on a singly linked list headed by fs->first; a new variant
 * is compiled and pushed to the front when none matches.
 *
 * \return TRUE if fs->shader now points at a different variant than before
 *         (so it has to be re-emitted), FALSE if the currently selected
 *         variant is kept. FALSE is also returned when a new variant could
 *         not be allocated; fs->shader and the list are left untouched then.
 */
boolean r300_pick_fragment_shader(struct r300_context* r300)
{
    struct r300_fragment_shader* fs = r300_fs(r300);
    struct r300_fragment_program_external_state state = {{{ 0 }}};
    struct r300_fragment_shader_code* ptr;

    get_external_state(r300, &state);

    if (fs->first) {
        /* Check if the currently-bound shader has been compiled
         * with the texture-compare state we need. */
        if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) == 0)
            return FALSE;

        /* Search for the right shader. A hit can never be the currently
         * selected variant, because that one has just been found not to
         * match. */
        for (ptr = fs->first; ptr; ptr = ptr->next) {
            if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) {
                fs->shader = ptr;
                return TRUE;
            }
        }
    }

    /* Either this is the first build of the shader, or no existing variant
     * matches: compile a new one. */
    ptr = CALLOC_STRUCT(r300_fragment_shader_code);
    if (!ptr) {
        fprintf(stderr, "r300 FP: Out of memory, cannot allocate a shader "
                "variant.\n");
        return FALSE;
    }

    ptr->next = fs->first;
    fs->first = fs->shader = ptr;

    /* The state is always compared with memcmp(), so store it with memcpy()
     * to carry over every byte of the structure. */
    memcpy(&ptr->compare_state, &state, sizeof(state));
    r300_translate_fragment_shader(r300, ptr, fs->state.tokens);
    return TRUE;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_fs.h
0,0 → 1,93
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Joakim Sindholt <opensource@zhasha.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_FS_H
#define R300_FS_H
 
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "compiler/radeon_code.h"
#include "r300_shader_semantics.h"
 
/* One compiled variant of a fragment shader plus the data needed to select
 * and emit it. Variants of the same shader are chained through `next`. */
struct r300_fragment_shader_code {
/* Compiler output; the fragment program compiler is pointed at this. */
struct rX00_fragment_program_code code;
/* Filled in by tgsi_scan_shader() when the variant is translated. */
struct tgsi_shader_info info;
/* Filled in by r300_shader_read_fs_inputs(); the wpos and face members are
 * read from here during translation. */
struct r300_shader_semantics inputs;

/* Whether the shader was replaced by a dummy one due to a shader
* compilation failure. */
boolean dummy;

/* Numbers of constants for each type. */
unsigned externals_count;
unsigned immediates_count;
unsigned rc_state_count;

/* Registers for fragment depth output setup. */
uint32_t fg_depth_src; /* R300_FG_DEPTH_SRC: 0x4bd8 */
uint32_t us_out_w; /* R300_US_W_FMT: 0x46b4 */

/* External state this variant was compiled with; compared with memcmp()
 * when a variant is picked. */
struct r300_fragment_program_external_state compare_state;

/* NOTE(review): presumably the command buffer built for this variant and
 * its size - the unit of the size is not visible here, confirm. */
unsigned cb_code_size;
uint32_t *cb_code;

/* Next variant in the list headed by r300_fragment_shader::first. */
struct r300_fragment_shader_code* next;

/* TRUE when the shader has the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
 * property. */
boolean write_all;

};
 
/* A fragment shader as seen by the state tracker, together with all of its
 * compiled variants. */
struct r300_fragment_shader {
/* Parent class; state.tokens holds the TGSI tokens that get compiled. */
struct pipe_shader_state state;

/* Currently selected variant (the one r300_pick_fragment_shader() chose). */
struct r300_fragment_shader_code* shader;

/* Head of the list of the same shader compiled with different
* texture-compare states; NULL until the first variant is built. */
struct r300_fragment_shader_code* first;
};
 
/* Fill fs_inputs from already-scanned shader info.
 * NOTE(review): the implementation is not visible here; callers read the
 * wpos and face members of fs_inputs afterwards - confirm what else is set. */
void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
struct r300_shader_semantics* fs_inputs);

/* Select the variant of the bound fragment shader that matches the current
 * external state, compiling it first if necessary.
 * Return TRUE if the shader was switched and should be re-emitted. */
boolean r300_pick_fragment_shader(struct r300_context* r300);
 
/* Return TRUE if the currently selected variant of `fs` writes depth.
 * A missing shader, or a shader without a selected variant yet, is treated
 * as not writing depth. */
static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
{
    if (!fs || !fs->shader)
        return FALSE;
    return (fs->shader->code.writes_depth) ? TRUE : FALSE;
}
 
/* Return TRUE if the currently selected variant of `fs` has its write_all
 * flag set. A missing shader, or a shader without a selected variant yet,
 * yields FALSE. */
static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
{
    if (!fs || !fs->shader)
        return FALSE;
    return (fs->shader->write_all) ? TRUE : FALSE;
}
#endif /* R300_FS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_hyperz.c
0,0 → 1,313
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_context.h"
#include "r300_reg.h"
#include "r300_fs.h"
 
#include "util/u_format.h"
#include "util/u_mm.h"
 
/*
HiZ rules - taken from various docs
1. HiZ only works on depth values
2. Cannot HiZ if stencil fail or zfail is !KEEP
3. on R300/400, HiZ is disabled if depth test is EQUAL
4. comparison changes without clears usually mean disabling HiZ
*/
/*****************************************************************************/
/* The HyperZ setup */
/*****************************************************************************/
 
/* Derive the HiZ function from the depth function of the bound DSA state:
 * GREATER and GEQUAL select MIN, everything else selects MAX. */
static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300)
{
    struct r300_dsa_state *dsa_state = r300->dsa_state.state;
    unsigned depth_func = dsa_state->dsa.depth.func;

    if (depth_func == PIPE_FUNC_GREATER || depth_func == PIPE_FUNC_GEQUAL)
        return HIZ_FUNC_MIN;

    /* LESS and LEQUAL want MAX; MAX is also the guess for the uncertain
     * cases (NEVER, EQUAL, NOTEQUAL, ALWAYS and anything unknown). */
    return HIZ_FUNC_MAX;
}
 
/* Return the SC_HYPERZ bit saying which value (minimum or maximum) is used
 * for the depth test, based on the current depth function. */
static unsigned r300_get_sc_hz_max(struct r300_context *r300)
{
    struct r300_dsa_state *dsa_state = r300->dsa_state.state;
    unsigned depth_func = dsa_state->dsa.depth.func;

    if (depth_func < PIPE_FUNC_GREATER)
        return R300_SC_HYPERZ_MIN;

    return R300_SC_HYPERZ_MAX;
}
 
/* Check that the current depth function does not contradict the HiZ
 * function the context has already settled on. With no HiZ function chosen
 * yet, every depth function is acceptable. */
static boolean r300_is_hiz_func_valid(struct r300_context *r300)
{
    struct r300_dsa_state *dsa_state = r300->dsa_state.state;
    unsigned depth_func = dsa_state->dsa.depth.func;

    switch (r300->hiz_func) {
    case HIZ_FUNC_MAX:
        /* HiZ was set up for less/lequal; a greater-style test inverts it. */
        if (depth_func == PIPE_FUNC_GEQUAL || depth_func == PIPE_FUNC_GREATER)
            return FALSE;
        return TRUE;

    case HIZ_FUNC_MIN:
        /* HiZ was set up for greater/gequal; a less-style test inverts it. */
        if (depth_func == PIPE_FUNC_LESS || depth_func == PIPE_FUNC_LEQUAL)
            return FALSE;
        return TRUE;

    default:
        return TRUE;
    }
}
 
static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
{
return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
s->zfail_op != PIPE_STENCIL_OP_KEEP);
}
 
/* Decide whether HiZ may be used with the current state. It is refused when
 * the fragment shader writes depth, a query is active, the depth function
 * contradicts the chosen HiZ function, a stencil fail/zfail op is not KEEP,
 * or the (enabled) depth function is NOTEQUAL, or EQUAL on pre-R500 chips. */
static boolean r300_hiz_allowed(struct r300_context *r300)
{
    struct r300_dsa_state *dsa_state = r300->dsa_state.state;
    struct r300_screen *screen = r300->screen;
    unsigned side;

    if (r300_fragment_shader_writes_depth(r300_fs(r300)) ||
        r300->query_current ||
        /* If the depth function is inverted, HiZ must be disabled. */
        !r300_is_hiz_func_valid(r300))
        return FALSE;

    /* Both stencil sides must keep the value on fail and zfail. */
    for (side = 0; side < 2; side++) {
        if (r300_dsa_stencil_op_not_keep(&dsa_state->dsa.stencil[side]))
            return FALSE;
    }

    if (!dsa_state->dsa.depth.enabled)
        return TRUE;

    switch (dsa_state->dsa.depth.func) {
    case PIPE_FUNC_EQUAL:
        /* EQUAL only works with HiZ on R500. */
        return screen->caps.is_r500 ? TRUE : FALSE;
    case PIPE_FUNC_NOTEQUAL:
        return FALSE;
    default:
        return TRUE;
    }
}
 
/* Recompute the HyperZ register values (gb_z_peq_config, zb_bw_cntl,
 * sc_hyperz, flush) from the framebuffer, the DSA state and the context's
 * zmask/HiZ bookkeeping. May also clear r300->hiz_in_use and choose
 * r300->hiz_func as a side effect. */
static void r300_update_hyperz(struct r300_context* r300)
{
    struct r300_hyperz_state *hz =
        (struct r300_hyperz_state*)r300->hyperz_state.state;
    struct pipe_framebuffer_state *fb_state =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_dsa_state *dsa_state = r300->dsa_state.state;
    struct r300_resource *zs_tex = NULL;

    if (fb_state->zsbuf)
        zs_tex = r300_resource(fb_state->zsbuf->texture);

    /* Start from the "everything off" values. */
    hz->gb_z_peq_config = 0;
    hz->zb_bw_cntl = 0;
    hz->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
    hz->flush = 0;

    if (r300->cbzb_clear) {
        hz->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
        return;
    }

    /* Nothing more to do without a zbuffer or with HyperZ turned off. */
    if (!(zs_tex && r300->hyperz_enabled))
        return;

    /* Set the size of ZMASK tiles. */
    if (zs_tex->tex.zcomp8x8[fb_state->zsbuf->u.tex.level])
        hz->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;

    /* R500-specific features and optimizations. */
    if (r300->screen->caps.is_r500)
        hz->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
                          R500_COVERED_PTR_MASKING_ENABLE;

    /* Setup decompression if needed. No other HyperZ setting is required. */
    if (r300->zmask_decompress) {
        hz->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
                          R300_RD_COMP_ENABLE;
        return;
    }

    /* Do not set anything if depth and stencil tests are off. */
    if (!(dsa_state->dsa.depth.enabled ||
          dsa_state->dsa.stencil[0].enabled ||
          dsa_state->dsa.stencil[1].enabled)) {
        assert(!dsa_state->dsa.depth.writemask);
        return;
    }

    /* Zbuffer compression. */
    if (r300->zmask_in_use && !r300->locked_zbuffer)
        hz->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
                          R300_RD_COMP_ENABLE |
                          R300_WR_COMP_ENABLE;

    /* HiZ. */
    if (!r300->hiz_in_use || r300->locked_zbuffer)
        return;

    /* HiZ cannot be used under some circumstances. */
    if (!r300_hiz_allowed(r300)) {
        /* If writemask is disabled, the HiZ memory will not be changed,
         * so we can keep its content for later. */
        if (dsa_state->dsa.depth.writemask)
            r300->hiz_in_use = FALSE;
        return;
    }
    DBG(r300, DBG_HYPERZ, "r300: Z-func: %i\n", dsa_state->dsa.depth.func);

    /* Set the HiZ function if needed. */
    if (r300->hiz_func == HIZ_FUNC_NONE)
        r300->hiz_func = r300_get_hiz_func(r300);

    /* Setup the HiZ bits. */
    hz->zb_bw_cntl |= R300_HIZ_ENABLE;
    if (r300->hiz_func == HIZ_FUNC_MIN)
        hz->zb_bw_cntl |= R300_HIZ_MIN;
    else
        hz->zb_bw_cntl |= R300_HIZ_MAX;

    hz->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
                     r300_get_sc_hz_max(r300);

    if (r300->screen->caps.is_r500)
        hz->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE;
}
 
/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
 
static boolean r300_dsa_writes_stencil(
struct pipe_stencil_state *s)
{
return s->enabled && s->writemask &&
(s->fail_op != PIPE_STENCIL_OP_KEEP ||
s->zfail_op != PIPE_STENCIL_OP_KEEP ||
s->zpass_op != PIPE_STENCIL_OP_KEEP);
}
 
/* Return TRUE if the DSA state can change a depth or stencil value.
 * Depth counts only when the test is enabled, writes are unmasked and the
 * function is not NEVER. */
static boolean r300_dsa_writes_depth_stencil(
    struct pipe_depth_stencil_alpha_state *dsa)
{
    boolean depth_written = dsa->depth.enabled &&
                            dsa->depth.writemask &&
                            dsa->depth.func != PIPE_FUNC_NEVER;

    return depth_written ||
           r300_dsa_writes_stencil(&dsa->stencil[0]) ||
           r300_dsa_writes_stencil(&dsa->stencil[1]);
}
 
static boolean r300_dsa_alpha_test_enabled(
struct pipe_depth_stencil_alpha_state *dsa)
{
/* We are interested only in the cases when alpha testing can kill
* a fragment. */
 
return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS;
}
 
static void r300_update_ztop(struct r300_context* r300)
{
struct r300_ztop_state* ztop_state =
(struct r300_ztop_state*)r300->ztop_state.state;
uint32_t old_ztop = ztop_state->z_buffer_top;
 
/* This is important enough that I felt it warranted a comment.
*
* According to the docs, these are the conditions where ZTOP must be
* disabled:
* 1) Alpha testing enabled
* 2) Texture kill instructions in fragment shader
* 3) Chroma key culling enabled
* 4) W-buffering enabled
*
* The docs claim that for the first three cases, if no ZS writes happen,
* then ZTOP can be used.
*
* (3) will never apply since we do not support chroma-keyed operations.
* (4) will need to be re-examined (and this comment updated) if/when
* Hyper-Z becomes supported.
*
* Additionally, the following conditions require disabled ZTOP:
* 5) Depth writes in fragment shader
* 6) Outstanding occlusion queries
*
* This register causes stalls all the way from SC to CB when changed,
* but it is buffered on-chip so it does not hurt to write it if it has
* not changed.
*
* ~C.
*/
 
/* ZS writes */
if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
(r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */
r300_fs(r300)->shader->info.uses_kill)) { /* (2) */
ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
} else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
} else if (r300->query_current) { /* (6) */
ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
} else {
ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
}
if (ztop_state->z_buffer_top != old_ztop)
r300_mark_atom_dirty(r300, &r300->ztop_state);
}
 
/* Refresh the derived depth state: ZTOP always, the HyperZ registers only
 * when the hyperz atom is dirty. */
void r300_update_hyperz_state(struct r300_context* r300)
{
    r300_update_ztop(r300);

    if (!r300->hyperz_state.dirty)
        return;

    r300_update_hyperz(r300);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_public.h
0,0 → 1,9
 
#ifndef R300_PUBLIC_H
#define R300_PUBLIC_H
 
/* Forward declaration; only a pointer to the winsys is needed here. */
struct radeon_winsys;

/* Create the r300 pipe_screen for the given radeon winsys.
 * NOTE(review): the failure behaviour is not visible from this header -
 * presumably NULL is returned; confirm against the implementation. */
struct pipe_screen* r300_screen_create(struct radeon_winsys *rws);
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_query.c
0,0 → 1,212
/*
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "util/u_memory.h"
#include "util/u_simple_list.h"
 
#include "r300_context.h"
#include "r300_screen.h"
#include "r300_emit.h"
 
#include <stdio.h>
 
/* Create a query object. Only occlusion counter, occlusion predicate and
 * GPU-finished queries are supported; any other type, or an allocation
 * failure, yields NULL. Occlusion queries additionally get a 4096-byte GTT
 * buffer and the number of pipes to read results from. */
static struct pipe_query *r300_create_query(struct pipe_context *pipe,
                                            unsigned query_type)
{
    struct r300_context *r300 = r300_context(pipe);
    struct r300_screen *screen = r300->screen;
    struct r300_query *query;

    switch (query_type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
    case PIPE_QUERY_GPU_FINISHED:
        break;
    default:
        return NULL;
    }

    query = CALLOC_STRUCT(r300_query);
    if (!query)
        return NULL;

    query->type = query_type;

    /* GPU-finished queries need neither a pipe count nor a result buffer. */
    if (query_type == PIPE_QUERY_GPU_FINISHED)
        return (struct pipe_query*)query;

    /* RV530 uses its Z pipes, every other chip its GB pipes. */
    query->num_pipes = screen->caps.family == CHIP_RV530 ?
                       screen->info.r300_num_z_pipes :
                       screen->info.r300_num_gb_pipes;

    query->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE,
                                          RADEON_DOMAIN_GTT);
    if (!query->buf) {
        FREE(query);
        return NULL;
    }
    query->cs_buf = r300->rws->buffer_get_cs_handle(query->buf);

    return (struct pipe_query*)query;
}
 
/* Destroy a query: drop the reference it holds in its buf member, then free
 * the query object itself. */
static void r300_destroy_query(struct pipe_context* pipe,
                               struct pipe_query* query)
{
    struct r300_query* r300q = r300_query(query);

    pb_reference(&r300q->buf, NULL);
    FREE(query);
}
 
/* Make `query` the context's current query and mark the query_start atom
 * dirty. Unlike r300_begin_query(), this does not reset the query's
 * num_results. */
void r300_resume_query(struct r300_context *r300,
struct r300_query *query)
{
r300->query_current = query;
r300_mark_atom_dirty(r300, &r300->query_start);
}
 
/* Start a query. GPU-finished queries have nothing to start. Only one query
 * may be active at a time; starting a second one is reported and ignored. */
static void r300_begin_query(struct pipe_context* pipe,
                             struct pipe_query* query)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_query* r300q = r300_query(query);

    if (r300q->type == PIPE_QUERY_GPU_FINISHED)
        return;

    if (r300->query_current == NULL) {
        /* Start counting from scratch. */
        r300q->num_results = 0;
        r300_resume_query(r300, r300q);
        return;
    }

    fprintf(stderr, "r300: begin_query: "
            "Some other query has already been started.\n");
    assert(0);
}
 
/* Stop the context's current query: emit the query end first, then clear
 * query_current. The clear comes last so the emit call still runs with the
 * query set. */
void r300_stop_query(struct r300_context *r300)
{
r300_emit_query_end(r300);
r300->query_current = NULL;
}
 
/* End a query. A GPU-finished query drops its previous buf reference and
 * flushes asynchronously, storing the resulting fence handle in buf. Any
 * other query must be the context's current one and is stopped; a mismatch
 * is reported and ignored. */
static void r300_end_query(struct pipe_context* pipe,
                           struct pipe_query* query)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_query *r300q = r300_query(query);

    if (r300q->type == PIPE_QUERY_GPU_FINISHED) {
        pb_reference(&r300q->buf, NULL);
        r300_flush(pipe, RADEON_FLUSH_ASYNC,
                   (struct pipe_fence_handle**)&r300q->buf);
        return;
    }

    if (r300q == r300->query_current) {
        r300_stop_query(r300);
        return;
    }

    fprintf(stderr, "r300: end_query: Got invalid query.\n");
    assert(0);
}
 
/* Fetch the result of a query.
 *
 * GPU-finished queries: with `wait` set, block until the buffer is idle and
 * report TRUE; otherwise report whether the buffer is idle right now. The
 * reported value is both stored in vresult->b and returned.
 *
 * Occlusion queries: map the result buffer (without blocking unless `wait`
 * is set), sum the num_results little-endian 32-bit values stored in it and
 * store either "sum != 0" (predicate) or the sum itself (counter).
 *
 * \return TRUE if vresult was filled with a final result, FALSE if the
 *         result is not available (mapping failed or GPU still busy).
 */
static boolean r300_get_query_result(struct pipe_context* pipe,
                                     struct pipe_query* query,
                                     boolean wait,
                                     union pipe_query_result *vresult)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_query *q = r300_query(query);
    unsigned i;
    uint64_t sum;
    uint32_t *map;

    if (q->type == PIPE_QUERY_GPU_FINISHED) {
        if (wait) {
            r300->rws->buffer_wait(q->buf, RADEON_USAGE_READWRITE);
            vresult->b = TRUE;
        } else {
            vresult->b = !r300->rws->buffer_is_busy(q->buf, RADEON_USAGE_READWRITE);
        }
        return vresult->b;
    }

    map = r300->rws->buffer_map(q->cs_buf, r300->cs,
                                PIPE_TRANSFER_READ |
                                (!wait ? PIPE_TRANSFER_DONTBLOCK : 0));
    if (!map)
        return FALSE;

    /* Sum up the results. The sum is kept in 64 bits so that adding up
     * several 32-bit counters cannot wrap before it reaches the 64-bit
     * result field. */
    sum = 0;
    for (i = 0; i < q->num_results; i++) {
        /* Convert little endian values written by GPU to CPU byte order */
        sum += util_le32_to_cpu(map[i]);
    }

    r300->rws->buffer_unmap(q->cs_buf);

    if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
        vresult->b = sum != 0;
    } else {
        vresult->u64 = sum;
    }
    return TRUE;
}
 
/* Set up conditional rendering. Rendering is skipped when the query result,
 * taken as a boolean, equals `condition`. Without a query, or when the
 * result is not available, rendering is not skipped. */
static void r300_render_condition(struct pipe_context *pipe,
                                  struct pipe_query *query,
                                  boolean condition,
                                  uint mode)
{
    struct r300_context *r300 = r300_context(pipe);
    union pipe_query_result result;
    boolean block;

    r300->skip_rendering = FALSE;

    if (!query)
        return;

    /* Only the two WAIT modes block for the result. */
    block = mode == PIPE_RENDER_COND_WAIT ||
            mode == PIPE_RENDER_COND_BY_REGION_WAIT;

    if (!r300_get_query_result(pipe, query, block, &result))
        return;

    if (r300_query(query)->type == PIPE_QUERY_OCCLUSION_PREDICATE)
        r300->skip_rendering = condition == result.b;
    else
        r300->skip_rendering = condition == !!result.u64;
}
 
void r300_init_query_functions(struct r300_context* r300)
{
r300->context.create_query = r300_create_query;
r300->context.destroy_query = r300_destroy_query;
r300->context.begin_query = r300_begin_query;
r300->context.end_query = r300_end_query;
r300->context.get_query_result = r300_get_query_result;
r300->context.render_condition = r300_render_condition;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_reg.h
0,0 → 1,3576
/**************************************************************************
 
Copyright (C) 2004-2005 Nicolai Haehnle et al.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/* *INDENT-OFF* */
 
#ifndef _R300_REG_H
#define _R300_REG_H
 
#define R300_MC_INIT_MISC_LAT_TIMER 0x180
# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0
# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4
# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8
# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12
# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16
# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20
# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24
# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28
 
 
#define R300_MC_INIT_GFX_LAT_TIMER 0x154
# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0
# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4
# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8
# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12
# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16
# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20
# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24
# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28
 
/*
* This file contains registers and constants for the R300. They have been
* found mostly by examining command buffers captured using glxtest, as well
* as by extrapolating some known registers and constants from the R200.
* I am fairly certain that they are correct unless stated otherwise
* in comments.
*/
 
#define R300_SE_VPORT_XSCALE 0x1D98
#define R300_SE_VPORT_XOFFSET 0x1D9C
#define R300_SE_VPORT_YSCALE 0x1DA0
#define R300_SE_VPORT_YOFFSET 0x1DA4
#define R300_SE_VPORT_ZSCALE 0x1DA8
#define R300_SE_VPORT_ZOFFSET 0x1DAC
 
#define R300_VAP_PORT_IDX0 0x2040
/*
* Vertex Array Processing (VAP) Control
*/
#define R300_VAP_CNTL 0x2080
# define R300_PVS_NUM_SLOTS_SHIFT 0
# define R300_PVS_NUM_CNTLRS_SHIFT 4
# define R300_PVS_NUM_FPUS_SHIFT 8
# define R300_VF_MAX_VTX_NUM_SHIFT 18
# define R300_PVS_NUM_SLOTS(x) ((x) << 0)
# define R300_PVS_NUM_CNTLRS(x) ((x) << 4)
# define R300_PVS_NUM_FPUS(x) ((x) << 8)
# define R300_PVS_VF_MAX_VTX_NUM(x) ((x) << 18)
# define R300_GL_CLIP_SPACE_DEF (0 << 22)
# define R300_DX_CLIP_SPACE_DEF (1 << 22)
# define R500_TCL_STATE_OPTIMIZATION (1 << 23)
 
/* This register is written directly and also starts data section
* in many 3d CP_PACKET3's
*/
#define R300_VAP_VF_CNTL 0x2084
# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0
# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0)
# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0)
# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0)
# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0)
# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0)
# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0)
# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0)
# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0)
# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0)
# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0)
# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0)
 
# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4
/* State based - direct writes to registers trigger vertex
generation */
# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4)
# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4)
# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4)
# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4)
 
/* I don't think I saw these three used.. */
# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6
# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9
# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10
 
/* index size - when not set the indices are assumed to be 16 bit */
# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11)
# define R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS (1<<14)
/* number of vertices */
# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
 
#define R500_VAP_INDEX_OFFSET 0x208c
 
#define R500_VAP_ALT_NUM_VERTICES 0x2088
 
#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090
# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0)
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16)
 
#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094
/* each of the following is 3 bits wide, specifies number
of components */
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0
# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1
# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2
# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3
# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4
 
#define R300_VAP_VPORT_XSCALE 0x2098
#define R300_VAP_VPORT_XOFFSET 0x209c
#define R300_VAP_VPORT_YSCALE 0x20a0
#define R300_VAP_VPORT_YOFFSET 0x20a4
#define R300_VAP_VPORT_ZSCALE 0x20a8
#define R300_VAP_VPORT_ZOFFSET 0x20ac
 
#define R300_VAP_VTE_CNTL 0x20b0
#define R300_SE_VTE_CNTL R300_VAP_VTE_CNTL
# define R300_VPORT_X_SCALE_ENA (1 << 0)
# define R300_VPORT_X_OFFSET_ENA (1 << 1)
# define R300_VPORT_Y_SCALE_ENA (1 << 2)
# define R300_VPORT_Y_OFFSET_ENA (1 << 3)
# define R300_VPORT_Z_SCALE_ENA (1 << 4)
# define R300_VPORT_Z_OFFSET_ENA (1 << 5)
# define R300_VTX_XY_FMT (1 << 8)
# define R300_VTX_Z_FMT (1 << 9)
# define R300_VTX_W0_FMT (1 << 10)
# define R300_SERIAL_PROC_ENA (1 << 11)
 
#define R300_VAP_VTX_SIZE 0x20b4
 
/* BEGIN: Vertex data assembly - lots of uncertainties */
 
/* gap */
 
/* Maximum Vertex Indx Clamp */
#define R300_VAP_VF_MAX_VTX_INDX 0x2134
/* Minimum Vertex Indx Clamp */
#define R300_VAP_VF_MIN_VTX_INDX 0x2138
 
/** Vertex assembler/processor control status */
#define R300_VAP_CNTL_STATUS 0x2140
/* No swap at all (default) */
# define R300_VC_NO_SWAP (0 << 0)
/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
# define R300_VC_16BIT_SWAP (1 << 0)
/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
# define R300_VC_32BIT_SWAP (2 << 0)
/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
# define R300_VC_HALF_DWORD_SWAP (3 << 0)
/* The TCL engine will not be used (as it is logically or even physically removed) */
# define R300_VAP_TCL_BYPASS (1 << 8)
/* Read only flag if TCL engine is busy. */
# define R300_VAP_PVS_BUSY (1 << 11)
/* TODO: gap for MAX_MPS */
/* Read only flag if the vertex store is busy. */
# define R300_VAP_VS_BUSY (1 << 24)
/* Read only flag if the reciprocal engine is busy. */
# define R300_VAP_RCP_BUSY (1 << 25)
/* Read only flag if the viewport transform engine is busy. */
# define R300_VAP_VTE_BUSY (1 << 26)
/* Read only flag if the memory interface unit is busy. */
# define R300_VAP_MUI_BUSY (1 << 27)
/* Read only flag if the vertex cache is busy. */
# define R300_VAP_VC_BUSY (1 << 28)
/* Read only flag if the vertex fetcher is busy. */
# define R300_VAP_VF_BUSY (1 << 29)
/* Read only flag if the register pipeline is busy. */
# define R300_VAP_REGPIPE_BUSY (1 << 30)
/* Read only flag if the VAP engine is busy. */
# define R300_VAP_VAP_BUSY (1 << 31)
 
/* gap */
 
/* Where do we get our vertex data?
*
* Vertex data comes either from immediate mode registers or from
* vertex arrays.
* There appears to be no mixed mode (though we can force the pitch of
* vertex arrays to 0, effectively reusing the same element over and over
* again).
*
* Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
* if these registers influence vertex array processing.
*
* Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
*
* In both cases, vertex attributes are then passed through INPUT_ROUTE.
*
* Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
* into the vertex processor's input registers.
* The first word routes the first input, the second word the second, etc.
* The corresponding input is routed into the register with the given index.
* The list is ended by a word with INPUT_ROUTE_END set.
*
* Always set COMPONENTS_4 in immediate mode.
*/
 
#define R300_VAP_PROG_STREAM_CNTL_0 0x2150
/* Bit position of the first DATA_TYPE field; a second DATA_TYPE field starts
 * at R300_DATA_TYPE_1_SHIFT (bit 16). */
# define R300_DATA_TYPE_0_SHIFT 0
/* Unshifted values for a DATA_TYPE field. */
# define R300_DATA_TYPE_FLOAT_1 0
# define R300_DATA_TYPE_FLOAT_2 1
# define R300_DATA_TYPE_FLOAT_3 2
# define R300_DATA_TYPE_FLOAT_4 3
# define R300_DATA_TYPE_BYTE 4
# define R300_DATA_TYPE_D3DCOLOR 5
# define R300_DATA_TYPE_SHORT_2 6
# define R300_DATA_TYPE_SHORT_4 7
# define R300_DATA_TYPE_VECTOR_3_TTT 8
# define R300_DATA_TYPE_VECTOR_3_EET 9
# define R300_DATA_TYPE_FLOAT_8 10
# define R300_DATA_TYPE_FLT16_2 11
# define R300_DATA_TYPE_FLT16_4 12
/* Remaining fields and flags, all located in bits 15:4. */
# define R300_SKIP_DWORDS_SHIFT 4
# define R300_DST_VEC_LOC_SHIFT 8
# define R300_LAST_VEC (1 << 13)
# define R300_SIGNED (1 << 14)
# define R300_NORMALIZE (1 << 15)
# define R300_DATA_TYPE_1_SHIFT 16
/* CNTL_1..CNTL_7 follow CNTL_0 at consecutive dword addresses; presumably they
 * share the field layout above - TODO confirm. */
#define R300_VAP_PROG_STREAM_CNTL_1 0x2154
#define R300_VAP_PROG_STREAM_CNTL_2 0x2158
#define R300_VAP_PROG_STREAM_CNTL_3 0x215C
#define R300_VAP_PROG_STREAM_CNTL_4 0x2160
#define R300_VAP_PROG_STREAM_CNTL_5 0x2164
#define R300_VAP_PROG_STREAM_CNTL_6 0x2168
#define R300_VAP_PROG_STREAM_CNTL_7 0x216C
/* gap */
 
/* Notes:
* - always set up to produce at least two attributes:
* if vertex program uses only position, fglrx will set normal, too
* - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
*/
#define R300_VAP_VTX_STATE_CNTL 0x2180
/* One COLOR_n_ASSEMBLY field per color. The shifts advance by 2, so each field
 * is apparently 2 bits wide and holds one of the unshifted R300_SEL_* values. */
# define R300_COLOR_0_ASSEMBLY_SHIFT 0
# define R300_SEL_COLOR 0
# define R300_SEL_USER_COLOR_0 1
# define R300_SEL_USER_COLOR_1 2
# define R300_COLOR_1_ASSEMBLY_SHIFT 2
# define R300_COLOR_2_ASSEMBLY_SHIFT 4
# define R300_COLOR_3_ASSEMBLY_SHIFT 6
# define R300_COLOR_4_ASSEMBLY_SHIFT 8
# define R300_COLOR_5_ASSEMBLY_SHIFT 10
# define R300_COLOR_6_ASSEMBLY_SHIFT 12
# define R300_COLOR_7_ASSEMBLY_SHIFT 14
/* Single-bit flag located just above the eight assembly fields. */
# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16)
 
/*
* Each bit in this field applies to the corresponding vector in the VSM
* memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit
* is set, then the corresponding 4-Dword Vector is output into the Vertex Stream.
*/
#define R300_VAP_VSM_VTX_ASSM 0x2184
/* Pre-shifted single-bit masks: bits 0-2 select position, normal and color;
 * bits 10-17 select texture coordinate sets 0-7. Entries tagged GUESS were
 * extrapolated by the original author from the TC0/TC1 pattern. */
# define R300_INPUT_CNTL_POS 0x00000001
# define R300_INPUT_CNTL_NORMAL 0x00000002
# define R300_INPUT_CNTL_COLOR 0x00000004
# define R300_INPUT_CNTL_TC0 0x00000400
# define R300_INPUT_CNTL_TC1 0x00000800
# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */
# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */
# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */
# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */
# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */
# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
 
/* Programmable Stream Control Signed Normalize Control */
#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc
/* Unshifted values for one 2-bit per-stream field. */
# define SGN_NORM_ZERO 0
# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
# define SGN_NORM_NO_ZERO 2
/* SGN_NORM_NO_ZERO replicated into all sixteen 2-bit fields (0xAAAAAAAA).
 * The topmost field is shifted as an unsigned value: (2 << 30) does not fit
 * in a signed int, so the plain int shift was undefined behaviour and made
 * the whole mask negative. */
# define R300_SGN_NORM_NO_ZERO (SGN_NORM_NO_ZERO | \
(SGN_NORM_NO_ZERO << 2) | (SGN_NORM_NO_ZERO << 4) | \
(SGN_NORM_NO_ZERO << 6) | (SGN_NORM_NO_ZERO << 8) | \
(SGN_NORM_NO_ZERO << 10) | (SGN_NORM_NO_ZERO << 12) | \
(SGN_NORM_NO_ZERO << 14) | (SGN_NORM_NO_ZERO << 16) | \
(SGN_NORM_NO_ZERO << 18) | (SGN_NORM_NO_ZERO << 20) | \
(SGN_NORM_NO_ZERO << 22) | (SGN_NORM_NO_ZERO << 24) | \
(SGN_NORM_NO_ZERO << 26) | (SGN_NORM_NO_ZERO << 28) | \
((unsigned)SGN_NORM_NO_ZERO << 30))
 
/* gap */
 
/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
* are set to a swizzling bit pattern, other words are 0.
*
* In immediate mode, the pattern is always set to xyzw. In vertex array
* mode, the swizzling pattern is e.g. used to set zw components in texture
* coordinates with only two components.
*/
#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0
/* First swizzle descriptor starts at bit 0, the second at R300_SWIZZLE1_SHIFT
 * (bit 16). Within a descriptor the X/Y/Z/W selectors sit 3 bits apart. */
# define R300_SWIZZLE0_SHIFT 0
# define R300_SWIZZLE_SELECT_X_SHIFT 0
# define R300_SWIZZLE_SELECT_Y_SHIFT 3
# define R300_SWIZZLE_SELECT_Z_SHIFT 6
# define R300_SWIZZLE_SELECT_W_SHIFT 9

/* Unshifted selector values: a source component or a constant 0.0 / 1.0. */
# define R300_SWIZZLE_SELECT_X 0
# define R300_SWIZZLE_SELECT_Y 1
# define R300_SWIZZLE_SELECT_Z 2
# define R300_SWIZZLE_SELECT_W 3
# define R300_SWIZZLE_SELECT_FP_ZERO 4
# define R300_SWIZZLE_SELECT_FP_ONE 5
/* alternate forms for r300_emit.c */
# define R300_INPUT_ROUTE_SELECT_X 0
# define R300_INPUT_ROUTE_SELECT_Y 1
# define R300_INPUT_ROUTE_SELECT_Z 2
# define R300_INPUT_ROUTE_SELECT_W 3
# define R300_INPUT_ROUTE_SELECT_ZERO 4
# define R300_INPUT_ROUTE_SELECT_ONE 5

/* Per-component write enables: unshifted bits, placed at R300_WRITE_ENA_SHIFT. */
# define R300_WRITE_ENA_SHIFT 12
# define R300_WRITE_ENA_X 1
# define R300_WRITE_ENA_Y 2
# define R300_WRITE_ENA_Z 4
# define R300_WRITE_ENA_W 8
# define R300_SWIZZLE1_SHIFT 16

/* Ready-made descriptors. Each enables all four writes (0xf) and fills the
 * components named 0 / 1 in the macro name with the FP_ZERO / FP_ONE selector. */
# define R300_VAP_SWIZZLE_X001 \
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
(0xf << R300_WRITE_ENA_SHIFT))

# define R300_VAP_SWIZZLE_XY01 \
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
(R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
(0xf << R300_WRITE_ENA_SHIFT))

# define R300_VAP_SWIZZLE_XYZ1 \
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
(0xf << R300_WRITE_ENA_SHIFT))

# define R300_VAP_SWIZZLE_XYZW \
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
(R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | \
(0xf << R300_WRITE_ENA_SHIFT))

/* EXT_1..EXT_7 follow EXT_0 at consecutive dword addresses. */
#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4
#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8
#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec
#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0
#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4
#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8
#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc
 
/* END: Vertex data assembly */
 
/* gap */
 
/* BEGIN: Upload vertex program and data */
 
/*
* The programmable vertex shader unit has a memory bank of unknown size
* that can be written to in 16 byte units by writing the address into
* UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
*
* Pointers into the memory bank are always in multiples of 16 bytes.
*
* The memory bank is divided into areas with fixed meaning.
*
* Starting at address UPLOAD_PROGRAM: Vertex program instructions.
* Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
* whereas the difference between known addresses suggests size 512.
*
* Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
* Native reported limits and the VPI layout suggest size 256, whereas
* difference between known addresses suggests size 512.
*
* At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
* floating point pointsize. The exact purpose of this state is uncertain,
* as there is also the R300_RE_POINTSIZE register.
*
* Multiple vertex programs and parameter sets can be loaded at once,
* which could explain the size discrepancy.
*/
#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200
/* Start offsets and sizes of the memory-bank areas, with separate values for
 * R300 and R500. Presumably these are the index values written to this
 * register, counted in 16-byte vectors - TODO confirm. */
# define R300_PVS_CODE_START 0
# define R300_MAX_PVS_CODE_LINES 256
# define R500_MAX_PVS_CODE_LINES 1024
# define R300_PVS_CONST_START 512
# define R500_PVS_CONST_START 1024
# define R300_MAX_PVS_CONST_VECS 256
# define R500_MAX_PVS_CONST_VECS 256
# define R300_PVS_UCP_START 1024
# define R500_PVS_UCP_START 1536
/* On both families the two point-related offsets are UCP_START + 6 and
 * UCP_START + 7. */
# define R300_POINT_VPORT_SCALE_OFFSET 1030
# define R500_POINT_VPORT_SCALE_OFFSET 1542
# define R300_POINT_GEN_TEX_OFFSET 1031
# define R500_POINT_GEN_TEX_OFFSET 1543
 
/*
* These are obsolete defines from r300_context.h, but they might give some
* clues when investigating the addresses further...
* They are compiled out by the #if 0 guard and kept for reference only.
*/
#if 0
#define VSF_DEST_PROGRAM 0x0
#define VSF_DEST_MATRIX0 0x200
#define VSF_DEST_MATRIX1 0x204
#define VSF_DEST_MATRIX2 0x208
#define VSF_DEST_VECTOR0 0x20c
#define VSF_DEST_VECTOR1 0x20d
#define VSF_DEST_UNKNOWN1 0x400
#define VSF_DEST_UNKNOWN2 0x406
#endif
 
/* gap */
 
#define R300_VAP_PVS_UPLOAD_DATA 0x2208
 
/* END: Upload vertex program and data */
 
/* gap */
 
/* NOTE: this comment describes register 0x221C (R300_VAP_CLIP_CNTL, defined
* below), not R500_VAP_TEX_TO_COLOR_CNTL which happens to follow it directly.
*
* I do not know the purpose of this register. However, I do know that
* it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
* for normal rendering.
*
* 2007-11-05: This register is the user clip plane control register, but there
* also seems to be a rendering mode control; the NORMAL/CLEAR defines.
*
* See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
*/
#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218

#define R300_VAP_CLIP_CNTL 0x221C
/* Bits 0-5: one enable bit per user clip plane (UCP) 0..5. */
# define R300_VAP_UCP_ENABLE_0 (1 << 0)
# define R300_VAP_UCP_ENABLE_1 (1 << 1)
# define R300_VAP_UCP_ENABLE_2 (1 << 2)
# define R300_VAP_UCP_ENABLE_3 (1 << 3)
# define R300_VAP_UCP_ENABLE_4 (1 << 4)
# define R300_VAP_UCP_ENABLE_5 (1 << 5)
/* PS_UCP_MODE field at bit 14, pre-shifted values 0..3. */
# define R300_PS_UCP_MODE_DIST_COP (0 << 14)
# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14)
# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14)
# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14)
/* Single-bit flags; the last two carry the R500 prefix. */
# define R300_CLIP_DISABLE (1 << 16)
# define R300_UCP_CULL_ONLY_ENABLE (1 << 17)
# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18)
# define R500_COLOR2_IS_TEXTURE (1 << 20)
# define R500_COLOR3_IS_TEXTURE (1 << 21)
 
/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first
* plane is per-pixel and the second plane is per-vertex.
*
* This was determined by experimentation alone but I believe it is correct.
*
* These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
*/
#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220
#define R300_VAP_GB_VERT_DISC_ADJ 0x2224
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
 
#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
/* Field packers for the four fields at bits 0, 8, 16 and 24. The argument is
 * shifted but not masked, so the caller must pass a value that fits the field. */
#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
 
/* gap */
 
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
* rendering commands and overwriting vertex program parameters.
* Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
* avoids bugs caused by still running shaders reading bad data from memory.
*/
#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
 
/* This register is used to define the number of core clocks to wait for a
* vertex to be received by the VAP input controller (while the primitive
* path is backed up) before forcing any accumulated vertices to be submitted
* to the vertex processing path.
*
* Note that, unlike its neighbours, this register name carries no R300_ prefix.
*/
#define VAP_PVS_VTX_TIMEOUT_REG 0x2288
/* Values for 0x2288, named after a chip family and attributed to the
 * contributor who found them. */
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */

#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
/* Field packers for the fields at bits 0 and 8; the argument is not masked. */
#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
 
/* gap */
 
/* Addresses are relative to the vertex program instruction area of the
* memory bank. PROGRAM_END points to the last instruction of the active
* program
*
* The meaning of the two UNKNOWN fields is obviously not known. However,
* experiments so far have shown that both *must* point to an instruction
* inside the vertex program, otherwise the GPU locks up.
*
* fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
* R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to
* position takes place.
*
* Most likely this is used to ignore rest of the program in cases
* where group of verts aren't visible. For some reason this "section"
* is sometimes accepted other instruction that have no relationship with
* position calculations.
*
* NOTE(review): the UNKNOWN / POS_END names used above do not appear among the
* defines below; they presumably predate a renaming - confirm before relying
* on this description.
*/
#define R300_VAP_PVS_CODE_CNTL_0 0x22D0
/* Three fields at bits 0, 10 and 20, given both as shift amounts and as
 * packer macros (which do not mask their argument). */
# define R300_PVS_FIRST_INST_SHIFT 0
# define R300_PVS_XYZW_VALID_INST_SHIFT 10
# define R300_PVS_LAST_INST_SHIFT 20
# define R300_PVS_FIRST_INST(x) ((x) << 0)
# define R300_PVS_XYZW_VALID_INST(x) ((x) << 10)
# define R300_PVS_LAST_INST(x) ((x) << 20)
/* Addresses are relative to the vertex program parameters area. */
#define R300_VAP_PVS_CONST_CNTL 0x22D4
# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
# define R300_PVS_CONST_BASE_OFFSET(x) (x)
# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16)
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
/* Two bits per slot x: each macro yields opcode 1 (jump), 2 (loop) or 3 (jsr)
 * placed at bit 2*x. */
#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
 
/* The entire range from 0x2300 to 0x24AC inclusive seems to be used for
* immediate vertices
*/
#define R300_VAP_VTX_COLOR_R 0x2464
#define R300_VAP_VTX_COLOR_G 0x2468
#define R300_VAP_VTX_COLOR_B 0x246C
#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */
#define R300_VAP_VTX_POS_0_Y_1 0x2494
#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */
#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */
#define R300_VAP_VTX_POS_0_Y_2 0x24A4
#define R300_VAP_VTX_POS_0_Z_2 0x24A8
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
 
/* R500 spreads the four flow-control address fields over two registers (LW and
 * UW - presumably lower/upper word), with fields at bits 0 and 16 in each.
 * The packer macros do not mask their argument. */
#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)

#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
 
/* gap */
 
/* These are values from r300_reg/r300_reg.h - they are known to be correct
* and are here so we can use one register file instead of several
* - Vladimir
*/
#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000
/* Presence flags for position (bit 0), colors 0-3 (bits 1-4) and point size
 * (bit 16); COLOR_SPACE is a pre-shifted 4-bit mask covering bits 8:5. */
# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0)
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5)
# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16)

#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004
/* each of the following is 3 bits wide, specifies number
of components */
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
 
/* UNK30 seems to enable point to quad transformation on textures
* (or something closely related to that).
* This bit is rather fatal at the time being due to lackings at pixel
* shader side
* Specifies top of Raster pipe specific enable controls.
*/
#define R300_GB_ENABLE 0x4008
# define R300_GB_POINT_STUFF_DISABLE (0 << 0)
# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */
# define R300_GB_LINE_STUFF_DISABLE (0 << 1)
# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */
# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2)
# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */
# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4)
# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */
# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */

/* each of the following is 2 bits wide */
/* R300_GB_TEX_* are unshifted field values; shift them by the matching
 * R300_GB_TEXn_SOURCE_SHIFT (bits 16..31, one 2-bit field per texture unit). */
#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */
#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */
#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */
# define R300_GB_TEX0_SOURCE_SHIFT 16
# define R300_GB_TEX1_SOURCE_SHIFT 18
# define R300_GB_TEX2_SOURCE_SHIFT 20
# define R300_GB_TEX3_SOURCE_SHIFT 22
# define R300_GB_TEX4_SOURCE_SHIFT 24
# define R300_GB_TEX5_SOURCE_SHIFT 26
# define R300_GB_TEX6_SOURCE_SHIFT 28
# define R300_GB_TEX7_SOURCE_SHIFT 30
 
/* MSPOS - positions for multisample antialiasing (?) */
#define R300_GB_MSPOS0 0x4010
/* shifts - each of the fields is 4 bits */
# define R300_GB_MSPOS0__MS_X0_SHIFT 0
# define R300_GB_MSPOS0__MS_Y0_SHIFT 4
# define R300_GB_MSPOS0__MS_X1_SHIFT 8
# define R300_GB_MSPOS0__MS_Y1_SHIFT 12
# define R300_GB_MSPOS0__MS_X2_SHIFT 16
# define R300_GB_MSPOS0__MS_Y2_SHIFT 20
/* The MSBD* values continue the 4-bit sequence, so they are apparently shift
 * amounts too despite lacking the _SHIFT suffix - TODO confirm. */
# define R300_GB_MSPOS0__MSBD0_Y 24
# define R300_GB_MSPOS0__MSBD0_X 28

#define R300_GB_MSPOS1 0x4014
# define R300_GB_MSPOS1__MS_X3_SHIFT 0
# define R300_GB_MSPOS1__MS_Y3_SHIFT 4
# define R300_GB_MSPOS1__MS_X4_SHIFT 8
# define R300_GB_MSPOS1__MS_Y4_SHIFT 12
# define R300_GB_MSPOS1__MS_X5_SHIFT 16
# define R300_GB_MSPOS1__MS_Y5_SHIFT 20
# define R300_GB_MSPOS1__MSBD1 24
 
/* Specifies the graphics pipeline configuration for rasterization. */
#define R300_GB_TILE_CONFIG 0x4018
/* All values below are pre-shifted field values except the two *_SHIFT
 * entries. Entries written as (0 << n) are the zero setting of their field and
 * exist for readability at the call site only. */
# define R300_GB_TILE_DISABLE (0 << 0)
# define R300_GB_TILE_ENABLE (1 << 0)
/* Pipe-count field at bit 1. NOTE(review): the first entry is named RV300 but
 * its comment says RV350 - confirm which is intended. */
# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */
# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */
# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */
# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */
# define R300_GB_TILE_SIZE_8 (0 << 4)
# define R300_GB_TILE_SIZE_16 (1 << 4)
# define R300_GB_TILE_SIZE_32 (2 << 4)
# define R300_GB_SUPER_SIZE_1 (0 << 6)
# define R300_GB_SUPER_SIZE_2 (1 << 6)
# define R300_GB_SUPER_SIZE_4 (2 << 6)
# define R300_GB_SUPER_SIZE_8 (3 << 6)
# define R300_GB_SUPER_SIZE_16 (4 << 6)
# define R300_GB_SUPER_SIZE_32 (5 << 6)
# define R300_GB_SUPER_SIZE_64 (6 << 6)
# define R300_GB_SUPER_SIZE_128 (7 << 6)
# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */
# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */
# define R300_GB_SUPER_TILE_A (0 << 15)
# define R300_GB_SUPER_TILE_B (1 << 15)
# define R300_GB_SUBPIXEL_1_12 (0 << 16)
# define R300_GB_SUBPIXEL_1_16 (1 << 16)
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22)
# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
 
/* Specifies the sizes of the various FIFOs in the sc/rs/us. This register must be the first one written */
#define R300_GB_FIFO_SIZE 0x4024
/* each of the following is 2 bits wide */
/* Unshifted size codes, to be shifted by one of the *_SIZE_SHIFT values. */
#define R300_GB_FIFO_SIZE_32 0
#define R300_GB_FIFO_SIZE_64 1
#define R300_GB_FIFO_SIZE_128 2
#define R300_GB_FIFO_SIZE_256 3
# define R300_SC_IFIFO_SIZE_SHIFT 0
# define R300_SC_TZFIFO_SIZE_SHIFT 2
# define R300_SC_BFIFO_SIZE_SHIFT 4

# define R300_US_OFIFO_SIZE_SHIFT 12
# define R300_US_WFIFO_SIZE_SHIFT 14
/* the following use the same constants as above, but the meaning is
 * times 2 (i.e. instead of 32 words it means 64) */
# define R300_RS_TFIFO_SIZE_SHIFT 6
# define R300_RS_CFIFO_SIZE_SHIFT 8
# define R300_US_RAM_SIZE_SHIFT 10
/* watermarks, 3 bits wide */
# define R300_RS_HIGHWATER_COL_SHIFT 16
# define R300_RS_HIGHWATER_TEX_SHIFT 19
# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
 
/* Note: this register (0x4028) is listed out of address order, ahead of
 * R300_GB_SELECT (0x401c) and R300_GB_AA_CONFIG (0x4020). */
#define R300_GB_Z_PEQ_CONFIG 0x4028
/* Single-bit size selector at bit 0. */
# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
 
/* Specifies various polygon specific selects (fog, depth, perspective). */
#define R300_GB_SELECT 0x401c
/* Fog source selector at bit 0, pre-shifted values 0..5. */
# define R300_GB_FOG_SELECT_C0A (0 << 0)
# define R300_GB_FOG_SELECT_C1A (1 << 0)
# define R300_GB_FOG_SELECT_C2A (2 << 0)
# define R300_GB_FOG_SELECT_C3A (3 << 0)
# define R300_GB_FOG_SELECT_1_1_W (4 << 0)
# define R300_GB_FOG_SELECT_Z (5 << 0)
# define R300_GB_DEPTH_SELECT_Z (0 << 3)
# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3)
# define R300_GB_W_SELECT_1_W (0 << 4)
# define R300_GB_W_SELECT_1 (1 << 4)
# define R300_GB_FOG_STUFF_DISABLE (0 << 5)
# define R300_GB_FOG_STUFF_ENABLE (1 << 5)
/* Fog-stuffing target: a 4-bit texture field (bits 9:6) and a 2-bit component
 * field (bits 11:10), each given as a shift/mask pair. */
# define R300_GB_FOG_STUFF_TEX_SHIFT 6
# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0
# define R300_GB_FOG_STUFF_COMP_SHIFT 10
# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00
 
/* Specifies the graphics pipeline configuration for antialiasing. */
#define R300_GB_AA_CONFIG 0x4020
# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0)
# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0)
/* Subsample-count field at bit 1; codes 0..3 select 2, 3, 4 or 6 subsamples. */
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)

/* Selects which of 4 pipes are active. */
#define R300_GB_PIPE_SELECT 0x402c
/* Four pipe-ID fields spaced 2 bits apart, followed by a mask field at bit 8. */
# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
/* NOTE(review): the next three look like bit positions as well, although they
 * lack the _SHIFT suffix - confirm before use. */
# define R300_GB_PIPE_SELECT_MAX_PIPE 12
# define R300_GB_PIPE_SELECT_BAD_PIPES 14
# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18
 
 
/* Specifies the sizes of the various FIFOs in the sc/rs.
 * Four 6-bit high-water-mark fields, each given as a shift/mask pair. */
#define R300_GB_FIFO_SIZE1 0x4070
/* High water mark for SC input fifo */
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
/* High water mark for SC input fifo (B) */
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
/* High water mark for RS colors' fifo */
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
/* High water mark for RS textures' fifo */
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
 
/* This table specifies the source location and format for up to 16 texture
* addresses (i[0]:i[15]) and four colors (c[0]:c[3])
*/
/* Sixteen registers at consecutive dword addresses. */
#define R500_RS_IP_0 0x4074
#define R500_RS_IP_1 0x4078
#define R500_RS_IP_2 0x407C
#define R500_RS_IP_3 0x4080
#define R500_RS_IP_4 0x4084
#define R500_RS_IP_5 0x4088
#define R500_RS_IP_6 0x408C
#define R500_RS_IP_7 0x4090
#define R500_RS_IP_8 0x4094
#define R500_RS_IP_9 0x4098
#define R500_RS_IP_10 0x409C
#define R500_RS_IP_11 0x40A0
#define R500_RS_IP_12 0x40A4
#define R500_RS_IP_13 0x40A8
#define R500_RS_IP_14 0x40AC
#define R500_RS_IP_15 0x40B0
/* Unshifted pointer values 62 and 63 (the top two codes of a 6-bit field). */
#define R500_RS_IP_PTR_K0 62
#define R500_RS_IP_PTR_K1 63
/* Field positions: four texture pointers spaced 6 bits apart (S, T, R, Q),
 * then the color pointer at bit 24 and the color format at bit 27. */
#define R500_RS_IP_TEX_PTR_S_SHIFT 0
#define R500_RS_IP_TEX_PTR_T_SHIFT 6
#define R500_RS_IP_TEX_PTR_R_SHIFT 12
#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
#define R500_RS_IP_COL_PTR_SHIFT 24
#define R500_RS_IP_COL_FMT_SHIFT 27
/* Packer forms of the same fields; the argument is shifted but not masked. */
# define R500_RS_SEL_S(x) ((x) << 0)
# define R500_RS_SEL_T(x) ((x) << 6)
# define R500_RS_SEL_R(x) ((x) << 12)
# define R500_RS_SEL_Q(x) ((x) << 18)
# define R500_RS_COL_PTR(x) ((x) << 24)
# define R500_RS_COL_FMT(x) ((x) << 27)
/* gap */
/* Offset enable lives in bit 31, the sign bit of a 32-bit int. The enable
 * value is built from an unsigned 1 because (1 << 31) on a signed int is
 * undefined behaviour and produces a negative, sign-extending constant. */
#define R500_RS_IP_OFFSET_DIS (0 << 31)
#define R500_RS_IP_OFFSET_EN (1u << 31)
 
/* gap */
 
/* Zero to flush caches. */
#define R300_TX_INVALTAGS 0x4100
/* The value (zero) to write to R300_TX_INVALTAGS. */
#define R300_TX_FLUSH 0x0

/* The upper enable bits are guessed, based on fglrx reported limits. */
/* One enable bit per texture unit 0..15, bit n for unit n. */
#define R300_TX_ENABLE 0x4104
# define R300_TX_ENABLE_0 (1 << 0)
# define R300_TX_ENABLE_1 (1 << 1)
# define R300_TX_ENABLE_2 (1 << 2)
# define R300_TX_ENABLE_3 (1 << 3)
# define R300_TX_ENABLE_4 (1 << 4)
# define R300_TX_ENABLE_5 (1 << 5)
# define R300_TX_ENABLE_6 (1 << 6)
# define R300_TX_ENABLE_7 (1 << 7)
# define R300_TX_ENABLE_8 (1 << 8)
# define R300_TX_ENABLE_9 (1 << 9)
# define R300_TX_ENABLE_10 (1 << 10)
# define R300_TX_ENABLE_11 (1 << 11)
# define R300_TX_ENABLE_12 (1 << 12)
# define R300_TX_ENABLE_13 (1 << 13)
# define R300_TX_ENABLE_14 (1 << 14)
# define R300_TX_ENABLE_15 (1 << 15)
 
#define R500_TX_FILTER_4 0x4110
/* Two weight fields spaced 11 bits apart (WEIGHT_1 at bit 0, WEIGHT_0 at
 * bit 11), a pair flag at bit 22, a phase field at bit 23 and a direction
 * selector at bit 27. */
# define R500_TX_WEIGHT_1_SHIFT (0)
# define R500_TX_WEIGHT_0_SHIFT (11)
# define R500_TX_WEIGHT_PAIR (1<<22)
# define R500_TX_PHASE_SHIFT (23)
# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
/* Correctly spelled name; the historical misspelling below is kept so that
 * existing users keep compiling. */
# define R500_TX_DIRECTION_VERTICAL (1<<27)
# define R500_TX_DIRECTION_VERITCAL R500_TX_DIRECTION_VERTICAL
 
#define R500_SU_TEX_WRAP_PS3 0x4114
 
/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
#define R300_GA_POINT_S0 0x4200
 
/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
#define R300_GA_POINT_T0 0x4204
 
/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
#define R300_GA_POINT_S1 0x4208
 
/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
#define R300_GA_POINT_T1 0x420c
 
/* Specifies amount to shift integer position of vertex (screen space) before
* converting to float for triangle stipple.
*/
#define R300_GA_TRIANGLE_STIPPLE 0x4214
/* Two 4-bit shift amounts: X in bits 3:0 and Y in bits 19:16. */
# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0
# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f
# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16
# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000

/* The pointsize is given in multiples of 6. The pointsize can be enormous:
* Clear() renders a single point that fills the entire framebuffer.
* 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
* 8b precision).
*/
#define R300_GA_POINT_SIZE 0x421C
/* Two 16-bit halves: Y in bits 15:0 and X in bits 31:16. */
# define R300_POINTSIZE_Y_SHIFT 0
# define R300_POINTSIZE_Y_MASK 0x0000ffff
# define R300_POINTSIZE_X_SHIFT 16
# define R300_POINTSIZE_X_MASK 0xffff0000
/* Largest representable size in the "multiples of 6" unit: 0xffff / 6 with
 * integer division, i.e. 10922. */
# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
 
/* Red fill color */
#define R500_GA_FILL_R 0x4220
 
/* Green fill color */
#define R500_GA_FILL_G 0x4224
 
/* Blue fill color */
#define R500_GA_FILL_B 0x4228
 
/* Alpha fill color */
#define R500_GA_FILL_A 0x422c
 
 
/* Specifies maximum and minimum point & sprite sizes for per vertex size
* specification. The lower part (15:0) is MIN and (31:16) is max.
*/
#define R300_GA_POINT_MINMAX 0x4230
/* Both masks are built from an unsigned constant: (0xFFFF << 16) does not fit
 * in a signed 32-bit int, so the plain int shift was undefined behaviour and
 * produced a negative, sign-extending mask. MIN uses the same type so the two
 * masks combine without signed/unsigned surprises. */
# define R300_GA_POINT_MINMAX_MIN_SHIFT 0
# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFFu << 0)
# define R300_GA_POINT_MINMAX_MAX_SHIFT 16
# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFFu << 16)
 
/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
* subprecision); (16.0) fixed format.
*
* The line width is given in multiples of 6.
* In default mode lines are classified as vertical lines.
* HO: horizontal
* VE: vertical or horizontal
* HO & VE: no classification
*/
#define R300_GA_LINE_CNTL 0x4234
/* Width occupies bits 15:0; the end-type selector is a pre-shifted value at
 * bit 16 (codes 0..3), and the R500-only sort selector sits at bit 18. */
# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0
# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff
# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16)
# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16)
# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */
# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */
# define R500_GA_LINE_CNTL_SORT_NO (0 << 18)
# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18)
/** TODO: looks wrong */
# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
/* The two legacy flags below have the same numeric values as
 * R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) and
 * R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) respectively, which does not match
 * their HO/VE names - hence the TODOs. */
/** TODO: looks wrong */
# define R300_LINE_CNT_HO (1 << 16)
/** TODO: looks wrong */
# define R300_LINE_CNT_VE (1 << 17)

/* Line Stipple configuration information. */
#define R300_GA_LINE_STIPPLE_CONFIG 0x4238
/* Reset mode in bits 1:0 (codes 0..2); the stipple scale fills bits 31:2. */
# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0)
# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0)
# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0)
# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2
# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc
 
/* Used to load US instructions and constants */
#define R500_GA_US_VECTOR_INDEX 0x4250
/* Index in bits 7:0, a type selector at bit 16 (instructions vs. constants)
 * and a clamp selector at bit 17. */
# define R500_GA_US_VECTOR_INDEX_SHIFT 0
# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff
# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16)
# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16)
# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17)
# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17)

/* Data register for loading US instructions and constants */
#define R500_GA_US_VECTOR_DATA 0x4254
 
/* Specifies color properties and mappings of textures. */
#define R500_GA_COLOR_CONTROL_PS3 0x4258
/* Shading mode for textures 0..10: one pre-shifted 2-bit field per texture at
 * bit 2*n, with codes 0 = solid, 1 = flat, 2 = Gouraud. */
# define R500_TEX0_SHADING_PS3_SOLID (0 << 0)
# define R500_TEX0_SHADING_PS3_FLAT (1 << 0)
# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0)
# define R500_TEX1_SHADING_PS3_SOLID (0 << 2)
# define R500_TEX1_SHADING_PS3_FLAT (1 << 2)
# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2)
# define R500_TEX2_SHADING_PS3_SOLID (0 << 4)
# define R500_TEX2_SHADING_PS3_FLAT (1 << 4)
# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4)
# define R500_TEX3_SHADING_PS3_SOLID (0 << 6)
# define R500_TEX3_SHADING_PS3_FLAT (1 << 6)
# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6)
# define R500_TEX4_SHADING_PS3_SOLID (0 << 8)
# define R500_TEX4_SHADING_PS3_FLAT (1 << 8)
# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8)
# define R500_TEX5_SHADING_PS3_SOLID (0 << 10)
# define R500_TEX5_SHADING_PS3_FLAT (1 << 10)
# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10)
# define R500_TEX6_SHADING_PS3_SOLID (0 << 12)
# define R500_TEX6_SHADING_PS3_FLAT (1 << 12)
# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12)
# define R500_TEX7_SHADING_PS3_SOLID (0 << 14)
# define R500_TEX7_SHADING_PS3_FLAT (1 << 14)
# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14)
# define R500_TEX8_SHADING_PS3_SOLID (0 << 16)
# define R500_TEX8_SHADING_PS3_FLAT (1 << 16)
# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16)
# define R500_TEX9_SHADING_PS3_SOLID (0 << 18)
# define R500_TEX9_SHADING_PS3_FLAT (1 << 18)
# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18)
# define R500_TEX10_SHADING_PS3_SOLID (0 << 20)
# define R500_TEX10_SHADING_PS3_FLAT (1 << 20)
# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20)
/* Texture override selectors for color 0 (at bit 22) and color 1 (at bit 26):
 * code 0 means no override, codes 1..10 pick a texture. The largest code is
 * 10, so each selector needs 4 bits. */
# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22)
# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22)
# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26)
# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26)
 
/* Returns idle status of various G3D block, captured when GA_IDLE written or
* when hard or soft reset asserted.
*
* Each define below is the single-bit mask of one status flag, to be ANDed
* with the value read from R500_GA_IDLE. They were previously written as
* (0 << n), which made every mask evaluate to 0 and therefore useless for
* testing a bit; bit positions are unchanged.
*/
#define R500_GA_IDLE 0x425c
# define R500_GA_IDLE_PIPE3_Z_IDLE (1 << 0)
# define R500_GA_IDLE_PIPE2_Z_IDLE (1 << 1)
# define R500_GA_IDLE_PIPE3_CD_IDLE (1 << 2)
# define R500_GA_IDLE_PIPE2_CD_IDLE (1 << 3)
# define R500_GA_IDLE_PIPE3_FG_IDLE (1 << 4)
# define R500_GA_IDLE_PIPE2_FG_IDLE (1 << 5)
# define R500_GA_IDLE_PIPE3_US_IDLE (1 << 6)
# define R500_GA_IDLE_PIPE2_US_IDLE (1 << 7)
# define R500_GA_IDLE_PIPE3_SC_IDLE (1 << 8)
# define R500_GA_IDLE_PIPE2_SC_IDLE (1 << 9)
# define R500_GA_IDLE_PIPE3_RS_IDLE (1 << 10)
# define R500_GA_IDLE_PIPE2_RS_IDLE (1 << 11)
# define R500_GA_IDLE_PIPE1_Z_IDLE (1 << 12)
# define R500_GA_IDLE_PIPE0_Z_IDLE (1 << 13)
# define R500_GA_IDLE_PIPE1_CD_IDLE (1 << 14)
# define R500_GA_IDLE_PIPE0_CD_IDLE (1 << 15)
# define R500_GA_IDLE_PIPE1_FG_IDLE (1 << 16)
# define R500_GA_IDLE_PIPE0_FG_IDLE (1 << 17)
# define R500_GA_IDLE_PIPE1_US_IDLE (1 << 18)
# define R500_GA_IDLE_PIPE0_US_IDLE (1 << 19)
# define R500_GA_IDLE_PIPE1_SC_IDLE (1 << 20)
# define R500_GA_IDLE_PIPE0_SC_IDLE (1 << 21)
# define R500_GA_IDLE_PIPE1_RS_IDLE (1 << 22)
# define R500_GA_IDLE_PIPE0_RS_IDLE (1 << 23)
# define R500_GA_IDLE_SU_IDLE (1 << 24)
# define R500_GA_IDLE_GA_IDLE (1 << 25)
# define R500_GA_IDLE_GA_UNIT2_IDLE (1 << 26)
 
/* Line stipple / line texture-coordinate registers.
* Only the register offsets are defined here; no field macros exist for them.
*/

/* Current value of the stipple accumulator. */
#define R300_GA_LINE_STIPPLE_VALUE 0x4260

/* S texture coordinate value for vertex 0 of a line (stuff textures -- i.e. AA). */
#define R300_GA_LINE_S0 0x4264
/* S texture coordinate value for vertex 1 of a line (V2 of the parallelogram --
* stuff textures -- i.e. AA).
*/
#define R300_GA_LINE_S1 0x4268
 
/* GA input FIFO high water marks.
* Each field is described by a _MASK / _SHIFT pair:
*   VERTEX_FIFO  -> bits 2:0
*   VERTEX_INDEX -> bits 5:3
*   VERTEX_REG   -> bits 13:6
*/
#define R500_GA_FIFO_CNTL 0x4270
# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007
# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0
# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038
# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3
# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0
# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6
 
/* GA enhance/tweaks.
* Four independent single-bit switches (bits 0..3). Each one has a
* _NO_EFFECT (bit clear) and an enabling (bit set) spelling so that call
* sites can state both choices explicitly.
*/
#define R300_GA_ENHANCE 0x4274
# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0)
# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */
# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1)
# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */
# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */
# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */
# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3)
# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */
 
/* GA_COLOR_CONTROL: shading mode per colour channel group, plus the
* provoking-vertex selection.
*
* Colours 0..3 each have an RGB and an ALPHA field, two bits wide, packed
* from bit 0 upwards (RGB0 at 0, ALPHA0 at 2, RGB1 at 4, ... ALPHA3 at 14).
* Field values: 0 = solid, 1 = flat, 2 = Gouraud.
* The provoking vertex selector is the 2-bit field starting at bit 16.
*/
#define R300_GA_COLOR_CONTROL 0x4278
/* colour 0 */
# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0)
# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0)
# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0)
# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2)
# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2)
# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2)
/* colour 1 */
# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4)
# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4)
# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4)
# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6)
# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6)
# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6)
/* colour 2 */
# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8)
# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8)
# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8)
# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10)
# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10)
# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10)
/* colour 3 */
# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12)
# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12)
# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12)
# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14)
# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14)
# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14)
/* provoking vertex */
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16)
# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16)

/* Flat shading selected for RGB and alpha of all four colours. */
# define R300_SHADE_MODEL_FLAT ( \
(R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT) | \
(R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT) | \
(R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT) | \
(R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT) )

/* Gouraud (smooth) shading selected for RGB and alpha of all four colours. */
# define R300_SHADE_MODEL_SMOOTH ( \
(R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD) | \
(R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD) | \
(R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD) | \
(R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) )
 
/* Specifies red & green components of fill color -- S312 format -- Backwards comp.
* Green is held in the low 16 bits, red in the high 16 bits.
* Note that the field macros carry a bare GA_ prefix (no R300_).
*/
#define R300_GA_SOLID_RG 0x427c
# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0
# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff
# define GA_SOLID_RG_COLOR_RED_SHIFT 16
# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000
/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp.
* Alpha is held in the low 16 bits, blue in the high 16 bits.
*/
#define R300_GA_SOLID_BA 0x4280
# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0
# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff
# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16
# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000
 
/* Polygon Mode
* Dangerous
*
* Layout as encoded by the macros below:
*   bit 0      - polygon mode enable (DISABLE / DUAL)
*   from bit 4 - primitive type used for front-facing polygons
*   from bit 7 - primitive type used for back-facing polygons
* The "reserved" markers stand for the bits between / after those fields.
*/
#define R300_GA_POLY_MODE 0x4288
# define R300_GA_POLY_MODE_DISABLE (0 << 0)
# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */
/* reserved */
# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4)
# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4)
# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4)
/* reserved */
# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7)
# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7)
# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7)
/* reserved */
 
/* Specifies the rounding mode for geometry & color SPFP to FP conversions.
*   bit 0    - geometry rounding (truncate / nearest)
*   bit 2    - color rounding (truncate / nearest)
*   bit 4    - RGB clamp selection (RGB / FP20)
*   bit 5    - alpha clamp selection (RGB / FP20)
*   bits 9:6 - R500 geometry mask, described by a _SHIFT / _MASK pair
*/
#define R300_GA_ROUND_MODE 0x428c
# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0)
# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2)
# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2)
# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4)
# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4)
# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5)
# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5)
# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6
# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0
 
/* Specifies x & y offsets for vertex data after conversion to FP.
* Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b
* subprecision).
* The X offset is held in the low 16 bits, the Y offset in the high 16 bits.
*/
#define R300_GA_OFFSET 0x4290
# define R300_GA_OFFSET_X_OFFSET_SHIFT 0
# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff
# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16
# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000

/* The next three registers have no field macros; only offsets are defined. */

/* Specifies the scale to apply to fog. */
#define R300_GA_FOG_SCALE 0x4294
/* Specifies the offset to apply to fog. */
#define R300_GA_FOG_OFFSET 0x4298
/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */
#define R300_GA_SOFT_RESET 0x429c
 
/* Not sure why there are duplicates of the factor and constant values.
* My best guess so far is that there are separate zbiases for test and write.
* Ordering might be wrong.
* Some of the tests indicate that fgl has a fallback implementation of zbias
* via pixel shaders.
*
* The polygon-offset registers come as a (scale, offset) pair for front-facing
* and another for back-facing polygons. R300_SU_TEX_WRAP merely sits at the
* adjacent address; it is listed here because of its offset only.
*/
#define R300_SU_TEX_WRAP 0x42A0
#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4
#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8
#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC
#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0
 
/* This register needs to be set to (1<<1) for RV350 to correctly
* perform depth test (see --vb-triangles in r300_demo)
* Don't know about other chips. - Vladimir
* This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
* My guess is that there are two bits for each zbias primitive
* (FILL, LINE, POINT).
* One to enable depth test and one for depth write.
* Yet this doesn't explain why depth writes work ...
*
* The macros below name three single-bit enables (bits 0, 1 and 2).
* NOTE(review): the FRONT/BACK/PARA names do not match the guess recorded
* above -- the historical comment is kept, but treat it as unverified.
*/
#define R300_SU_POLY_OFFSET_ENABLE 0x42B4
# define R300_FRONT_ENABLE (1 << 0)
# define R300_BACK_ENABLE (1 << 1)
# define R300_PARA_ENABLE (1 << 2)
 
/* Face culling control.
*   bit 0 - cull front faces
*   bit 1 - cull back faces
*   bit 2 - winding that counts as front-facing (0 = CCW, 1 = CW)
*/
#define R300_SU_CULL_MODE 0x42B8
# define R300_CULL_FRONT (1 << 0)
# define R300_CULL_BACK (1 << 1)
# define R300_FRONT_FACE_CCW (0 << 2)
# define R300_FRONT_FACE_CW (1 << 2)

/* SU Depth Scale value */
#define R300_SU_DEPTH_SCALE 0x42c0
/* SU Depth Offset value */
#define R300_SU_DEPTH_OFFSET 0x42c4

/* Raster pipe selection: one bit per pipe (0..3).
* R300_RASTER_PIPE_SELECT_ALL is the OR of all four select bits.
*/
#define R300_SU_REG_DEST 0x42c8
# define R300_RASTER_PIPE_SELECT_0 (1 << 0)
# define R300_RASTER_PIPE_SELECT_1 (1 << 1)
# define R300_RASTER_PIPE_SELECT_2 (1 << 2)
# define R300_RASTER_PIPE_SELECT_3 (1 << 3)
# define R300_RASTER_PIPE_SELECT_ALL 0xf
 
 
/* BEGIN: Rasterization / Interpolators - many guesses */
 
/*
* TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
* on the vertex program, *not* the fragment program)
* NOTE(review): no macro named TC_CNT exists here; it presumably corresponds
* to the IT_COUNT field below -- confirm.
*
* Field layout given by the macros:
*   IT_COUNT - bits 6:0
*   IC_COUNT - bits 10:7
*   W_ADDR   - bits 17:12 (R300_W_COUNT(x) shifts into this same position)
*   HIRES    - bit 18
* The function-like macros only shift their argument; they do not mask it.
*/
#define R300_RS_COUNT 0x4300
# define R300_IT_COUNT_SHIFT 0
# define R300_IT_COUNT_MASK 0x0000007f
# define R300_IC_COUNT_SHIFT 7
# define R300_IC_COUNT_MASK 0x00000780
# define R300_W_ADDR_SHIFT 12
# define R300_W_ADDR_MASK 0x0003f000
# define R300_HIRES_DIS (0 << 18)
# define R300_HIRES_EN (1 << 18)
# define R300_IT_COUNT(x) ((x) << 0)
# define R300_IC_COUNT(x) ((x) << 7)
# define R300_W_COUNT(x) ((x) << 12)

/* Rasterizer instruction count (bits 3:0) and TX offset (bits 7:5). */
#define R300_RS_INST_COUNT 0x4304
# define R300_RS_INST_COUNT_SHIFT 0
# define R300_RS_INST_COUNT_MASK 0x0000000f
# define R300_RS_TX_OFFSET_SHIFT 5
# define R300_RS_TX_OFFSET_MASK 0x000000e0
# define R300_RS_TX_OFFSET(x) ((x) << 5)
 
/* gap */
 
/* Only used for texture coordinates.
* Use the source field to route texture coordinate input from the
* vertex program to the desired interpolator. Note that the source
* field is relative to the outputs the vertex program *actually*
* writes. If a vertex program only writes texcoord[1], this will
* be source index 0.
* Set INTERP_USED on all interpolators that produce data used by
* the fragment program. INTERP_USED looks like a swizzling mask,
* but I haven't seen it used that way.
*
* Note: The _UNKNOWN constants are always set in their respective
* register. I don't know if this is necessary.
*
* The function-like macros below place a value into its field; they shift
* only and do not mask. The unshifted R300_RS_COL_FMT_* codes are meant as
* arguments to R300_RS_COL_FMT(), and the unshifted R300_RS_SEL_* codes as
* arguments to R300_RS_SEL_S/T/R/Q().
*/
#define R300_RS_IP_0 0x4310
#define R300_RS_IP_1 0x4314
#define R300_RS_IP_2 0x4318
#define R300_RS_IP_3 0x431C
# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
/* Argument parenthesized like every sibling macro, so that an expression
* argument is always shifted as a whole. */
# define R300_RS_TEX_PTR(x) ((x) << 0)
# define R300_RS_COL_PTR(x) ((x) << 6)
# define R300_RS_COL_FMT(x) ((x) << 9)
# define R300_RS_COL_FMT_RGBA 0
# define R300_RS_COL_FMT_RGB0 1
# define R300_RS_COL_FMT_RGB1 2
# define R300_RS_COL_FMT_000A 4
# define R300_RS_COL_FMT_0000 5
# define R300_RS_COL_FMT_0001 6
# define R300_RS_COL_FMT_111A 8
# define R300_RS_COL_FMT_1110 9
# define R300_RS_COL_FMT_1111 10
# define R300_RS_SEL_S(x) ((x) << 13)
# define R300_RS_SEL_T(x) ((x) << 16)
# define R300_RS_SEL_R(x) ((x) << 19)
# define R300_RS_SEL_Q(x) ((x) << 22)
# define R300_RS_SEL_C0 0
# define R300_RS_SEL_C1 1
# define R300_RS_SEL_C2 2
# define R300_RS_SEL_C3 3
# define R300_RS_SEL_K0 4
# define R300_RS_SEL_K1 5
 
 
/* R500 rasterizer instruction registers: 16 entries, 4 bytes apart
* (0x4320 .. 0x435c), followed by the field encodings shared by all of them.
*
* Field positions given by the macros:
*   TEX_ID       - from bit 0
*   TEX_CN_WRITE - bit 4
*   TEX_ADDR     - from bit 5
*   COL_ID       - from bit 12
*   COL_CN       - 2-bit field at bits 17:16 (NO_WRITE / WRITE / FBUFFER / BACKFACE)
*   COL_ADDR     - from bit 18
*   TEX_ADJ      - bit 25
*   W_CN         - bit 26
* The function-like macros shift only; they do not mask their argument.
*/
#define R500_RS_INST_0 0x4320
#define R500_RS_INST_1 0x4324
#define R500_RS_INST_2 0x4328
#define R500_RS_INST_3 0x432c
#define R500_RS_INST_4 0x4330
#define R500_RS_INST_5 0x4334
#define R500_RS_INST_6 0x4338
#define R500_RS_INST_7 0x433c
#define R500_RS_INST_8 0x4340
#define R500_RS_INST_9 0x4344
#define R500_RS_INST_10 0x4348
#define R500_RS_INST_11 0x434c
#define R500_RS_INST_12 0x4350
#define R500_RS_INST_13 0x4354
#define R500_RS_INST_14 0x4358
#define R500_RS_INST_15 0x435c
#define R500_RS_INST_TEX_ID_SHIFT 0
# define R500_RS_INST_TEX_ID(x) ((x) << 0)
#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
#define R500_RS_INST_TEX_ADDR_SHIFT 5
# define R500_RS_INST_TEX_ADDR(x) ((x) << 5)
#define R500_RS_INST_COL_ID_SHIFT 12
# define R500_RS_INST_COL_ID(x) ((x) << 12)
#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
#define R500_RS_INST_COL_CN_WRITE (1 << 16)
#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
#define R500_RS_INST_COL_ADDR_SHIFT 18
# define R500_RS_INST_COL_ADDR(x) ((x) << 18)
#define R500_RS_INST_TEX_ADJ (1 << 25)
#define R500_RS_INST_W_CN (1 << 26)
 
/* These DWORDs control how vertex data is routed into fragment program
* registers, after interpolators.
*
* Eight entries, 4 bytes apart (0x4330 .. 0x434C). These addresses coincide
* numerically with R500_RS_INST_4 .. R500_RS_INST_11; the R300_ and R500_
* prefixed field macros use different bit positions, so the two sets must
* not be mixed.
*
* Field positions given by the macros:
*   TEX_ID       - from bit 0
*   TEX_CN_WRITE - bit 3
*   TEX_ADDR     - from bit 6
*   COL_ID       - from bit 11
*   COL_CN_WRITE - bit 14
*   COL_ADDR     - from bit 17
*   TEX_ADJ      - bit 22
*   COL_BIAS_UNUSED - from bit 23
*/
#define R300_RS_INST_0 0x4330
#define R300_RS_INST_1 0x4334
#define R300_RS_INST_2 0x4338
#define R300_RS_INST_3 0x433C
#define R300_RS_INST_4 0x4340
#define R300_RS_INST_5 0x4344
#define R300_RS_INST_6 0x4348
#define R300_RS_INST_7 0x434C
# define R300_RS_INST_TEX_ID(x) ((x) << 0)
# define R300_RS_INST_TEX_CN_WRITE (1 << 3)
# define R300_RS_INST_TEX_ADDR(x) ((x) << 6)
# define R300_RS_INST_TEX_ADDR_SHIFT 6
# define R300_RS_INST_COL_ID(x) ((x) << 11)
# define R300_RS_INST_COL_CN_WRITE (1 << 14)
# define R300_RS_INST_COL_ADDR(x) ((x) << 17)
# define R300_RS_INST_COL_ADDR_SHIFT 17
# define R300_RS_INST_TEX_ADJ (1 << 22)
# define R300_RS_COL_BIAS_UNUSED_SHIFT 23
 
/* END: Rasterization / Interpolators - many guesses */
 
/* Hierarchical Z Enable
*   bit 0    - HyperZ enable
*   bit 1    - MIN / MAX selection
*   bits 4:2 - ADJ field; codes 0..7 are named ADJ_256 down to ADJ_2
*   bit 5    - HZ_Z0MIN
*   bit 6    - HZ_Z0MAX
*/
#define R300_SC_HYPERZ 0x43a4
# define R300_SC_HYPERZ_DISABLE (0 << 0)
# define R300_SC_HYPERZ_ENABLE (1 << 0)
# define R300_SC_HYPERZ_MIN (0 << 1)
# define R300_SC_HYPERZ_MAX (1 << 1)
# define R300_SC_HYPERZ_ADJ_256 (0 << 2)
# define R300_SC_HYPERZ_ADJ_128 (1 << 2)
# define R300_SC_HYPERZ_ADJ_64 (2 << 2)
# define R300_SC_HYPERZ_ADJ_32 (3 << 2)
# define R300_SC_HYPERZ_ADJ_16 (4 << 2)
# define R300_SC_HYPERZ_ADJ_8 (5 << 2)
# define R300_SC_HYPERZ_ADJ_4 (6 << 2)
# define R300_SC_HYPERZ_ADJ_2 (7 << 2)
# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5)
# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6)

/* Register offset only; no field macros are defined for it here. */
#define R300_SC_EDGERULE 0x43a8
 
/* BEGIN: Scissors and cliprects */
 
/* There are four clipping rectangles. Their corner coordinates are inclusive.
* Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
* on whether the pixel is inside cliprects 0-3, respectively. For example,
* if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
* the number 3 (binary 0011).
* Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
* the pixel is rasterized.
* NOTE(review): no macro named RE_CLIPRECT_CNTL exists here; it presumably
* refers to R300_SC_CLIP_RULE below -- confirm.
*
* In addition to this, there is a scissors rectangle. Only pixels inside the
* scissors rectangle are drawn. (coordinates are inclusive)
*
* For some reason, the top-left corner of the framebuffer is at (1440, 1440)
* for the purpose of clipping and scissors.
*
* Each cliprect has a top-left (TL) and a bottom-right (BR) register. A corner
* packs X into bits 12:0 and Y into bits 25:13.
*/
#define R300_SC_CLIPRECT_TL_0 0x43B0
#define R300_SC_CLIPRECT_BR_0 0x43B4
#define R300_SC_CLIPRECT_TL_1 0x43B8
#define R300_SC_CLIPRECT_BR_1 0x43BC
#define R300_SC_CLIPRECT_TL_2 0x43C0
#define R300_SC_CLIPRECT_BR_2 0x43C4
#define R300_SC_CLIPRECT_TL_3 0x43C8
#define R300_SC_CLIPRECT_BR_3 0x43CC
# define R300_CLIPRECT_OFFSET 1440
# define R300_CLIPRECT_MASK 0x1FFF
# define R300_CLIPRECT_X_SHIFT 0
# define R300_CLIPRECT_X_MASK (0x1FFF << 0)
# define R300_CLIPRECT_Y_SHIFT 13
# define R300_CLIPRECT_Y_MASK (0x1FFF << 13)
/* One bit per pixel number (0..15). The digits in each name list the
* cliprects the pixel is inside: e.g. R300_CLIP_10 is bit 3 (binary 0011,
* inside cliprects 1 and 0); R300_CLIP_OUT is bit 0 (inside none).
*/
#define R300_SC_CLIP_RULE 0x43D0
# define R300_CLIP_OUT (1 << 0)
# define R300_CLIP_0 (1 << 1)
# define R300_CLIP_1 (1 << 2)
# define R300_CLIP_10 (1 << 3)
# define R300_CLIP_2 (1 << 4)
# define R300_CLIP_20 (1 << 5)
# define R300_CLIP_21 (1 << 6)
# define R300_CLIP_210 (1 << 7)
# define R300_CLIP_3 (1 << 8)
# define R300_CLIP_30 (1 << 9)
# define R300_CLIP_31 (1 << 10)
# define R300_CLIP_310 (1 << 11)
# define R300_CLIP_32 (1 << 12)
# define R300_CLIP_320 (1 << 13)
# define R300_CLIP_321 (1 << 14)
# define R300_CLIP_3210 (1 << 15)
 
/* gap */
 
/* Scissor rectangle: top-left (TL) and bottom-right (BR) corner registers.
* A corner packs X into bits 12:0 and Y into bits 25:13.
* R300_SCISSORS_OFFSET has the same value (1440) as R300_CLIPRECT_OFFSET.
*/
#define R300_SC_SCISSORS_TL 0x43E0
#define R300_SC_SCISSORS_BR 0x43E4
# define R300_SCISSORS_OFFSET 1440
# define R300_SCISSORS_X_SHIFT 0
# define R300_SCISSORS_X_MASK (0x1FFF << 0)
# define R300_SCISSORS_Y_SHIFT 13
# define R300_SCISSORS_Y_MASK (0x1FFF << 13)

/* Screen door sample mask */
#define R300_SC_SCREENDOOR 0x43e8
 
/* END: Scissors and cliprects */
 
/* BEGIN: Texture specification */
 
/*
* The texture specification dwords are grouped by meaning and not by texture
* unit. This means that e.g. the offset for texture image unit N is found in
* register TX_OFFSET_0 + (4*N)
*
* TX_FILTER0 has one register per texture unit (16 units, 4 bytes apart).
* Field positions given by the macros:
*   WRAP_S / WRAP_T / WRAP_R - 3 bits each at bits 2:0, 5:3, 8:6
*   MAG_FILTER               - bits 10:9
*   MIN_FILTER               - bits 12:11
*   MIN_FILTER_MIP           - bits 14:13
*   MAX_MIP_LEVEL            - bits 20:17
*   MAX_ANISO                - bits 23:21
*/
#define R300_TX_FILTER0_0 0x4400
#define R300_TX_FILTER0_1 0x4404
#define R300_TX_FILTER0_2 0x4408
#define R300_TX_FILTER0_3 0x440c
#define R300_TX_FILTER0_4 0x4410
#define R300_TX_FILTER0_5 0x4414
#define R300_TX_FILTER0_6 0x4418
#define R300_TX_FILTER0_7 0x441c
#define R300_TX_FILTER0_8 0x4420
#define R300_TX_FILTER0_9 0x4424
#define R300_TX_FILTER0_10 0x4428
#define R300_TX_FILTER0_11 0x442c
#define R300_TX_FILTER0_12 0x4430
#define R300_TX_FILTER0_13 0x4434
#define R300_TX_FILTER0_14 0x4438
#define R300_TX_FILTER0_15 0x443c
/* Wrap mode codes (unshifted); place them with R300_TX_WRAP_S()/_T() or the
* matching _SHIFT macro. */
# define R300_TX_REPEAT 0
# define R300_TX_MIRRORED 1
# define R300_TX_CLAMP_TO_EDGE 2
# define R300_TX_MIRROR_ONCE_TO_EDGE 3
# define R300_TX_CLAMP 4
# define R300_TX_MIRROR_ONCE 5
# define R300_TX_CLAMP_TO_BORDER 6
# define R300_TX_MIRROR_ONCE_TO_BORDER 7
# define R300_TX_WRAP_S_SHIFT 0
# define R300_TX_WRAP_S_MASK (7 << 0)
# define R300_TX_WRAP_T_SHIFT 3
# define R300_TX_WRAP_T_MASK (7 << 3)
# define R300_TX_WRAP_R_SHIFT 6
# define R300_TX_WRAP_R_MASK (7 << 6)
/* Magnification / minification filter selection (already shifted). */
# define R300_TX_MAG_FILTER_4 (0 << 9)
# define R300_TX_MAG_FILTER_NEAREST (1 << 9)
# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
# define R300_TX_MAG_FILTER_ANISO (3 << 9)
# define R300_TX_MAG_FILTER_MASK (3 << 9)
# define R300_TX_MIN_FILTER_NEAREST (1 << 11)
# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
# define R300_TX_MIN_FILTER_ANISO (3 << 11)
# define R300_TX_MIN_FILTER_MASK (3 << 11)
# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13)
# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13)
# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13)
# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13)
# define R300_TX_MAX_MIP_LEVEL_SHIFT 17
# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17)
# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21)
# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21)
# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21)
# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21)
# define R300_TX_MAX_ANISO_MASK (7 << 21)
/* Field builders: shift only, no masking of the argument. */
# define R300_TX_WRAP_S(x) ((x) << 0)
# define R300_TX_WRAP_T(x) ((x) << 3)
# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17)
 
/* Second texture filter word. Only the register for unit 0 is named here.
*
* Field positions given by the macros:
*   chroma key mode  - unshifted codes 0..2 (lowest bits)
*   MC_ROUND         - bit 2
*   LOD_BIAS         - bits 12:3 (_SHIFT / _MASK pair)
*   EDGE_ANISO       - bit 13
*   MC_COORD_TRUNCATE- bit 14
*   TX_TRI_PERF      - from bit 15
*   ANISO_THRESHOLD  - bits 19:17
*   R400/R500 extras - bits 21, 22, 28:23, 30 and 31
*/
#define R300_TX_FILTER1_0 0x4440
# define R300_CHROMA_KEY_MODE_DISABLE 0
# define R300_CHROMA_KEY_FORCE 1
# define R300_CHROMA_KEY_BLEND 2
# define R300_MC_ROUND_NORMAL (0<<2)
# define R300_MC_ROUND_MPEG4 (1<<2)
# define R300_LOD_BIAS_SHIFT 3
# define R300_LOD_BIAS_MASK 0x1ff8
# define R300_EDGE_ANISO_EDGE_DIAG (0<<13)
# define R300_EDGE_ANISO_EDGE_ONLY (1<<13)
# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14)
# define R300_MC_COORD_TRUNCATE_MPEG (1<<14)
# define R300_TX_TRI_PERF_0_8 (0<<15)
# define R300_TX_TRI_PERF_1_8 (1<<15)
# define R300_TX_TRI_PERF_1_4 (2<<15)
# define R300_TX_TRI_PERF_3_8 (3<<15)
# define R300_ANISO_THRESHOLD_MASK (7<<17)

# define R400_DXTC_SWIZZLE_ENABLE (1<<21)
# define R500_MACRO_SWITCH (1<<22)
# define R500_TX_MAX_ANISO(x) ((x) << 23)
# define R500_TX_MAX_ANISO_MASK (63 << 23)
# define R500_TX_ANISO_HIGH_QUALITY (1 << 30)
/* Bit 31: the constant must be unsigned. Shifting a signed 1 into the sign
* bit of int is undefined behaviour in C and yields a negative value. */
# define R500_BORDER_FIX (1u << 31)
 
/* Texture size word. Only the register for unit 0 is named here.
*
* Field positions given by the macros:
*   WIDTHMASK  - bits 10:0
*   HEIGHTMASK - bits 21:11
*   DEPTHMASK  - bits 25:22
*   NUM_LEVELS - from bit 26
*   SIZE_PROJECTED - bit 30
*   PITCH_EN   - bit 31
* The function-like macros shift only; they do not mask their argument.
*/
#define R300_TX_FORMAT0_0 0x4480
# define R300_TX_WIDTHMASK_SHIFT 0
# define R300_TX_WIDTHMASK_MASK (2047 << 0)
# define R300_TX_HEIGHTMASK_SHIFT 11
# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
# define R300_TX_DEPTHMASK_SHIFT 22
# define R300_TX_DEPTHMASK_MASK (0xf << 22)
# define R300_TX_SIZE_PROJECTED (1 << 30)
/* Bit 31: the constant must be unsigned. Shifting a signed 1 into the sign
* bit of int is undefined behaviour in C and yields a negative value. */
# define R300_TX_PITCH_EN (1u << 31)
# define R300_TX_WIDTH(x) ((x) << 0)
# define R300_TX_HEIGHT(x) ((x) << 11)
# define R300_TX_DEPTH(x) ((x) << 22)
# define R300_TX_NUM_LEVELS(x) ((x) << 26)
 
/* Texture format word. Only the register for unit 0 is named here. */
#define R300_TX_FORMAT1_0 0x44C0
/* The interpretation of the format word by Wladimir van der Laan */
/* The X, Y, Z and W refer to the layout of the components.
They are given meanings as R, G, B and Alpha by the swizzle
specification */
/* Base format codes (unshifted, 0x0 .. 0x1F). */
# define R300_TX_FORMAT_X8 0x0
# define R300_TX_FORMAT_X16 0x1
# define R300_TX_FORMAT_Y4X4 0x2
# define R300_TX_FORMAT_Y8X8 0x3
# define R300_TX_FORMAT_Y16X16 0x4
# define R300_TX_FORMAT_Z3Y3X2 0x5
# define R300_TX_FORMAT_Z5Y6X5 0x6
# define R300_TX_FORMAT_Z6Y5X5 0x7
# define R300_TX_FORMAT_Z11Y11X10 0x8
# define R300_TX_FORMAT_Z10Y11X11 0x9
# define R300_TX_FORMAT_W4Z4Y4X4 0xA
# define R300_TX_FORMAT_W1Z5Y5X5 0xB
# define R300_TX_FORMAT_W8Z8Y8X8 0xC
# define R300_TX_FORMAT_W2Z10Y10X10 0xD
# define R300_TX_FORMAT_W16Z16Y16X16 0xE
# define R300_TX_FORMAT_DXT1 0xF
# define R300_TX_FORMAT_DXT3 0x10
# define R300_TX_FORMAT_DXT5 0x11
# define R300_TX_FORMAT_CxV8U8 0x12
# define R300_TX_FORMAT_AVYU444 0x13
# define R300_TX_FORMAT_VYUY422 0x14
# define R300_TX_FORMAT_YVYU422 0x15
# define R300_TX_FORMAT_16_MPEG 0x16
# define R300_TX_FORMAT_16_16_MPEG 0x17
# define R300_TX_FORMAT_16F 0x18
# define R300_TX_FORMAT_16F_16F 0x19
# define R300_TX_FORMAT_16F_16F_16F_16F 0x1A
# define R300_TX_FORMAT_32F 0x1B
# define R300_TX_FORMAT_32F_32F 0x1C
# define R300_TX_FORMAT_32F_32F_32F_32F 0x1D
# define R300_TX_FORMAT_W24_FP 0x1E
# define R400_TX_FORMAT_ATI2N 0x1F

/* These need TX_FORMAT2_[0-15].TXFORMAT_MSB set.

My guess is the 10-bit formats are the 8-bit ones but with filtering being
performed with the precision of 10 bits per channel. This makes sense
with sRGB textures since the conversion to linear space reduces the precision
significantly so the shader gets approximately the 8-bit precision
in the end. It might also improve the quality of HDR rendering where
high-precision filtering is desirable.

Again, this is guessed, the formats might mean something entirely else.
The others should be fine. */
/* Note: these codes reuse the numeric values 0x0 .. 0x6 of the R300 list. */
# define R500_TX_FORMAT_X1 0x0
# define R500_TX_FORMAT_X1_REV 0x1
# define R500_TX_FORMAT_X10 0x2
# define R500_TX_FORMAT_Y10X10 0x3
# define R500_TX_FORMAT_W10Z10Y10X10 0x4
# define R500_TX_FORMAT_ATI1N 0x5
# define R500_TX_FORMAT_Y8X24 0x6


/* Per-component signedness flags (bits 5..8); R300_TX_FORMAT_SIGNED is the
* OR of all four. */
# define R300_TX_FORMAT_SIGNED_W (1 << 5)
# define R300_TX_FORMAT_SIGNED_Z (1 << 6)
# define R300_TX_FORMAT_SIGNED_Y (1 << 7)
# define R300_TX_FORMAT_SIGNED_X (1 << 8)
# define R300_TX_FORMAT_SIGNED (0xf << 5)

/* Texture coordinate type, 2-bit field at bits 26:25. */
# define R300_TX_FORMAT_3D (1 << 25)
# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
# define R300_TX_FORMAT_TEX_COORD_TYPE_MASK (0x3 << 25)
 
/* alpha modes, convenience mostly */
/* if you have alpha, pick constant appropriate to the
number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */
/* Numerically these equal the swizzle selectors X, Y, W and ONE (0, 1, 3, 5)
* placed at R300_TX_FORMAT_A_SHIFT (bit 9). */
# define R300_TX_FORMAT_ALPHA_1CH 0x000
# define R300_TX_FORMAT_ALPHA_2CH 0x200
# define R300_TX_FORMAT_ALPHA_4CH 0x600
# define R300_TX_FORMAT_ALPHA_NONE 0xA00
/* Swizzling */
/* constants */
/* Unshifted 3-bit selector codes; one is placed in each of the B/G/R/A
* selector fields via the *_SHIFT macros below. */
# define R300_TX_FORMAT_X 0
# define R300_TX_FORMAT_Y 1
# define R300_TX_FORMAT_Z 2
# define R300_TX_FORMAT_W 3
# define R300_TX_FORMAT_ZERO 4
# define R300_TX_FORMAT_ONE 5
/* 2.0*Z, everything above 1.0 is set to 0.0 */
# define R300_TX_FORMAT_CUT_Z 6
/* 2.0*W, everything above 1.0 is set to 0.0 */
# define R300_TX_FORMAT_CUT_W 7

# define R300_TX_FORMAT_B_SHIFT 18
# define R300_TX_FORMAT_G_SHIFT 15
# define R300_TX_FORMAT_R_SHIFT 12
# define R300_TX_FORMAT_A_SHIFT 9
/* Convenience macro to take care of layout and swizzling */
/* Each argument is a name SUFFIX that is token-pasted onto R300_TX_FORMAT_:
* B, G, R and A take a selector suffix (X, Y, Z, W, ZERO, ONE, CUT_Z, CUT_W)
* and FMT takes a base-format suffix, e.g.
* R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8).
* Because of the pasting, arguments cannot be arbitrary expressions. */
# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \
((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \
| ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \
| ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \
| ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \
| (R300_TX_FORMAT_##FMT) \
)
/* These can be ORed with result of R300_EASY_TX_FORMAT()
We don't really know what they do. Take values from a
constant color ? */
/* NOTE(review): these occupy bits 5..8, the same bits as the
* R300_TX_FORMAT_SIGNED_* flags -- confirm which interpretation is right. */
# define R300_TX_FORMAT_CONST_X (1<<5)
# define R300_TX_FORMAT_CONST_Y (2<<5)
# define R300_TX_FORMAT_CONST_Z (4<<5)
# define R300_TX_FORMAT_CONST_W (8<<5)

# define R300_TX_FORMAT_GAMMA (1 << 21)
# define R300_TX_FORMAT_YUV_TO_RGB (1 << 22)
 
/* Texture cache region selection: a 5-bit code placed at bits 31:27.
* The unshifted R300_TX_CACHE_* codes below are the arguments to
* R300_TX_CACHE(). The names describe the fraction (whole, half, fourth,
* eighth, sixteenth) and the index of the region within that fraction.
*
* The argument is converted to unsigned before shifting: codes >= 16 set
* bit 31, and shifting a signed int into the sign bit is undefined
* behaviour in C. The macro therefore yields an unsigned value.
*/
# define R300_TX_CACHE(x) ((unsigned)(x) << 27)
# define R300_TX_CACHE_WHOLE 0
/* reserved */
# define R300_TX_CACHE_HALF_0 2
# define R300_TX_CACHE_HALF_1 3
# define R300_TX_CACHE_FOURTH_0 4
# define R300_TX_CACHE_FOURTH_1 5
# define R300_TX_CACHE_FOURTH_2 6
# define R300_TX_CACHE_FOURTH_3 7
# define R300_TX_CACHE_EIGHTH_0 8
# define R300_TX_CACHE_EIGHTH_1 9
# define R300_TX_CACHE_EIGHTH_2 10
# define R300_TX_CACHE_EIGHTH_3 11
# define R300_TX_CACHE_EIGHTH_4 12
# define R300_TX_CACHE_EIGHTH_5 13
# define R300_TX_CACHE_EIGHTH_6 14
# define R300_TX_CACHE_EIGHTH_7 15
# define R300_TX_CACHE_SIXTEENTH_0 16
# define R300_TX_CACHE_SIXTEENTH_1 17
# define R300_TX_CACHE_SIXTEENTH_2 18
# define R300_TX_CACHE_SIXTEENTH_3 19
# define R300_TX_CACHE_SIXTEENTH_4 20
# define R300_TX_CACHE_SIXTEENTH_5 21
# define R300_TX_CACHE_SIXTEENTH_6 22
# define R300_TX_CACHE_SIXTEENTH_7 23
# define R300_TX_CACHE_SIXTEENTH_8 24
# define R300_TX_CACHE_SIXTEENTH_9 25
# define R300_TX_CACHE_SIXTEENTH_10 26
# define R300_TX_CACHE_SIXTEENTH_11 27
# define R300_TX_CACHE_SIXTEENTH_12 28
# define R300_TX_CACHE_SIXTEENTH_13 29
# define R300_TX_CACHE_SIXTEENTH_14 30
# define R300_TX_CACHE_SIXTEENTH_15 31
 
/* Third texture format word. Only the register for unit 0 is named here.
*   PITCHMASK        - bits 10:0
*   TXFORMAT_MSB     - bit 14 (R500)
*   TXWIDTH_BIT11    - bit 15 (R500)
*   TXHEIGHT_BIT11   - bit 16 (R500)
*   POW2FIX2FLT      - bit 17 (R500)
*   SEL_FILTER4      - 2-bit field at bits 19:18 (R500)
*/
#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */
# define R300_TX_PITCHMASK_SHIFT 0
# define R300_TX_PITCHMASK_MASK (2047 << 0)
# define R500_TXFORMAT_MSB (1 << 14)
# define R500_TXWIDTH_BIT11 (1 << 15)
# define R500_TXHEIGHT_BIT11 (1 << 16)
# define R500_POW2FIX2FLT (1 << 17)
# define R500_SEL_FILTER4_TC0 (0 << 18)
# define R500_SEL_FILTER4_TC1 (1 << 18)
# define R500_SEL_FILTER4_TC2 (2 << 18)
# define R500_SEL_FILTER4_TC3 (3 << 18)
 
/* Texture offset registers; eight are named here (units 0..7), 4 bytes apart.
*
* Layout given by the macros:
*   bits 1:0  - endian swap mode
*   bit  2    - macro tiling
*   bits 4:3  - micro tiling
*   bits 31:5 - offset (R300_TXO_OFFSET_MASK / _SHIFT)
*/
#define R300_TX_OFFSET_0 0x4540
#define R300_TX_OFFSET_1 0x4544
#define R300_TX_OFFSET_2 0x4548
#define R300_TX_OFFSET_3 0x454C
#define R300_TX_OFFSET_4 0x4550
#define R300_TX_OFFSET_5 0x4554
#define R300_TX_OFFSET_6 0x4558
#define R300_TX_OFFSET_7 0x455C

# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
# define R300_TXO_MACRO_TILE_LINEAR (0 << 2)
# define R300_TXO_MACRO_TILE_TILED (1 << 2)
# define R300_TXO_MACRO_TILE(x) ((x) << 2)
# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
# define R300_TXO_MICRO_TILE_TILED (1 << 3)
# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3)
# define R300_TXO_MICRO_TILE(x) ((x) << 3)
# define R300_TXO_OFFSET_MASK 0xffffffe0
# define R300_TXO_OFFSET_SHIFT 5
 
/* 32 bit chroma key; one register per texture unit (16 units, 4 bytes apart). */
#define R300_TX_CHROMA_KEY_0 0x4580
#define R300_TX_CHROMA_KEY_1 0x4584
#define R300_TX_CHROMA_KEY_2 0x4588
#define R300_TX_CHROMA_KEY_3 0x458c
#define R300_TX_CHROMA_KEY_4 0x4590
#define R300_TX_CHROMA_KEY_5 0x4594
#define R300_TX_CHROMA_KEY_6 0x4598
#define R300_TX_CHROMA_KEY_7 0x459c
#define R300_TX_CHROMA_KEY_8 0x45a0
#define R300_TX_CHROMA_KEY_9 0x45a4
#define R300_TX_CHROMA_KEY_10 0x45a8
#define R300_TX_CHROMA_KEY_11 0x45ac
#define R300_TX_CHROMA_KEY_12 0x45b0
#define R300_TX_CHROMA_KEY_13 0x45b4
#define R300_TX_CHROMA_KEY_14 0x45b8
#define R300_TX_CHROMA_KEY_15 0x45bc
/* Example value: ff00ff00 == { 0, 1.0, 0, 1.0 } */
 
/* Border Color; one register per texture unit (16 units, 4 bytes apart).
* No field macros are defined for these registers here.
*/
#define R300_TX_BORDER_COLOR_0 0x45c0
#define R300_TX_BORDER_COLOR_1 0x45c4
#define R300_TX_BORDER_COLOR_2 0x45c8
#define R300_TX_BORDER_COLOR_3 0x45cc
#define R300_TX_BORDER_COLOR_4 0x45d0
#define R300_TX_BORDER_COLOR_5 0x45d4
#define R300_TX_BORDER_COLOR_6 0x45d8
#define R300_TX_BORDER_COLOR_7 0x45dc
#define R300_TX_BORDER_COLOR_8 0x45e0
#define R300_TX_BORDER_COLOR_9 0x45e4
#define R300_TX_BORDER_COLOR_10 0x45e8
#define R300_TX_BORDER_COLOR_11 0x45ec
#define R300_TX_BORDER_COLOR_12 0x45f0
#define R300_TX_BORDER_COLOR_13 0x45f4
#define R300_TX_BORDER_COLOR_14 0x45f8
#define R300_TX_BORDER_COLOR_15 0x45fc
 
 
/* END: Texture specification */
 
/* BEGIN: Fragment program instruction set */
 
/* Fragment programs are written directly into register space.
* There are separate instruction streams for texture instructions and ALU
* instructions.
* In order to synchronize these streams, the program is divided into up
* to 4 nodes. Each node begins with a number of TEX operations, followed
* by a number of ALU operations.
* The first node can have zero TEX ops, all subsequent nodes must have at
* least one TEX op.
* All nodes must have at least one ALU op.
*
* The index of the last node is stored in PFS_CNTL_0: A value of 0 means
* 1 node, a value of 3 means 4 nodes.
* The total amount of instructions is defined in PFS_CNTL_2. The offsets are
* offsets into the respective instruction streams, while *_END points to the
* last instruction relative to this offset.
* NOTE(review): PFS_CNTL_0 / PFS_CNTL_2 are not macro names defined here;
* going by the R300_PFS_CNTL_* field prefixes they presumably refer to
* R300_US_CONFIG and R300_US_CODE_OFFSET respectively -- confirm.
*/
#define R300_US_CONFIG 0x4600
# define R300_PFS_CNTL_LAST_NODES_SHIFT 0
# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0)
# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3)
#define R300_US_PIXSIZE 0x4604
/* There is an unshifted value here which has so far always been equal to the
* index of the highest used temporary register.
*/
/* Code offset word:
*   ALU_OFFSET - bits 5:0      ALU_END - bits 11:6
*   TEX_OFFSET - bits 17:13    TEX_END - bits 22:18
*   R400 TEX_OFFSET_MSB - bits 27:24    R400 TEX_END_MSB - bits 31:28
*/
#define R300_US_CODE_OFFSET 0x4608
# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0)
# define R300_PFS_CNTL_ALU_END_SHIFT 6
# define R300_PFS_CNTL_ALU_END_MASK (63 << 6)
# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13
# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
# define R300_PFS_CNTL_TEX_END_SHIFT 18
# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24
# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24)
# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28
/* Reaches bit 31, so the constant is unsigned: (0xf << 28) does not fit in
* a signed int and shifting into the sign bit is undefined behaviour. */
# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xfu << 28)
 
/* gap */
 
/* Nodes are stored backwards. The last active node is always stored in
* PFS_NODE_3.
* Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
* first node is stored in NODE_2, the second node is stored in NODE_3.
*
* Offsets are relative to the master offset from PFS_CNTL_2.
* NOTE(review): PFS_NODE_n presumably refers to R300_US_CODE_ADDR_n below,
* and PFS_CNTL_2 to the code offset register -- confirm.
*
* Field layout given by the macros:
*   ALU_START - bits 5:0      ALU_SIZE - bits 11:6
*   TEX_START - bits 16:12    TEX_SIZE - bits 21:17
*   RGBA_OUT  - bit 22        W_OUT    - bit 23
*   R400 TEX_START MSBs - bits 27:24    R400 TEX_SIZE MSBs - bits 31:28
* The two R400 mask names end in "MSG_MASK" (presumably a typo for "MSB");
* the spelling is kept so that existing users keep compiling.
*/
#define R300_US_CODE_ADDR_0 0x4610
#define R300_US_CODE_ADDR_1 0x4614
#define R300_US_CODE_ADDR_2 0x4618
#define R300_US_CODE_ADDR_3 0x461C
# define R300_ALU_START_SHIFT 0
# define R300_ALU_START_MASK (63 << 0)
# define R300_ALU_SIZE_SHIFT 6
# define R300_ALU_SIZE_MASK (63 << 6)
# define R300_TEX_START_SHIFT 12
# define R300_TEX_START_MASK (31 << 12)
# define R300_TEX_SIZE_SHIFT 17
# define R300_TEX_SIZE_MASK (31 << 17)
# define R300_RGBA_OUT (1 << 22)
# define R300_W_OUT (1 << 23)
# define R400_TEX_START_MSB_SHIFT 24
# define R400_TEX_START_MSG_MASK (0xf << 24)
# define R400_TEX_SIZE_MSB_SHIFT 28
/* Reaches bit 31, so the constant is unsigned: (0xf << 28) does not fit in
* a signed int and shifting into the sign bit is undefined behaviour. */
# define R400_TEX_SIZE_MSG_MASK (0xfu << 28)
 
/* TEX
* As far as I can tell, texture instructions cannot write into output
* registers directly. A subsequent ALU instruction is always necessary,
* even if it's just MAD o0, r0, 1, 0
*
* Only the first texture instruction register is named here.
* Field layout given by the macros:
*   SRC_ADDR - bits 4:0
*   DST_ADDR - bits 10:6
*   TEX_ID   - bits 14:11
*   TEX_INST - bits 17:15; the unshifted R300_TEX_OP_* codes are presumably
*              the values for this field (placed with R300_TEX_INST_SHIFT)
*   R400 source / destination address extension bits - bits 19 and 20
*/
#define R300_US_TEX_INST_0 0x4620
# define R300_SRC_ADDR_SHIFT 0
# define R300_SRC_ADDR_MASK (31 << 0)
# define R300_DST_ADDR_SHIFT 6
# define R300_DST_ADDR_MASK (31 << 6)
# define R300_TEX_ID_SHIFT 11
# define R300_TEX_ID_MASK (15 << 11)
# define R300_TEX_INST_SHIFT 15
# define R300_TEX_OP_NOP 0
# define R300_TEX_OP_LD 1
# define R300_TEX_OP_KIL 2
# define R300_TEX_OP_TXP 3
# define R300_TEX_OP_TXB 4
# define R300_TEX_INST_MASK (7 << 15)
# define R400_SRC_ADDR_EXT_BIT (1 << 19)
# define R400_DST_ADDR_EXT_BIT (1 << 20)
 
/* Output format from the unified shader.
* Only the first output format register is named here.
*
* Layout given by the macros:
*   format code  - from bit 0 (codes 0..21; 14 is reserved)
*   C0..C3 SEL   - 2-bit selectors at bits 9:8, 11:10, 13:12, 15:14
*                  (0 = A, 1 = R, 2 = G, 3 = B)
*   OUT_SIGN(x)  - from bit 16
*   ROUND_ADJ    - bit 20 (R500)
*/
#define R300_US_OUT_FMT_0 0x46A4
# define R300_US_OUT_FMT_C4_8 (0 << 0)
# define R300_US_OUT_FMT_C4_10 (1 << 0)
# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0)
# define R300_US_OUT_FMT_C_16 (3 << 0)
# define R300_US_OUT_FMT_C2_16 (4 << 0)
# define R300_US_OUT_FMT_C4_16 (5 << 0)
# define R300_US_OUT_FMT_C_16_MPEG (6 << 0)
# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0)
# define R300_US_OUT_FMT_C2_4 (8 << 0)
# define R300_US_OUT_FMT_C_3_3_2 (9 << 0)
# define R300_US_OUT_FMT_C_6_5_6 (10 << 0)
# define R300_US_OUT_FMT_C_11_11_10 (11 << 0)
# define R300_US_OUT_FMT_C_10_11_11 (12 << 0)
# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
/* reserved */
# define R300_US_OUT_FMT_UNUSED (15 << 0)
# define R300_US_OUT_FMT_C_16_FP (16 << 0)
# define R300_US_OUT_FMT_C2_16_FP (17 << 0)
# define R300_US_OUT_FMT_C4_16_FP (18 << 0)
# define R300_US_OUT_FMT_C_32_FP (19 << 0)
# define R300_US_OUT_FMT_C2_32_FP (20 << 0)
# define R300_US_OUT_FMT_C4_32_FP (21 << 0)
# define R300_C0_SEL_A (0 << 8)
# define R300_C0_SEL_R (1 << 8)
# define R300_C0_SEL_G (2 << 8)
# define R300_C0_SEL_B (3 << 8)
# define R300_C1_SEL_A (0 << 10)
# define R300_C1_SEL_R (1 << 10)
# define R300_C1_SEL_G (2 << 10)
# define R300_C1_SEL_B (3 << 10)
# define R300_C2_SEL_A (0 << 12)
# define R300_C2_SEL_R (1 << 12)
# define R300_C2_SEL_G (2 << 12)
# define R300_C2_SEL_B (3 << 12)
# define R300_C3_SEL_A (0 << 14)
# define R300_C3_SEL_R (1 << 14)
# define R300_C3_SEL_G (2 << 14)
# define R300_C3_SEL_B (3 << 14)
# define R300_OUT_SIGN(x) ((x) << 16)
# define R500_ROUND_ADJ (1 << 20)
 
/* ALU
* The ALU instructions register blocks are enumerated according to the order
* in which fglrx writes them. I assume there is space for 64 instructions, since
* each block has space for a maximum of 64 DWORDs, and this matches reported
* native limits.
*
* The basic functional block seems to be one MAD for each color and alpha,
* and an adder that adds all components after the MUL.
* - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
* - DP4: Use OUTC_DP4, OUTA_DP4
* - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
* - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
* - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
* - CMP: If ARG2 < 0, return ARG1, else return ARG0
* - FLR: use FRC+MAD
* - XPD: use MAD+MAD
* - SGE, SLT: use MAD+CMP
* - RSQ: use ABS modifier for argument
* - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
* (e.g. RCP) into color register
* - apparently, there's no quick DST operation
* - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
* - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
* - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
*
* Operand selection
* First stage selects three sources from the available registers and
* constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
* fglrx sorts the three source fields: Registers before constants,
* lower indices before higher indices; I do not know whether this is
* necessary.
*
* fglrx fills unused sources with "read constant 0"
* According to specs, you cannot select more than two different constants.
*
* Second stage selects the operands from the sources. This is defined in
* INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
* zero and one.
* Swizzling and negation happens in this stage, as well.
*
* Important: Color and alpha seem to be mostly separate, i.e. their sources
* selection appears to be fully independent (the register storage is probably
* physically split into a color and an alpha section).
* However (because of the apparent physical split), there is some interaction
* WRT swizzling. If, for example, you want to load an R component into an
* Alpha operand, this R component is taken from a *color* source, not from
* an alpha source. The corresponding register doesn't even have to appear in
* the alpha sources list. (I hope this all makes sense to you)
*
* Destination selection
* The destination register index is in FPI1 (color) and FPI3 (alpha)
* together with enable bits.
* There are separate enable bits for writing into temporary registers
* (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
* /DSTA_OUTPUT). You can write to both at once, or not write at all (the
* same index must be used for both).
*
* Note: There is a special form for LRP
* - Argument order is the same as in ARB_fragment_program.
* - Operation is MAD
* - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
* - Set FPI0/FPI2_SPECIAL_LRP
* Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
*/
#define R300_US_ALU_RGB_ADDR_0 0x46C0
/* Three colour source slots: a 5-bit index followed by a CONST flag. */
# define R300_ALU_SRC0C_SHIFT 0
# define R300_ALU_SRC0C_MASK (0x1f << R300_ALU_SRC0C_SHIFT)
# define R300_ALU_SRC0C_CONST (0x20 << R300_ALU_SRC0C_SHIFT)
# define R300_ALU_SRC1C_SHIFT 6
# define R300_ALU_SRC1C_MASK (0x1f << R300_ALU_SRC1C_SHIFT)
# define R300_ALU_SRC1C_CONST (0x20 << R300_ALU_SRC1C_SHIFT)
# define R300_ALU_SRC2C_SHIFT 12
# define R300_ALU_SRC2C_MASK (0x1f << R300_ALU_SRC2C_SHIFT)
# define R300_ALU_SRC2C_CONST (0x20 << R300_ALU_SRC2C_SHIFT)
/* Left as a literal on purpose: this macro is defined a second time under
 * R300_US_ALU_ALPHA_ADDR_0 and C requires both definitions to be identical.
 */
# define R300_ALU_SRC_MASK 0x0003ffff
/* Destination register index. */
# define R300_ALU_DSTC_SHIFT 18
# define R300_ALU_DSTC_MASK (0x1f << R300_ALU_DSTC_SHIFT)
/* Per-component write enables for the temporary register. */
# define R300_ALU_DSTC_REG_MASK_SHIFT 23
# define R300_ALU_DSTC_REG_X (1 << R300_ALU_DSTC_REG_MASK_SHIFT)
# define R300_ALU_DSTC_REG_Y (2 << R300_ALU_DSTC_REG_MASK_SHIFT)
# define R300_ALU_DSTC_REG_Z (4 << R300_ALU_DSTC_REG_MASK_SHIFT)
/* Per-component write enables for the output register. */
# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26
# define R300_ALU_DSTC_OUTPUT_X (1 << R300_ALU_DSTC_OUTPUT_MASK_SHIFT)
# define R300_ALU_DSTC_OUTPUT_Y (2 << R300_ALU_DSTC_OUTPUT_MASK_SHIFT)
# define R300_ALU_DSTC_OUTPUT_Z (4 << R300_ALU_DSTC_OUTPUT_MASK_SHIFT)
# define R300_ALU_DSTC_OUTPUT_XYZ (7 << R300_ALU_DSTC_OUTPUT_MASK_SHIFT)
/* Field builders. */
# define R300_RGB_ADDR0(x) ((x) << R300_ALU_SRC0C_SHIFT)
# define R300_RGB_ADDR1(x) ((x) << R300_ALU_SRC1C_SHIFT)
# define R300_RGB_ADDR2(x) ((x) << R300_ALU_SRC2C_SHIFT)
# define R300_RGB_TARGET(x) ((x) << 29)
 
#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
/* Three alpha source slots: a 5-bit index followed by a CONST flag. */
# define R300_ALU_SRC0A_SHIFT 0
# define R300_ALU_SRC0A_MASK (0x1f << R300_ALU_SRC0A_SHIFT)
# define R300_ALU_SRC0A_CONST (0x20 << R300_ALU_SRC0A_SHIFT)
# define R300_ALU_SRC1A_SHIFT 6
# define R300_ALU_SRC1A_MASK (0x1f << R300_ALU_SRC1A_SHIFT)
# define R300_ALU_SRC1A_CONST (0x20 << R300_ALU_SRC1A_SHIFT)
# define R300_ALU_SRC2A_SHIFT 12
# define R300_ALU_SRC2A_MASK (0x1f << R300_ALU_SRC2A_SHIFT)
# define R300_ALU_SRC2A_CONST (0x20 << R300_ALU_SRC2A_SHIFT)
/* Left as a literal on purpose: this macro is also defined under
 * R300_US_ALU_RGB_ADDR_0 and C requires both definitions to be identical.
 */
# define R300_ALU_SRC_MASK 0x0003ffff
/* Destination register index and write-enable flags. */
# define R300_ALU_DSTA_SHIFT 18
# define R300_ALU_DSTA_MASK (0x1f << R300_ALU_DSTA_SHIFT)
# define R300_ALU_DSTA_REG 0x00800000
# define R300_ALU_DSTA_OUTPUT 0x01000000
# define R300_ALU_DSTA_DEPTH 0x08000000
/* Field builders. */
# define R300_ALPHA_ADDR0(x) ((x) << R300_ALU_SRC0A_SHIFT)
# define R300_ALPHA_ADDR1(x) ((x) << R300_ALU_SRC1A_SHIFT)
# define R300_ALPHA_ADDR2(x) ((x) << R300_ALU_SRC2A_SHIFT)
# define R300_ALPHA_TARGET(x) ((x) << 25)
 
#define R300_US_ALU_RGB_INST_0 0x48C0
# define R300_ALU_ARGC_SRC0C_XYZ 0
# define R300_ALU_ARGC_SRC0C_XXX 1
# define R300_ALU_ARGC_SRC0C_YYY 2
# define R300_ALU_ARGC_SRC0C_ZZZ 3
# define R300_ALU_ARGC_SRC1C_XYZ 4
# define R300_ALU_ARGC_SRC1C_XXX 5
# define R300_ALU_ARGC_SRC1C_YYY 6
# define R300_ALU_ARGC_SRC1C_ZZZ 7
# define R300_ALU_ARGC_SRC2C_XYZ 8
# define R300_ALU_ARGC_SRC2C_XXX 9
# define R300_ALU_ARGC_SRC2C_YYY 10
# define R300_ALU_ARGC_SRC2C_ZZZ 11
# define R300_ALU_ARGC_SRC0A 12
# define R300_ALU_ARGC_SRC1A 13
# define R300_ALU_ARGC_SRC2A 14
# define R300_ALU_ARGC_SRCP_XYZ 15
# define R300_ALU_ARGC_SRCP_XXX 16
# define R300_ALU_ARGC_SRCP_YYY 17
# define R300_ALU_ARGC_SRCP_ZZZ 18
# define R300_ALU_ARGC_SRCP_WWW 19
# define R300_ALU_ARGC_ZERO 20
# define R300_ALU_ARGC_ONE 21
# define R300_ALU_ARGC_HALF 22
# define R300_ALU_ARGC_SRC0C_YZX 23
# define R300_ALU_ARGC_SRC1C_YZX 24
# define R300_ALU_ARGC_SRC2C_YZX 25
# define R300_ALU_ARGC_SRC0C_ZXY 26
# define R300_ALU_ARGC_SRC1C_ZXY 27
# define R300_ALU_ARGC_SRC2C_ZXY 28
# define R300_ALU_ARGC_SRC0CA_WZY 29
# define R300_ALU_ARGC_SRC1CA_WZY 30
# define R300_ALU_ARGC_SRC2CA_WZY 31
# define R300_RGB_SWIZA(x) ((x) << 0)
# define R300_RGB_SWIZB(x) ((x) << 7)
# define R300_RGB_SWIZC(x) ((x) << 14)
 
/* Argument 0: 5-bit selector, then a 2-bit modifier (NOP/NEG/ABS/NAB). */
# define R300_ALU_ARG0C_SHIFT 0
# define R300_ALU_ARG0C_MASK (0x1f << R300_ALU_ARG0C_SHIFT)
# define R300_ALU_ARG0C_NOP (0 << (R300_ALU_ARG0C_SHIFT + 5))
# define R300_ALU_ARG0C_NEG (1 << (R300_ALU_ARG0C_SHIFT + 5))
# define R300_ALU_ARG0C_ABS (2 << (R300_ALU_ARG0C_SHIFT + 5))
# define R300_ALU_ARG0C_NAB (3 << (R300_ALU_ARG0C_SHIFT + 5))
/* Argument 1, same layout starting at bit 7. */
# define R300_ALU_ARG1C_SHIFT 7
# define R300_ALU_ARG1C_MASK (0x1f << R300_ALU_ARG1C_SHIFT)
# define R300_ALU_ARG1C_NOP (0 << (R300_ALU_ARG1C_SHIFT + 5))
# define R300_ALU_ARG1C_NEG (1 << (R300_ALU_ARG1C_SHIFT + 5))
# define R300_ALU_ARG1C_ABS (2 << (R300_ALU_ARG1C_SHIFT + 5))
# define R300_ALU_ARG1C_NAB (3 << (R300_ALU_ARG1C_SHIFT + 5))
/* Argument 2, same layout starting at bit 14. */
# define R300_ALU_ARG2C_SHIFT 14
# define R300_ALU_ARG2C_MASK (0x1f << R300_ALU_ARG2C_SHIFT)
# define R300_ALU_ARG2C_NOP (0 << (R300_ALU_ARG2C_SHIFT + 5))
# define R300_ALU_ARG2C_NEG (1 << (R300_ALU_ARG2C_SHIFT + 5))
# define R300_ALU_ARG2C_ABS (2 << (R300_ALU_ARG2C_SHIFT + 5))
# define R300_ALU_ARG2C_NAB (3 << (R300_ALU_ARG2C_SHIFT + 5))
/* SRCP selection. Left token-identical: the same four macros are defined
 * again in the alpha instruction section and both copies must match.
 */
# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
 
/* RGB output operation selector, encoded starting at bit 23.
 * Value 6 has no name in this list.
 */
# define R300_ALU_OUTC_MAD (0 << 23)
# define R300_ALU_OUTC_DP3 (1 << 23)
# define R300_ALU_OUTC_DP4 (2 << 23)
# define R300_ALU_OUTC_D2A (3 << 23)
# define R300_ALU_OUTC_MIN (4 << 23)
# define R300_ALU_OUTC_MAX (5 << 23)
# define R300_ALU_OUTC_CND (7 << 23)
# define R300_ALU_OUTC_CMP (8 << 23)
# define R300_ALU_OUTC_FRC (9 << 23)
/* Copies the result of the alpha operation into the colour register. */
# define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
 
/* RGB output modifier (multiply or divide by 2, 4 or 8), encoded from bit 27. */
# define R300_ALU_OUTC_MOD_SHIFT 27
# define R300_ALU_OUTC_MOD_NOP (0 << 27)
# define R300_ALU_OUTC_MOD_MUL2 (1 << 27)
# define R300_ALU_OUTC_MOD_MUL4 (2 << 27)
# define R300_ALU_OUTC_MOD_MUL8 (3 << 27)
# define R300_ALU_OUTC_MOD_DIV2 (4 << 27)
# define R300_ALU_OUTC_MOD_DIV4 (5 << 27)
# define R300_ALU_OUTC_MOD_DIV8 (6 << 27)
 
/* Top two bits of the RGB instruction word. */
# define R300_ALU_OUTC_CLAMP (1 << 30)
/* Unsigned literal: shifting a signed 1 into bit 31 is undefined behaviour
 * in C, and the result is meant to be a 32-bit register bit, not a negative
 * number.
 */
# define R300_ALU_INSERT_NOP (1u << 31)
 
#define R300_US_ALU_ALPHA_INST_0 0x49C0
# define R300_ALU_ARGA_SRC0C_X 0
# define R300_ALU_ARGA_SRC0C_Y 1
# define R300_ALU_ARGA_SRC0C_Z 2
# define R300_ALU_ARGA_SRC1C_X 3
# define R300_ALU_ARGA_SRC1C_Y 4
# define R300_ALU_ARGA_SRC1C_Z 5
# define R300_ALU_ARGA_SRC2C_X 6
# define R300_ALU_ARGA_SRC2C_Y 7
# define R300_ALU_ARGA_SRC2C_Z 8
# define R300_ALU_ARGA_SRC0A 9
# define R300_ALU_ARGA_SRC1A 10
# define R300_ALU_ARGA_SRC2A 11
# define R300_ALU_ARGA_SRCP_X 12
# define R300_ALU_ARGA_SRCP_Y 13
# define R300_ALU_ARGA_SRCP_Z 14
# define R300_ALU_ARGA_SRCP_W 15
# define R300_ALU_ARGA_ZERO 16
# define R300_ALU_ARGA_ONE 17
# define R300_ALU_ARGA_HALF 18
# define R300_ALPHA_SWIZA(x) ((x) << 0)
# define R300_ALPHA_SWIZB(x) ((x) << 7)
# define R300_ALPHA_SWIZC(x) ((x) << 14)
 
/* Argument 0: 5-bit selector, then a 2-bit modifier (NOP/NEG/ABS/NAB). */
# define R300_ALU_ARG0A_SHIFT 0
# define R300_ALU_ARG0A_MASK (0x1f << R300_ALU_ARG0A_SHIFT)
# define R300_ALU_ARG0A_NOP (0 << (R300_ALU_ARG0A_SHIFT + 5))
# define R300_ALU_ARG0A_NEG (1 << (R300_ALU_ARG0A_SHIFT + 5))
# define R300_ALU_ARG0A_ABS (2 << (R300_ALU_ARG0A_SHIFT + 5))
# define R300_ALU_ARG0A_NAB (3 << (R300_ALU_ARG0A_SHIFT + 5))
/* Argument 1, same layout starting at bit 7. */
# define R300_ALU_ARG1A_SHIFT 7
# define R300_ALU_ARG1A_MASK (0x1f << R300_ALU_ARG1A_SHIFT)
# define R300_ALU_ARG1A_NOP (0 << (R300_ALU_ARG1A_SHIFT + 5))
# define R300_ALU_ARG1A_NEG (1 << (R300_ALU_ARG1A_SHIFT + 5))
# define R300_ALU_ARG1A_ABS (2 << (R300_ALU_ARG1A_SHIFT + 5))
# define R300_ALU_ARG1A_NAB (3 << (R300_ALU_ARG1A_SHIFT + 5))
/* Argument 2, same layout starting at bit 14. */
# define R300_ALU_ARG2A_SHIFT 14
# define R300_ALU_ARG2A_MASK (0x1f << R300_ALU_ARG2A_SHIFT)
# define R300_ALU_ARG2A_NOP (0 << (R300_ALU_ARG2A_SHIFT + 5))
# define R300_ALU_ARG2A_NEG (1 << (R300_ALU_ARG2A_SHIFT + 5))
# define R300_ALU_ARG2A_ABS (2 << (R300_ALU_ARG2A_SHIFT + 5))
# define R300_ALU_ARG2A_NAB (3 << (R300_ALU_ARG2A_SHIFT + 5))
/* SRCP selection. Left token-identical: the same four macros are also
 * defined in the RGB instruction section and both copies must match.
 */
# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
 
# define R300_ALU_OUTA_MAD (0 << 23)
# define R300_ALU_OUTA_DP4 (1 << 23)
# define R300_ALU_OUTA_MIN (2 << 23)
# define R300_ALU_OUTA_MAX (3 << 23)
# define R300_ALU_OUTA_CND (5 << 23)
# define R300_ALU_OUTA_CMP (6 << 23)
# define R300_ALU_OUTA_FRC (7 << 23)
# define R300_ALU_OUTA_EX2 (8 << 23)
# define R300_ALU_OUTA_LG2 (9 << 23)
# define R300_ALU_OUTA_RCP (10 << 23)
# define R300_ALU_OUTA_RSQ (11 << 23)
 
# define R300_ALU_OUTA_MOD_NOP (0 << 27)
# define R300_ALU_OUTA_MOD_MUL2 (1 << 27)
# define R300_ALU_OUTA_MOD_MUL4 (2 << 27)
# define R300_ALU_OUTA_MOD_MUL8 (3 << 27)
# define R300_ALU_OUTA_MOD_DIV2 (4 << 27)
# define R300_ALU_OUTA_MOD_DIV4 (5 << 27)
# define R300_ALU_OUTA_MOD_DIV8 (6 << 27)
 
# define R300_ALU_OUTA_CLAMP (1 << 30)
/* END: Fragment program instruction set */
 
/* R4xx extended fragment shader registers. */
#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
/* RGB extension bits occupy bits 0..3; bit 3 has the fixed ADDRD name. */
# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x))
# define R400_ADDRD_EXT_RGB_MSB_BIT (1 << 3)
/* Alpha extension bits occupy bits 4..7; bit 7 has the fixed ADDRD name. */
# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << (4 + (x)))
# define R400_ADDRD_EXT_A_MSB_BIT (1 << 7)
 
#define R400_US_CODE_BANK 0x46b8
/* Bank selector in the low four bits, mode flag in bit 4. */
# define R400_BANK_SHIFT 0
# define R400_BANK_MASK (0xf << R400_BANK_SHIFT)
# define R400_R390_MODE_ENABLE 0x10
#define R400_US_CODE_EXT 0x46bc
/* Ten consecutive 3-bit "MSB" fields; each mask is built from its shift. */
# define R400_ALU_OFFSET_MSB_SHIFT 0
# define R400_ALU_OFFSET_MSB_MASK (0x7 << R400_ALU_OFFSET_MSB_SHIFT)
# define R400_ALU_SIZE_MSB_SHIFT 3
# define R400_ALU_SIZE_MSB_MASK (0x7 << R400_ALU_SIZE_MSB_SHIFT)
# define R400_ALU_START0_MSB_SHIFT 6
# define R400_ALU_START0_MSB_MASK (0x7 << R400_ALU_START0_MSB_SHIFT)
# define R400_ALU_SIZE0_MSB_SHIFT 9
# define R400_ALU_SIZE0_MSB_MASK (0x7 << R400_ALU_SIZE0_MSB_SHIFT)
# define R400_ALU_START1_MSB_SHIFT 12
# define R400_ALU_START1_MSB_MASK (0x7 << R400_ALU_START1_MSB_SHIFT)
# define R400_ALU_SIZE1_MSB_SHIFT 15
# define R400_ALU_SIZE1_MSB_MASK (0x7 << R400_ALU_SIZE1_MSB_SHIFT)
# define R400_ALU_START2_MSB_SHIFT 18
# define R400_ALU_START2_MSB_MASK (0x7 << R400_ALU_START2_MSB_SHIFT)
# define R400_ALU_SIZE2_MSB_SHIFT 21
# define R400_ALU_SIZE2_MSB_MASK (0x7 << R400_ALU_SIZE2_MSB_SHIFT)
# define R400_ALU_START3_MSB_SHIFT 24
# define R400_ALU_START3_MSB_MASK (0x7 << R400_ALU_START3_MSB_SHIFT)
# define R400_ALU_SIZE3_MSB_SHIFT 27
# define R400_ALU_SIZE3_MSB_MASK (0x7 << R400_ALU_SIZE3_MSB_SHIFT)
/* END: R4xx extended fragment shader registers. */
 
/* Fog: Fog Blending Enable */
#define R300_FG_FOG_BLEND 0x4bc0
/* Bit 0: enable. */
# define R300_FG_FOG_BLEND_DISABLE 0x00000000
# define R300_FG_FOG_BLEND_ENABLE 0x00000001
/* Bits 2:1: blend function. */
# define R300_FG_FOG_BLEND_FN_LINEAR 0x00000000
# define R300_FG_FOG_BLEND_FN_EXP 0x00000002
# define R300_FG_FOG_BLEND_FN_EXP2 0x00000004
# define R300_FG_FOG_BLEND_FN_CONSTANT 0x00000006
# define R300_FG_FOG_BLEND_FN_MASK 0x00000006
 
/* Fog: Red Component of Fog Color */
#define R300_FG_FOG_COLOR_R 0x4bc8
/* Fog: green component of the fog colour */
#define R300_FG_FOG_COLOR_G 0x4bcc
/* Fog: blue component of the fog colour */
#define R300_FG_FOG_COLOR_B 0x4bd0
/* Mask covering the low 10 bits. */
# define R300_FG_FOG_COLOR_MASK ((1 << 10) - 1)
 
/* Fog: Constant Factor for Fog Blending */
#define R300_FG_FOG_FACTOR 0x4bc4
/* Mask covering the low 10 bits of the register. */
# define R300_FG_FOG_FACTOR_MASK 0x000003ff
/* Legacy name without the R300_ prefix used by every other field macro of
 * the fog registers; kept as an alias for existing users.
 */
# define FG_FOG_FACTOR_MASK R300_FG_FOG_FACTOR_MASK
 
/* Fog: Alpha function */
#define R300_FG_ALPHA_FUNC 0x4bd4
# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff
# define R300_FG_ALPHA_FUNC_NEVER (0 << 8)
# define R300_FG_ALPHA_FUNC_LESS (1 << 8)
# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8)
# define R300_FG_ALPHA_FUNC_LE (3 << 8)
# define R300_FG_ALPHA_FUNC_GREATER (4 << 8)
# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8)
# define R300_FG_ALPHA_FUNC_GE (6 << 8)
# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8)
# define R300_ALPHA_TEST_OP_MASK (7 << 8)
# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11)
# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11)
 
# define R500_FG_ALPHA_FUNC_10BIT (0 << 12)
# define R500_FG_ALPHA_FUNC_8BIT (1 << 12)
 
# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16)
# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16)
# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17)
# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17)
 
# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20)
# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20)
 
# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24)
# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */
# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25)
# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25)
 
# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28)
# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28)
 
 
/* Fog: Where does the depth come from? */
#define R300_FG_DEPTH_SRC 0x4bd8
/* Bit 0 selects between the two depth sources. */
# define R300_FG_DEPTH_SRC_SCAN 0x0
# define R300_FG_DEPTH_SRC_SHADER 0x1
 
/* Fog: Alpha Compare Value */
#define R500_FG_ALPHA_VALUE 0x4be0
/* Mask covering the low 16 bits. */
# define R500_FG_ALPHA_VALUE_MASK ((1 << 16) - 1)
 
#define RV530_FG_ZBREG_DEST 0x4be8
/* One select bit per pipe; SELECT_ALL is both bits together. */
# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 0x1
# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 0x2
# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL 0x3
/* gap */
 
/* Fragment program parameters in 7.16 floating point */
#define R300_PFS_PARAM_0_X 0x4C00
#define R300_PFS_PARAM_0_Y 0x4C04
#define R300_PFS_PARAM_0_Z 0x4C08
#define R300_PFS_PARAM_0_W 0x4C0C
/* last consts */
#define R300_PFS_PARAM_31_X 0x4DF0
#define R300_PFS_PARAM_31_Y 0x4DF4
#define R300_PFS_PARAM_31_Z 0x4DF8
#define R300_PFS_PARAM_31_W 0x4DFC
 
/* Unpipelined. */
#define R300_RB3D_CCTL 0x4e00
/* Multiwrite count, stored as (buffers - 1) from bit 5. The function-like
 * form relies on a MAX2() macro supplied elsewhere.
 */
# define R300_RB3D_CCTL_NUM_MULTIWRITES(x) (MAX2(((x)-1), 0) << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER 0x00000000
# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS 0x00000020
# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS 0x00000040
# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS 0x00000060
/* Bit 7 */
# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE 0x00000000
# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE 0x00000080
/* Bit 9 */
# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE 0x00000000
# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE 0x00000200
/* Bit 10 */
# define R300_RB3D_CCTL_CMASK_DISABLE 0x00000000
# define R300_RB3D_CCTL_CMASK_ENABLE 0x00000400
/* reserved */
/* Bit 12 */
# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE 0x00000000
# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE 0x00001000
/* Bit 13: note that a set bit DISABLES write compression. */
# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE 0x00000000
# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE 0x00002000
/* Bit 14 */
# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE 0x00000000
# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE 0x00004000
 
 
/* Notes:
* - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
* the application
* - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND
* are set to the same
* function (both registers are always set up completely in any case)
* - Most blend flags are simply copied from R200 and not tested yet
*/
/* Colour (CBLEND) and alpha (ABLEND) blend control registers. */
#define R300_RB3D_CBLEND 0x4E04
#define R300_RB3D_ABLEND 0x4E08
/* the following only appear in CBLEND */
# define R300_ALPHA_BLEND_ENABLE (1 << 0)
# define R300_SEPARATE_ALPHA_ENABLE (1 << 1)
# define R300_READ_ENABLE (1 << 2)
/* Source-pixel discard mode, encoded from bit 3. */
# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
# define R500_SRC_ALPHA_0_NO_READ (1 << 30)
/* Unsigned literal: shifting a signed 1 into bit 31 is undefined behaviour
 * in C.
 */
# define R500_SRC_ALPHA_1_NO_READ (1u << 31)
 
/* the following are shared between CBLEND and ABLEND */
/* Combine-function field. The values below run from 0 to 7, so the field is
 * three bits wide; the mask must cover all of them or MIN, MAX and the RSUB
 * variants are truncated when masked.
 */
# define R300_FCN_MASK (7 << 12)
# define R300_COMB_FCN_ADD_CLAMP (0 << 12)
# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12)
# define R300_COMB_FCN_SUB_CLAMP (2 << 12)
# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12)
# define R300_COMB_FCN_MIN (4 << 12)
# define R300_COMB_FCN_MAX (5 << 12)
# define R300_COMB_FCN_RSUB_CLAMP (6 << 12)
# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12)
/* Blend factors (unshifted); place them with R300_SRC_BLEND_SHIFT or
 * R300_DST_BLEND_SHIFT and isolate them with R300_BLEND_MASK.
 */
# define R300_BLEND_GL_ZERO (32)
# define R300_BLEND_GL_ONE (33)
# define R300_BLEND_GL_SRC_COLOR (34)
# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35)
# define R300_BLEND_GL_DST_COLOR (36)
# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37)
# define R300_BLEND_GL_SRC_ALPHA (38)
# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39)
# define R300_BLEND_GL_DST_ALPHA (40)
# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41)
# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42)
# define R300_BLEND_GL_CONST_COLOR (43)
# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
# define R300_BLEND_GL_CONST_ALPHA (45)
# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
# define R300_BLEND_MASK (63)
# define R300_SRC_BLEND_SHIFT (16)
# define R300_DST_BLEND_SHIFT (24)
 
/* Constant color used by the blender. Pipelined through the blender.
* Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE,
* RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead.
*/
#define R300_RB3D_BLEND_COLOR 0x4E10
 
 
/* 3D Color Channel Mask. If all the channels used in the current color format
* are disabled, then the cb will discard all the incoming quads. Pipelined
* through the blender.
*/
#define RB3D_COLOR_CHANNEL_MASK 0x4E0C
/* Four bits (blue, green, red, alpha) per MASK0..MASK3 group. */
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 0x0001
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 0x0002
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 0x0004
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 0x0008
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 0x0010
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 0x0020
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 0x0040
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 0x0080
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 0x0100
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 0x0200
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 0x0400
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 0x0800
# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 0x1000
# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 0x2000
# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 0x4000
# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 0x8000
 
/* Clear color that is used when the color mask is set to 00. Unpipelined.
* Program this register with a 32-bit value in ARGB8888 or ARGB2101010
* formats, ignoring the fields.
*/
#define R300_RB3D_COLOR_CLEAR_VALUE 0x4E14
/* For FP16 AA. */
#define R500_RB3D_COLOR_CLEAR_VALUE_AR 0x46C0
#define R500_RB3D_COLOR_CLEAR_VALUE_GB 0x46C4
 
/* gap */
 
/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */
#define RB3D_CLRCMP_CLR 0x4e20
 
/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */
#define RB3D_CLRCMP_MSK 0x4e24
 
/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */
#define R300_RB3D_COLOROFFSET0 0x4e28
/* The mask clears the low five bits of an offset. */
# define R300_COLOROFFSET_MASK 0xffffffe0
/* Colour buffer address offsets of multibuffers 1 to 3. Unpipelined. */
#define R300_RB3D_COLOROFFSET1 0x4e2c
#define R300_RB3D_COLOROFFSET2 0x4e30
#define R300_RB3D_COLOROFFSET3 0x4e34
 
/* Color buffer format and tiling control for all the multibuffers and the
* pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any
* of the registers are changed.
*
* Bit 16: Larger tiles
* Bit 17: 4x2 tiles
* Bit 18: Extremely weird tile like, but some pixels duplicated?
*/
#define R300_RB3D_COLORPITCH0 0x4E38
# define R300_COLORPITCH_MASK 0x00003FFE
# define R300_COLOR_TILE_DISABLE (0 << 16)
# define R300_COLOR_TILE_ENABLE (1 << 16)
# define R300_COLOR_TILE(x) ((x) << 16)
# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
# define R300_COLOR_MICROTILE(x) ((x) << 17)
# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19)
# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21)
# define R500_COLOR_FORMAT_UV1010 (1 << 21)
# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */
# define R300_COLOR_FORMAT_ARGB1555 (3 << 21)
# define R300_COLOR_FORMAT_RGB565 (4 << 21)
# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21)
# define R300_COLOR_FORMAT_ARGB8888 (6 << 21)
# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21)
/* reserved */
# define R300_COLOR_FORMAT_I8 (9 << 21)
# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21)
# define R300_COLOR_FORMAT_VYUY (11 << 21)
# define R300_COLOR_FORMAT_YVYU (12 << 21)
# define R300_COLOR_FORMAT_UV88 (13 << 21)
# define R500_COLOR_FORMAT_I10 (14 << 21)
# define R300_COLOR_FORMAT_ARGB4444 (15 << 21)
#define R300_RB3D_COLORPITCH1 0x4E3C
#define R300_RB3D_COLORPITCH2 0x4E40
#define R300_RB3D_COLORPITCH3 0x4E44
 
/* gap */
 
/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then
* a flush or free will not occur upon a write to this register, but a sync
* will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE
* are zero but DC_FINISH is one, then a sync will be sent immediately -- the
* cb will not wait for all the previous operations to complete before sending
* the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to
* zero.
*
* Set to 0A before 3D operations, set to 02 afterwards.
*/
#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4)
# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4)
 
#define R300_RB3D_DITHER_CTL 0x4E50
# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0)
# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0)
# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0)
/* reserved */
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2)
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2)
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2)
/* reserved */
 
#define R300_RB3D_CMASK_OFFSET0 0x4E54
#define R300_RB3D_CMASK_OFFSET1 0x4E58
#define R300_RB3D_CMASK_OFFSET2 0x4E5C
#define R300_RB3D_CMASK_OFFSET3 0x4E60
#define R300_RB3D_CMASK_PITCH0 0x4E64
#define R300_RB3D_CMASK_PITCH1 0x4E68
#define R300_RB3D_CMASK_PITCH2 0x4E6C
#define R300_RB3D_CMASK_PITCH3 0x4E70
#define R300_RB3D_CMASK_WRINDEX 0x4E74
#define R300_RB3D_CMASK_DWORD 0x4E78
#define R300_RB3D_CMASK_RDINDEX 0x4E7C
 
/* Resolve buffer destination address. The cache must be empty before changing
* this register if the cb is in resolve mode. Unpipelined
*/
#define R300_RB3D_AARESOLVE_OFFSET 0x4e80
# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5
/* Bits 31:5. At least according to the calculations of Christoph Brill. */
# define R300_RB3D_AARESOLVE_OFFSET_MASK (0x7ffffffu << R300_RB3D_AARESOLVE_OFFSET_SHIFT)
 
/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before
* changing this register if the cb is in resolve mode. Unpipelined
*/
#define R300_RB3D_AARESOLVE_PITCH 0x4e84
# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1
/* Bits 13:1. At least according to the calculations of Christoph Brill. */
# define R300_RB3D_AARESOLVE_PITCH_MASK (0x1fff << R300_RB3D_AARESOLVE_PITCH_SHIFT)
 
/* Resolve Buffer Control. Unpipelined */
#define R300_RB3D_AARESOLVE_CTL 0x4e88
/* Bit 0: mode */
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL 0x0
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE 0x1
/* Bit 1: gamma */
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 0x0
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 0x2
/* Bit 2: alpha handling */
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 0x0
# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE 0x4
 
 
/* Discard src pixels less than or equal to threshold. */
#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0
/* Discard src pixels greater than or equal to threshold. */
#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4
/* One 8-bit threshold per channel; each mask is built from its shift. */
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK (0xff << R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT)
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK (0xff << R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT)
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK (0xff << R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT)
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24
/* Unsigned operand, since this field reaches bit 31. */
# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK (0xffu << R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT)
 
/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */
#define R300_RB3D_ROPCNTL 0x4e18
/* Enable flag in bit 2. */
# define R300_RB3D_ROPCNTL_ROP_ENABLE (1 << 2)
/* 4-bit ROP field at bits 11:8. */
# define R300_RB3D_ROPCNTL_ROP_SHIFT 8
# define R300_RB3D_ROPCNTL_ROP_MASK (0xf << R300_RB3D_ROPCNTL_ROP_SHIFT)
 
/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */
#define R300_RB3D_CLRCMP_FLIPE 0x4e1c
 
/* Sets the fifo sizes */
#define R500_RB3D_FIFO_SIZE 0x4ef4
/* OP FIFO size selector in bits 1:0. */
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0)
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0)
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUARTER (2 << 0)
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGHTHS (3 << 0)
/* Original misspelled names, kept as aliases for existing users. */
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUARTER
# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGHTHS
 
/* Constant color used by the blender. Pipelined through the blender. */
#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8
/* Red in the low 16 bits, alpha in the high 16 bits. */
# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0
# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK (0xffff << R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT)
# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16
/* Unsigned operand, since this field reaches bit 31. */
# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK (0xffffu << R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT)
 
/* Constant color used by the blender. Pipelined through the blender. */
#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc
/* Blue in the low 16 bits, green in the high 16 bits. */
# define R500_RB3D_CONSTANT_COLOR_GB_BLUE_MASK 0x0000ffff
# define R500_RB3D_CONSTANT_COLOR_GB_BLUE_SHIFT 0
# define R500_RB3D_CONSTANT_COLOR_GB_GREEN_MASK 0xffff0000
# define R500_RB3D_CONSTANT_COLOR_GB_GREEN_SHIFT 16
/* The fields of this register were originally published under "_AR_" names
 * (copied from R500_RB3D_CONSTANT_COLOR_AR). Those names are kept as aliases
 * so existing users keep compiling.
 */
# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK R500_RB3D_CONSTANT_COLOR_GB_BLUE_MASK
# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT R500_RB3D_CONSTANT_COLOR_GB_BLUE_SHIFT
# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK R500_RB3D_CONSTANT_COLOR_GB_GREEN_MASK
# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT R500_RB3D_CONSTANT_COLOR_GB_GREEN_SHIFT
 
/* gap */
/* There seems to be no "write only" setting, so use Z-test = ALWAYS
* for this.
* Bit (1<<8) is the "test" bit. so plain write is 6 - vd
*/
#define R300_ZB_CNTL 0x4F00
/* Single-bit flags, bits 0 to 6 in order. */
# define R300_STENCIL_ENABLE 0x01
# define R300_Z_ENABLE 0x02
# define R300_Z_WRITE_ENABLE 0x04
# define R300_Z_SIGNED_COMPARE 0x08
# define R300_STENCIL_FRONT_BACK 0x10
# define R500_STENCIL_ZSIGNED_MAGNITUDE 0x20
# define R500_STENCIL_REFMASK_FRONT_BACK 0x40
 
/* Depth/stencil function and operation control. The values below are
 * unshifted; combine them with one of the *_SHIFT macros at the end of this
 * group to place them in the register.
 */
#define R300_ZB_ZSTENCILCNTL 0x4f04
/* functions */
# define R300_ZS_NEVER 0
# define R300_ZS_LESS 1
# define R300_ZS_LEQUAL 2
# define R300_ZS_EQUAL 3
# define R300_ZS_GEQUAL 4
# define R300_ZS_GREATER 5
# define R300_ZS_NOTEQUAL 6
# define R300_ZS_ALWAYS 7
/* Mask for one unshifted 3-bit function value. */
# define R300_ZS_MASK 7
/* operations */
# define R300_ZS_KEEP 0
# define R300_ZS_ZERO 1
# define R300_ZS_REPLACE 2
# define R300_ZS_INCR 3
# define R300_ZS_DECR 4
# define R300_ZS_INVERT 5
# define R300_ZS_INCR_WRAP 6
# define R300_ZS_DECR_WRAP 7
/* Field positions: the shifts advance in steps of three bits. */
# define R300_Z_FUNC_SHIFT 0
/* front and back refer to operations done for front
and back faces, i.e. separate stencil function support */
# define R300_S_FRONT_FUNC_SHIFT 3
# define R300_S_FRONT_SFAIL_OP_SHIFT 6
# define R300_S_FRONT_ZPASS_OP_SHIFT 9
# define R300_S_FRONT_ZFAIL_OP_SHIFT 12
# define R300_S_BACK_FUNC_SHIFT 15
# define R300_S_BACK_SFAIL_OP_SHIFT 18
# define R300_S_BACK_ZPASS_OP_SHIFT 21
# define R300_S_BACK_ZFAIL_OP_SHIFT 24
 
#define R300_ZB_STENCILREFMASK 0x4f08
/* Three 8-bit fields; each mask is built from its shift. */
# define R300_STENCILREF_SHIFT 0
# define R300_STENCILREF_MASK (0xff << R300_STENCILREF_SHIFT)
# define R300_STENCILMASK_SHIFT 8
# define R300_STENCILMASK_MASK (0xff << R300_STENCILMASK_SHIFT)
# define R300_STENCILWRITEMASK_SHIFT 16
# define R300_STENCILWRITEMASK_MASK (0xff << R300_STENCILWRITEMASK_SHIFT)
 
/* gap */
 
#define R300_ZB_FORMAT 0x4f10
/* Depth format selector in the low bits. */
# define R300_DEPTHFORMAT_16BIT_INT_Z 0
# define R300_DEPTHFORMAT_16BIT_13E3 1
# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL 2
/* reserved up to (15 << 0) */
/* Bit 4: 13E3 inversion mode. */
# define R300_INVERT_13E3_LEADING_ONES 0x00
# define R300_INVERT_13E3_LEADING_ZEROS 0x10
 
#define R300_ZB_ZTOP 0x4F14
/* Bit 0 switches ZTOP on or off. */
# define R300_ZTOP_DISABLE 0x0
# define R300_ZTOP_ENABLE 0x1
 
/* gap */
 
#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
/* Bit 0: flush request. */
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0)
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0)
/* Bit 1: free request. */
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1)
# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1)
/* Bit 31: busy status. The BUSY value uses an unsigned literal because
 * shifting a signed 1 into bit 31 is undefined behaviour in C.
 */
# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31)
# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1u << 31)
 
/* Z buffer bandwidth control register.
 * Each feature is a one-bit flag (HIZ_FP_EXP_BITS is a multi-bit selector
 * starting at bit 12). All values are pre-shifted and can be OR-ed together.
 * Bits 0-6 are shared by R300 and R500; the R500_* flags start at bit 7. */
#define R300_ZB_BW_CNTL 0x4f1c
# define R300_HIZ_DISABLE (0 << 0)
# define R300_HIZ_ENABLE (1 << 0)
# define R300_HIZ_MAX (0 << 1)
# define R300_HIZ_MIN (1 << 1)
# define R300_FAST_FILL_DISABLE (0 << 2)
# define R300_FAST_FILL_ENABLE (1 << 2)
# define R300_RD_COMP_DISABLE (0 << 3)
# define R300_RD_COMP_ENABLE (1 << 3)
# define R300_WR_COMP_DISABLE (0 << 4)
# define R300_WR_COMP_ENABLE (1 << 4)
# define R300_ZB_CB_CLEAR_RMW (0 << 5)
# define R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY (1 << 5)
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)

/* Note the inverted sense: for these flags a set bit DISABLES the feature. */
# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7)
# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7)
# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8)
# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8)

/* bit 9 is not defined here */
# define R500_BMASK_ENABLE (0 << 10)
# define R500_BMASK_DISABLE (1 << 10)
# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11)
# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11)
# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12)
# define R500_HIZ_FP_EXP_BITS_1 (1 << 12)
# define R500_HIZ_FP_EXP_BITS_2 (2 << 12)
# define R500_HIZ_FP_EXP_BITS_3 (3 << 12)
# define R500_HIZ_FP_EXP_BITS_4 (4 << 12)
# define R500_HIZ_FP_EXP_BITS_5 (5 << 12)
# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15)
# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15)
# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16)
# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16)
# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17)
# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17)
# define R500_PEQ_PACKING_DISABLE (0 << 18)
# define R500_PEQ_PACKING_ENABLE (1 << 18)
/* Covered-pointer masking has its own bit (19); it previously aliased the
 * PEQ packing flag at bit 18, so enabling one silently enabled the other. */
# define R500_COVERED_PTR_MASKING_DISABLE (0 << 19)
# define R500_COVERED_PTR_MASKING_ENABLE (1 << 19)
 
 
/* gap */
 
/* Z Buffer Address Offset.
 * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles.
 */
#define R300_ZB_DEPTHOFFSET 0x4f20

/* Z Buffer Pitch and Endian Control.
 * Layout used by the definitions below:
 *   DEPTHPITCH_MASK   pitch field (mask 0x00003FFC)
 *   bit 16            macro tiling enable
 *   bits 18:17        micro tiling mode (values 0..2, so two bits wide)
 *   bits 20:19        endian swap mode (values 0..3)
 * The endian field starts at bit 19: placing it at bit 18 made
 * DEPTHENDIAN_WORD_SWAP identical to DEPTHMICROTILE_TILED_SQUARE.
 */
#define R300_ZB_DEPTHPITCH 0x4f24
# define R300_DEPTHPITCH_MASK 0x00003FFC
# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
# define R300_DEPTHMACROTILE(x) ((x) << 16)
# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
# define R300_DEPTHMICROTILE_TILED (1 << 17)
# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
# define R300_DEPTHMICROTILE(x) ((x) << 17)
# define R300_DEPTHENDIAN_NO_SWAP (0 << 19)
# define R300_DEPTHENDIAN_WORD_SWAP (1 << 19)
# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 19)
# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 19)
 
/* Z Buffer Clear Value */
#define R300_ZB_DEPTHCLEARVALUE 0x4f28

/* Z Mask RAM is a Z compression buffer.
 * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks,
 * that is 2 bits for each block.
 * On chips with 2 Z pipes, every other dword maps to a different pipe.
 */

/* The dword offset into Z mask RAM (bits 18:4) */
#define R300_ZB_ZMASK_OFFSET 0x4f30

/* Z Mask Pitch. */
#define R300_ZB_ZMASK_PITCH 0x4f34

/* Access to Z Mask RAM in a manner similar to HiZ RAM.
 * The indices are autoincrementing.
 * WRINDEX / RDINDEX hold the write / read index and DWORD is the data port
 * (naming mirrors the R300_ZB_HIZ_* trio below). */
#define R300_ZB_ZMASK_WRINDEX 0x4f38
#define R300_ZB_ZMASK_DWORD 0x4f3c
#define R300_ZB_ZMASK_RDINDEX 0x4f40

/* Hierarchical Z Memory Offset */
#define R300_ZB_HIZ_OFFSET 0x4f44

/* Hierarchical Z Write Index */
#define R300_ZB_HIZ_WRINDEX 0x4f48

/* Hierarchical Z Data */
#define R300_ZB_HIZ_DWORD 0x4f4c

/* Hierarchical Z Read Index */
#define R300_ZB_HIZ_RDINDEX 0x4f50

/* Hierarchical Z Pitch */
#define R300_ZB_HIZ_PITCH 0x4f54

/* Z Buffer Z Pass Counter Data */
#define R300_ZB_ZPASS_DATA 0x4f58

/* Z Buffer Z Pass Counter Address */
#define R300_ZB_ZPASS_ADDR 0x4f5c
 
/* Depth buffer X and Y coordinate offset.
 * X occupies the bits covered by 0x000007FE (shift 1), Y the bits covered by
 * 0x07FE0000 (shift 17); both masks are already positioned in the register. */
#define R300_ZB_DEPTHXY_OFFSET 0x4f60
# define R300_DEPTHX_OFFSET_SHIFT 1
# define R300_DEPTHX_OFFSET_MASK 0x000007FE
# define R300_DEPTHY_OFFSET_SHIFT 17
# define R300_DEPTHY_OFFSET_MASK 0x07FE0000
 
/* Sets the fifo sizes.
 * OP_FIFO_SIZE is an enumerated selector starting at bit 0:
 *   0 = full, 1 = half, 2 = quarter, 3 = eighth.
 * The eighth-size encoding is 3; the previous value of 4 broke the sequence
 * and fell outside a two-bit selector.
 * (The QUATER / EIGTHS spellings are kept because they are the public names.)
 */
#define R500_ZB_FIFO_SIZE 0x4fd0
# define R500_OP_FIFO_SIZE_FULL (0 << 0)
# define R500_OP_FIFO_SIZE_HALF (1 << 0)
# define R500_OP_FIFO_SIZE_QUATER (2 << 0)
# define R500_OP_FIFO_SIZE_EIGTHS (3 << 0)
 
/* Stencil Reference Value and Mask for backfacing quads */
/* R300_ZB_STENCILREFMASK handles front face */
/* Field layout is the same as R300_ZB_STENCILREFMASK: reference value,
 * stencil mask and write mask at shifts 0, 8 and 16, with pre-shifted masks. */
#define R500_ZB_STENCILREFMASK_BF 0x4fd4
# define R500_STENCILREF_SHIFT 0
# define R500_STENCILREF_MASK 0x000000ff
# define R500_STENCILMASK_SHIFT 8
# define R500_STENCILMASK_MASK 0x0000ff00
# define R500_STENCILWRITEMASK_SHIFT 16
# define R500_STENCILWRITEMASK_MASK 0x00ff0000
 
/**
* \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION
*
* The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector
* Engine instruction or a Math Engine instruction.
*/
 
/*\{*/
 
/* Vector Engine (VE) opcodes of the programmable vertex shader.
 * Values are the raw opcode numbers, written in hex. */
enum {
	/* available from R3XX on */
	VECTOR_NO_OP              = 0x00,
	VE_DOT_PRODUCT            = 0x01,
	VE_MULTIPLY               = 0x02,
	VE_ADD                    = 0x03,
	VE_MULTIPLY_ADD           = 0x04,
	VE_DISTANCE_VECTOR        = 0x05,
	VE_FRACTION               = 0x06,
	VE_MAXIMUM                = 0x07,
	VE_MINIMUM                = 0x08,
	VE_SET_GREATER_THAN_EQUAL = 0x09,
	VE_SET_LESS_THAN          = 0x0a,
	VE_MULTIPLYX2_ADD         = 0x0b,
	VE_MULTIPLY_CLAMP         = 0x0c,
	VE_FLT2FIX_DX             = 0x0d,
	VE_FLT2FIX_DX_RND         = 0x0e,

	/* added with R5XX */
	VE_PRED_SET_EQ_PUSH       = 0x0f,
	VE_PRED_SET_GT_PUSH       = 0x10,
	VE_PRED_SET_GTE_PUSH      = 0x11,
	VE_PRED_SET_NEQ_PUSH      = 0x12,
	VE_COND_WRITE_EQ          = 0x13,
	VE_COND_WRITE_GT          = 0x14,
	VE_COND_WRITE_GTE         = 0x15,
	VE_COND_WRITE_NEQ         = 0x16,
	VE_COND_MUX_EQ            = 0x17,
	VE_COND_MUX_GT            = 0x18,
	VE_COND_MUX_GTE           = 0x19,
	VE_SET_GREATER_THAN       = 0x1a,
	VE_SET_EQUAL              = 0x1b,
	VE_SET_NOT_EQUAL          = 0x1c
};
 
/* Math Engine (ME) opcodes of the programmable vertex shader.
 * Values are the raw opcode numbers, written in hex. */
enum {
	/* available from R3XX on */
	MATH_NO_OP                = 0x00,
	ME_EXP_BASE2_DX           = 0x01,
	ME_LOG_BASE2_DX           = 0x02,
	ME_EXP_BASEE_FF           = 0x03,
	ME_LIGHT_COEFF_DX         = 0x04,
	ME_POWER_FUNC_FF          = 0x05,
	ME_RECIP_DX               = 0x06,
	ME_RECIP_FF               = 0x07,
	ME_RECIP_SQRT_DX          = 0x08,
	ME_RECIP_SQRT_FF          = 0x09,
	ME_MULTIPLY               = 0x0a,
	ME_EXP_BASE2_FULL_DX      = 0x0b,
	ME_LOG_BASE2_FULL_DX      = 0x0c,
	ME_POWER_FUNC_FF_CLAMP_B  = 0x0d,
	ME_POWER_FUNC_FF_CLAMP_B1 = 0x0e,
	ME_POWER_FUNC_FF_CLAMP_01 = 0x0f,
	ME_SIN                    = 0x10,
	ME_COS                    = 0x11,

	/* added with R5XX */
	ME_LOG_BASE2_IEEE         = 0x12,
	ME_RECIP_IEEE             = 0x13,
	ME_RECIP_SQRT_IEEE        = 0x14,
	ME_PRED_SET_EQ            = 0x15,
	ME_PRED_SET_GT            = 0x16,
	ME_PRED_SET_GTE           = 0x17,
	ME_PRED_SET_NEQ           = 0x18,
	ME_PRED_SET_CLR           = 0x19,
	ME_PRED_SET_INV           = 0x1a,
	ME_PRED_SET_POP           = 0x1b,
	ME_PRED_SET_RESTORE       = 0x1c
};
 
/* Two-clock macro opcodes (R3XX). Numbering starts at 0 and counts up. */
enum {
	PVS_MACRO_OP_2CLK_MADD,		/* 0 */
	PVS_MACRO_OP_2CLK_M2X_ADD	/* 1 */
};
 
/* Source operand register classes. Numbering starts at 0 and counts up. */
enum {
	PVS_SRC_REG_TEMPORARY,		/* 0: Intermediate Storage */
	PVS_SRC_REG_INPUT,		/* 1: Input Vertex Storage */
	PVS_SRC_REG_CONSTANT,		/* 2: Constant State Storage */
	PVS_SRC_REG_ALT_TEMPORARY	/* 3: Alternate Intermediate Storage */
};
 
/* Destination operand register classes. */
enum {
PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */
PVS_DST_REG_A0 = 1, /* Address Register Storage */
PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
PVS_DST_REG_INPUT = 5 /* NOTE(review): the original comment here duplicated the OUT_REPL_X text; presumably this is input vertex storage - confirm */
};
 
/* Per-component source swizzle selectors. Numbering starts at 0 and counts up. */
enum {
	PVS_SRC_SELECT_X,		/* 0: Select X Component */
	PVS_SRC_SELECT_Y,		/* 1: Select Y Component */
	PVS_SRC_SELECT_Z,		/* 2: Select Z Component */
	PVS_SRC_SELECT_W,		/* 3: Select W Component */
	PVS_SRC_SELECT_FORCE_0,		/* 4: Force Component to 0.0 */
	PVS_SRC_SELECT_FORCE_1		/* 5: Force Component to 1.0 */
};
 
/* PVS Opcode & Destination Operand Description */

/* Field layout of the opcode/destination dword.
 * Each field has an unshifted *_MASK (the field width) and a *_SHIFT (the
 * position of its lowest bit); PVS_OP_DST_OPERAND applies them as
 * (value & MASK) << SHIFT.
 * NOTE(review): PVS_DST_PRED_SENSE and PVS_DST_DUAL_MATH_OP both start at
 * bit 27, so the two fields overlap as defined - confirm which is intended. */
enum {
PVS_DST_OPCODE_MASK = 0x3f,
PVS_DST_OPCODE_SHIFT = 0,
PVS_DST_MATH_INST_MASK = 0x1,
PVS_DST_MATH_INST_SHIFT = 6,
PVS_DST_MACRO_INST_MASK = 0x1,
PVS_DST_MACRO_INST_SHIFT = 7,
PVS_DST_REG_TYPE_MASK = 0xf,
PVS_DST_REG_TYPE_SHIFT = 8,
PVS_DST_ADDR_MODE_1_MASK = 0x1,
PVS_DST_ADDR_MODE_1_SHIFT = 12,
PVS_DST_OFFSET_MASK = 0x7f,
PVS_DST_OFFSET_SHIFT = 13,
/* per-component write enables, X..W in bits 20..23 */
PVS_DST_WE_X_MASK = 0x1,
PVS_DST_WE_X_SHIFT = 20,
PVS_DST_WE_Y_MASK = 0x1,
PVS_DST_WE_Y_SHIFT = 21,
PVS_DST_WE_Z_MASK = 0x1,
PVS_DST_WE_Z_SHIFT = 22,
PVS_DST_WE_W_MASK = 0x1,
PVS_DST_WE_W_SHIFT = 23,
/* saturate flags: separate bits for vector-engine and math-engine results */
PVS_DST_VE_SAT_MASK = 0x1,
PVS_DST_VE_SAT_SHIFT = 24,
PVS_DST_ME_SAT_MASK = 0x1,
PVS_DST_ME_SAT_SHIFT = 25,
PVS_DST_PRED_ENABLE_MASK = 0x1,
PVS_DST_PRED_ENABLE_SHIFT = 26,
PVS_DST_PRED_SENSE_MASK = 0x1,
PVS_DST_PRED_SENSE_SHIFT = 27,
PVS_DST_DUAL_MATH_OP_MASK = 0x3,
PVS_DST_DUAL_MATH_OP_SHIFT = 27,
PVS_DST_ADDR_SEL_MASK = 0x3,
PVS_DST_ADDR_SEL_SHIFT = 29,
PVS_DST_ADDR_MODE_0_MASK = 0x1,
PVS_DST_ADDR_MODE_0_SHIFT = 31
};
 
/* PVS Source Operand Description */

/* Field layout of a source operand dword.
 * Each field has an unshifted *_MASK (the field width) and a *_SHIFT (the
 * position of its lowest bit); PVS_SRC_OPERAND applies them as
 * (value & MASK) << SHIFT. */
enum {
PVS_SRC_REG_TYPE_MASK = 0x3,
PVS_SRC_REG_TYPE_SHIFT = 0,
SPARE_0_MASK = 0x1,
SPARE_0_SHIFT = 2,
PVS_SRC_ABS_XYZW_MASK = 0x1,
PVS_SRC_ABS_XYZW_SHIFT = 3,
PVS_SRC_ADDR_MODE_0_MASK = 0x1,
PVS_SRC_ADDR_MODE_0_SHIFT = 4,
PVS_SRC_OFFSET_MASK = 0xff,
PVS_SRC_OFFSET_SHIFT = 5,
/* three-bit swizzle selector per component */
PVS_SRC_SWIZZLE_X_MASK = 0x7,
PVS_SRC_SWIZZLE_X_SHIFT = 13,
PVS_SRC_SWIZZLE_Y_MASK = 0x7,
PVS_SRC_SWIZZLE_Y_SHIFT = 16,
PVS_SRC_SWIZZLE_Z_MASK = 0x7,
PVS_SRC_SWIZZLE_Z_SHIFT = 19,
PVS_SRC_SWIZZLE_W_MASK = 0x7,
PVS_SRC_SWIZZLE_W_SHIFT = 22,
/* one modifier bit per component, X..W in bits 25..28
 * (PVS_SRC_OPERAND feeds its "negate" argument into these) */
PVS_SRC_MODIFIER_X_MASK = 0x1,
PVS_SRC_MODIFIER_X_SHIFT = 25,
PVS_SRC_MODIFIER_Y_MASK = 0x1,
PVS_SRC_MODIFIER_Y_SHIFT = 26,
PVS_SRC_MODIFIER_Z_MASK = 0x1,
PVS_SRC_MODIFIER_Z_SHIFT = 27,
PVS_SRC_MODIFIER_W_MASK = 0x1,
PVS_SRC_MODIFIER_W_SHIFT = 28,
PVS_SRC_ADDR_SEL_MASK = 0x3,
PVS_SRC_ADDR_SEL_SHIFT = 29,
/* NOTE(review): mask 0 with shift 32 cannot encode anything, and shifting a
 * 32-bit value by 32 is undefined in C. Bit 31 is the only bit not claimed
 * by another field above, so mask 0x1 / shift 31 is presumably what was
 * meant - confirm against the hardware documentation before use. */
PVS_SRC_ADDR_MODE_1_MASK = 0x0,
PVS_SRC_ADDR_MODE_1_SHIFT = 32
};
 
/*\}*/
 
/*
 * Build the opcode/destination dword of a PVS instruction.
 *
 *   opcode        VE or ME opcode (masked with PVS_DST_OPCODE_MASK)
 *   math_inst     non-zero for a Math Engine instruction; also selects which
 *                 saturate bit (ME vs VE) the "saturate" argument lands in
 *   macro_inst    non-zero for a macro instruction
 *   reg_index     destination register offset
 *   reg_writemask four write-enable bits, X in bit 0 .. W in bit 3
 *   reg_class     one of the PVS_DST_REG_* register classes
 *   saturate      non-zero to request saturation of the result
 *
 * The whole expansion is wrapped in parentheses and every argument is
 * parenthesized, so the macro can be combined with other operators and
 * called with compound expressions. math_inst and saturate are expanded more
 * than once: do not pass expressions with side effects.
 */
#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class, saturate) \
	((((opcode) & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
	 | (((math_inst) & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
	 | (((macro_inst) & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
	 | (((reg_index) & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
	 | (((reg_writemask) & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
	 | (((reg_class) & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT) \
	 | ((math_inst) ? (((saturate) & PVS_DST_ME_SAT_MASK) << PVS_DST_ME_SAT_SHIFT) : \
			  (((saturate) & PVS_DST_VE_SAT_MASK) << PVS_DST_VE_SAT_SHIFT)))
 
/*
 * Build a source operand dword of a PVS instruction.
 *
 *   in_reg_index  source register offset
 *   comp_x..w     PVS_SRC_SELECT_* swizzle selector for each component
 *   reg_class     one of the PVS_SRC_REG_* register classes
 *   negate        four modifier bits, X in bit 0 .. W in bit 3
 *
 * Every argument is parenthesized before masking so that compound
 * expressions (e.g. "a | b" or "c ? x : y") are masked as a whole instead of
 * being split by operator precedence.
 */
#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
	((((in_reg_index) & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
	 | (((comp_x) & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
	 | (((comp_y) & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
	 | (((comp_z) & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
	 | (((comp_w) & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
	 | (((negate) & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
	 | (((reg_class) & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
 
/* BEGIN: Packet 3 commands */
 
/* A primitive emission dword.
 * Composed by OR-ing one R300_PRIM_TYPE_*, one R300_PRIM_WALK_*, optionally a
 * colour order, and the vertex count shifted by R300_PRIM_NUM_VERTICES_SHIFT. */
/* primitive type: low four bits (R300_PRIM_TYPE_MASK) */
#define R300_PRIM_TYPE_NONE (0 << 0)
#define R300_PRIM_TYPE_POINT (1 << 0)
#define R300_PRIM_TYPE_LINE (2 << 0)
#define R300_PRIM_TYPE_LINE_STRIP (3 << 0)
#define R300_PRIM_TYPE_TRI_LIST (4 << 0)
#define R300_PRIM_TYPE_TRI_FAN (5 << 0)
#define R300_PRIM_TYPE_TRI_STRIP (6 << 0)
#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0)
#define R300_PRIM_TYPE_RECT_LIST (8 << 0)
#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0)
#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0)
/* GUESS (based on r200) */
#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0)
#define R300_PRIM_TYPE_LINE_LOOP (12 << 0)
#define R300_PRIM_TYPE_QUADS (13 << 0)
#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0)
#define R300_PRIM_TYPE_POLYGON (15 << 0)
#define R300_PRIM_TYPE_MASK 0xF
/* vertex walk mode: bits 5:4 (R300_PRIM_WALK_MASK) */
#define R300_PRIM_WALK_IND (1 << 4)
#define R300_PRIM_WALK_LIST (2 << 4)
#define R300_PRIM_WALK_RING (3 << 4)
#define R300_PRIM_WALK_MASK (3 << 4)
/* GUESS (based on r200) */
#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6)
#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6)
/* NOTE(review): unlike TYPE_MASK / WALK_MASK, NUM_VERTICES_MASK is not
 * pre-shifted; presumably it is meant to be applied before shifting by 16. */
#define R300_PRIM_NUM_VERTICES_SHIFT 16
#define R300_PRIM_NUM_VERTICES_MASK 0xffff
 
 
 
/*
 * The R500 unified shader (US) registers come in banks of 512 each, one
 * for each instruction slot in the shader. You can't touch them directly.
 * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
 * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
 * instruction is fully specified.
 */
/* Alpha ALU instruction word. */
#define R500_US_ALU_ALPHA_INST_0 0xa800
/* alpha opcode values (not shifted) */
# define R500_ALPHA_OP_MAD 0
# define R500_ALPHA_OP_DP 1
# define R500_ALPHA_OP_MIN 2
# define R500_ALPHA_OP_MAX 3
/* #define R500_ALPHA_OP_RESERVED 4 */
# define R500_ALPHA_OP_CND 5
# define R500_ALPHA_OP_CMP 6
# define R500_ALPHA_OP_FRC 7
# define R500_ALPHA_OP_EX2 8
# define R500_ALPHA_OP_LN2 9
# define R500_ALPHA_OP_RCP 10
# define R500_ALPHA_OP_RSQ 11
# define R500_ALPHA_OP_SIN 12
# define R500_ALPHA_OP_COS 13
# define R500_ALPHA_OP_MDH 14
# define R500_ALPHA_OP_MDV 15
/* destination address (from bit 4) and its relative-addressing flag */
# define R500_ALPHA_ADDRD(x) ((x) << 4)
# define R500_ALPHA_ADDRD_REL (1 << 11)
/* operand A: source select, swizzle, modifier */
# define R500_ALPHA_SEL_A_SHIFT 12
# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
# define R500_ALPHA_SEL_A_SRCP (3 << 12)
# define R500_ALPHA_SWIZ_A_R (0 << 14)
# define R500_ALPHA_SWIZ_A_G (1 << 14)
# define R500_ALPHA_SWIZ_A_B (2 << 14)
# define R500_ALPHA_SWIZ_A_A (3 << 14)
# define R500_ALPHA_SWIZ_A_0 (4 << 14)
# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
# define R500_ALPHA_SWIZ_A_1 (6 << 14)
/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
# define R500_ALPHA_MOD_A_NOP (0 << 17)
# define R500_ALPHA_MOD_A_NEG (1 << 17)
# define R500_ALPHA_MOD_A_ABS (2 << 17)
# define R500_ALPHA_MOD_A_NAB (3 << 17)
/* operand B: source select, swizzle, modifier */
# define R500_ALPHA_SEL_B_SHIFT 19
# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
# define R500_ALPHA_SEL_B_SRCP (3 << 19)
# define R500_ALPHA_SWIZ_B_R (0 << 21)
# define R500_ALPHA_SWIZ_B_G (1 << 21)
# define R500_ALPHA_SWIZ_B_B (2 << 21)
# define R500_ALPHA_SWIZ_B_A (3 << 21)
# define R500_ALPHA_SWIZ_B_0 (4 << 21)
# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
# define R500_ALPHA_SWIZ_B_1 (6 << 21)
/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
# define R500_ALPHA_MOD_B_NOP (0 << 24)
# define R500_ALPHA_MOD_B_NEG (1 << 24)
# define R500_ALPHA_MOD_B_ABS (2 << 24)
# define R500_ALPHA_MOD_B_NAB (3 << 24)
/* output modifier selector */
# define R500_ALPHA_OMOD_SHIFT 26
# define R500_ALPHA_OMOD_IDENTITY (0 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_MUL_2 (1 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_MUL_4 (2 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_MUL_8 (3 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_DIV_2 (4 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_DIV_4 (5 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_DIV_8 (6 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_OMOD_DISABLE (7 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_TARGET(x) ((x) << 29)
# define R500_ALPHA_W_OMASK (1 << 31)
/* Alpha ALU source addresses: three sources at bits 0, 10 and 20, each
 * followed by a CONST flag and a REL flag; SRCP_OP selector from bit 30. */
#define R500_US_ALU_ALPHA_ADDR_0 0x9800
# define R500_ALPHA_ADDR0(x) ((x) << 0)
# define R500_ALPHA_ADDR0_CONST (1 << 8)
# define R500_ALPHA_ADDR0_REL (1 << 9)
# define R500_ALPHA_ADDR1(x) ((x) << 10)
# define R500_ALPHA_ADDR1_CONST (1 << 18)
# define R500_ALPHA_ADDR1_REL (1 << 19)
# define R500_ALPHA_ADDR2(x) ((x) << 20)
# define R500_ALPHA_ADDR2_CONST (1 << 28)
# define R500_ALPHA_ADDR2_REL (1 << 29)
# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30)
# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30)
# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30)
# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30)
/* RGBA ALU instruction word: RGB opcode and destination, then the "C"
 * operand description for the RGB channels and for alpha. */
#define R500_US_ALU_RGBA_INST_0 0xb000
/* RGB opcode (from bit 0) */
# define R500_ALU_RGBA_OP_MAD (0 << 0)
# define R500_ALU_RGBA_OP_DP3 (1 << 0)
# define R500_ALU_RGBA_OP_DP4 (2 << 0)
# define R500_ALU_RGBA_OP_D2A (3 << 0)
# define R500_ALU_RGBA_OP_MIN (4 << 0)
# define R500_ALU_RGBA_OP_MAX (5 << 0)
/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
# define R500_ALU_RGBA_OP_CND (7 << 0)
# define R500_ALU_RGBA_OP_CMP (8 << 0)
# define R500_ALU_RGBA_OP_FRC (9 << 0)
# define R500_ALU_RGBA_OP_SOP (10 << 0)
# define R500_ALU_RGBA_OP_MDH (11 << 0)
# define R500_ALU_RGBA_OP_MDV (12 << 0)
/* destination address (from bit 4) and its relative-addressing flag */
# define R500_ALU_RGBA_ADDRD(x) ((x) << 4)
# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
/* RGB operand C: source select, per-channel swizzles, modifier */
# define R500_ALU_RGBA_SEL_C_SHIFT 12
# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
/* alpha operand C: source select, swizzle, modifier */
# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30)
# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30)
# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
/* RGB ALU instruction word: operand A and operand B descriptions
 * (source select, per-channel swizzles, modifier), output modifier,
 * target and write mask. */
#define R500_US_ALU_RGB_INST_0 0xa000
/* operand A */
# define R500_ALU_RGB_SEL_A_SHIFT 0
# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0)
# define R500_ALU_RGB_SEL_A_SRCP (3 << 0)
# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2)
# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2)
# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2)
# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2)
# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2)
# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2)
# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2)
/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */
# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5)
# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5)
# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5)
# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5)
# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5)
# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5)
# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5)
/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */
# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8)
# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8)
# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8)
# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8)
# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8)
# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8)
# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8)
/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */
# define R500_ALU_RGB_MOD_A_NOP (0 << 11)
# define R500_ALU_RGB_MOD_A_NEG (1 << 11)
# define R500_ALU_RGB_MOD_A_ABS (2 << 11)
# define R500_ALU_RGB_MOD_A_NAB (3 << 11)
/* operand B */
# define R500_ALU_RGB_SEL_B_SHIFT 13
# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13)
# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13)
# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13)
# define R500_ALU_RGB_SEL_B_SRCP (3 << 13)
# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15)
# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15)
# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15)
# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15)
# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15)
# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15)
# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15)
/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */
# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18)
# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18)
# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18)
# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18)
# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18)
# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18)
# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18)
/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */
# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21)
# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21)
# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21)
# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21)
# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21)
# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21)
# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21)
/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */
# define R500_ALU_RGB_MOD_B_NOP (0 << 24)
# define R500_ALU_RGB_MOD_B_NEG (1 << 24)
# define R500_ALU_RGB_MOD_B_ABS (2 << 24)
# define R500_ALU_RGB_MOD_B_NAB (3 << 24)
/* output modifier selector */
# define R500_ALU_RGB_OMOD_SHIFT 26
# define R500_ALU_RGB_OMOD_IDENTITY (0 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_MUL_2 (1 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_MUL_4 (2 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_MUL_8 (3 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_DIV_2 (4 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_DIV_4 (5 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_DIV_8 (6 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_OMOD_DISABLE (7 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_TARGET(x) ((x) << 29)
# define R500_ALU_RGB_WMASK (1 << 31)
/* RGB ALU source addresses: three sources at bits 0, 10 and 20, each
 * followed by a CONST flag and a REL flag; SRCP_OP selector from bit 30. */
#define R500_US_ALU_RGB_ADDR_0 0x9000
# define R500_RGB_ADDR0(x) ((x) << 0)
# define R500_RGB_ADDR0_CONST (1 << 8)
# define R500_RGB_ADDR0_REL (1 << 9)
# define R500_RGB_ADDR1(x) ((x) << 10)
# define R500_RGB_ADDR1_CONST (1 << 18)
# define R500_RGB_ADDR1_REL (1 << 19)
# define R500_RGB_ADDR2(x) ((x) << 20)
# define R500_RGB_ADDR2_CONST (1 << 28)
# define R500_RGB_ADDR2_REL (1 << 29)
# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30)
# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30)
# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30)
/* Common instruction word: instruction type, predication, write/output
 * masks, clamping and ALU-result controls. */
#define R500_US_CMN_INST_0 0xb800
/* instruction type: bits 1:0 */
# define R500_INST_TYPE_MASK (3 << 0)
# define R500_INST_TYPE_ALU (0 << 0)
# define R500_INST_TYPE_OUT (1 << 0)
# define R500_INST_TYPE_FC (2 << 0)
# define R500_INST_TYPE_TEX (3 << 0)
# define R500_INST_TEX_SEM_WAIT_SHIFT 2
# define R500_INST_TEX_SEM_WAIT (1 << R500_INST_TEX_SEM_WAIT_SHIFT)
/* RGB predicate select (from bit 3) and invert flag */
# define R500_INST_RGB_PRED_SEL_NONE (0 << 3)
# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3)
# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3)
# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3)
# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3)
# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3)
# define R500_INST_RGB_PRED_INV (1 << 6)
# define R500_INST_WRITE_INACTIVE (1 << 7)
# define R500_INST_LAST (1 << 8)
# define R500_INST_NOP (1 << 9)
# define R500_INST_ALU_WAIT (1 << 10)
/* write masks (WMASK) and output masks (OMASK); *_RGB combines R, G and B */
# define R500_INST_RGB_WMASK_R (1 << 11)
# define R500_INST_RGB_WMASK_G (1 << 12)
# define R500_INST_RGB_WMASK_B (1 << 13)
# define R500_INST_RGB_WMASK_RGB (7 << 11)
# define R500_INST_ALPHA_WMASK (1 << 14)
# define R500_INST_RGB_OMASK_R (1 << 15)
# define R500_INST_RGB_OMASK_G (1 << 16)
# define R500_INST_RGB_OMASK_B (1 << 17)
# define R500_INST_RGB_OMASK_RGB (7 << 15)
# define R500_INST_ALPHA_OMASK (1 << 18)
# define R500_INST_RGB_CLAMP (1 << 19)
# define R500_INST_ALPHA_CLAMP (1 << 20)
/* ALU_RESULT_SEL is the raw bit; _RED / _ALPHA name its two values */
# define R500_INST_ALU_RESULT_SEL (1 << 21)
# define R500_INST_ALU_RESULT_SEL_RED (0 << 21)
# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21)
# define R500_INST_ALPHA_PRED_INV (1 << 22)
# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
# define R500_INST_ALU_RESULT_OP_GE (2 << 23)
# define R500_INST_ALU_RESULT_OP_NE (3 << 23)
/* alpha predicate select (from bit 25) */
# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25)
# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25)
# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25)
# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25)
# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25)
# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25)
/* XXX next four are kind of guessed */
# define R500_INST_STAT_WE_R (1 << 28)
# define R500_INST_STAT_WE_G (1 << 29)
# define R500_INST_STAT_WE_B (1 << 30)
# define R500_INST_STAT_WE_A (1 << 31)
 
/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
/* Shader code window registers: ADDR packs start (bit 0) and end (bit 16),
 * RANGE packs address (bit 0) and size (bit 16), OFFSET holds one address. */
#define R500_US_CODE_ADDR 0x4630
# define R500_US_CODE_START_ADDR(x) ((x) << 0)
# define R500_US_CODE_END_ADDR(x) ((x) << 16)
#define R500_US_CODE_OFFSET 0x4638
# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0)
#define R500_US_CODE_RANGE 0x4634
# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
/* US configuration register; only bit 1 is defined here */
#define R500_US_CONFIG 0x4600
# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
#define R500_US_FC_ADDR_0 0xa000
# define R500_FC_BOOL_ADDR(x) ((x) << 0)
# define R500_FC_INT_ADDR(x) ((x) << 8)
# define R500_FC_JUMP_ADDR(x) ((x) << 16)
# define R500_FC_JUMP_GLOBAL (1 << 31)
#define R500_US_FC_BOOL_CONST 0x4620
# define R500_FC_KBOOL(x) (x)
#define R500_US_FC_CTRL 0x4624
# define R500_FC_TEST_EN (1 << 30)
# define R500_FC_FULL_FC_EN (1 << 31)
#define R500_US_FC_INST_0 0x9800
# define R500_FC_OP_JUMP (0 << 0)
# define R500_FC_OP_LOOP (1 << 0)
# define R500_FC_OP_ENDLOOP (2 << 0)
# define R500_FC_OP_REP (3 << 0)
# define R500_FC_OP_ENDREP (4 << 0)
# define R500_FC_OP_BREAKLOOP (5 << 0)
# define R500_FC_OP_BREAKREP (6 << 0)
# define R500_FC_OP_CONTINUE (7 << 0)
# define R500_FC_B_ELSE (1 << 4)
# define R500_FC_JUMP_ANY (1 << 5)
# define R500_FC_A_OP_NONE (0 << 6)
# define R500_FC_A_OP_POP (1 << 6)
# define R500_FC_A_OP_PUSH (2 << 6)
# define R500_FC_JUMP_FUNC(x) ((x) << 8)
# define R500_FC_B_POP_CNT(x) ((x) << 16)
# define R500_FC_B_OP0_NONE (0 << 24)
# define R500_FC_B_OP0_DECR (1 << 24)
# define R500_FC_B_OP0_INCR (2 << 24)
# define R500_FC_B_OP1_NONE (0 << 26)
# define R500_FC_B_OP1_DECR (1 << 26)
# define R500_FC_B_OP1_INCR (2 << 26)
# define R500_FC_IGNORE_UNCOVERED (1 << 28)
#define R500_US_FC_INT_CONST_0 0x4c00
# define R500_FC_INT_CONST_KR(x) ((x) << 0)
# define R500_FC_INT_CONST_KG(x) ((x) << 8)
# define R500_FC_INT_CONST_KB(x) ((x) << 16)
/* _0 through _15 */
/* Texture dimensions packed as width (bit 0), height (bit 11), depth (bit 22). */
#define R500_US_FORMAT0_0 0x4640
# define R500_FORMAT_TXWIDTH(x) ((x) << 0)
# define R500_FORMAT_TXHEIGHT(x) ((x) << 11)
# define R500_FORMAT_TXDEPTH(x) ((x) << 22)
/* Pixel size register; the value is written unshifted. */
#define R500_US_PIXSIZE 0x4604
# define R500_PIX_SIZE(x) (x)
/* Texture instruction addressing.
 * Low half: source address (bit 0), REL flag (bit 7) and two-bit swizzles
 * for S/T/R/Q at bits 8/10/12/14.
 * High half: destination address (bit 16), REL flag (bit 23) and two-bit
 * swizzles for R/G/B/A at bits 24/26/28/30. */
#define R500_US_TEX_ADDR_0 0x9800
# define R500_TEX_SRC_ADDR(x) ((x) << 0)
# define R500_TEX_SRC_ADDR_REL (1 << 7)
# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
# define R500_TEX_DST_ADDR(x) ((x) << 16)
# define R500_TEX_DST_ADDR_REL (1 << 23)
# define R500_TEX_DST_R_SWIZ_R (0 << 24)
# define R500_TEX_DST_R_SWIZ_G (1 << 24)
# define R500_TEX_DST_R_SWIZ_B (2 << 24)
# define R500_TEX_DST_R_SWIZ_A (3 << 24)
# define R500_TEX_DST_G_SWIZ_R (0 << 26)
# define R500_TEX_DST_G_SWIZ_G (1 << 26)
# define R500_TEX_DST_G_SWIZ_B (2 << 26)
# define R500_TEX_DST_G_SWIZ_A (3 << 26)
# define R500_TEX_DST_B_SWIZ_R (0 << 28)
# define R500_TEX_DST_B_SWIZ_G (1 << 28)
# define R500_TEX_DST_B_SWIZ_B (2 << 28)
# define R500_TEX_DST_B_SWIZ_A (3 << 28)
# define R500_TEX_DST_A_SWIZ_R (0 << 30)
# define R500_TEX_DST_A_SWIZ_G (1 << 30)
# define R500_TEX_DST_A_SWIZ_B (2 << 30)
# define R500_TEX_DST_A_SWIZ_A (3 << 30)
/* Texture derivative (DX/DY) addressing.
 * The DX description lives in the low half: address (bit 0), REL flag
 * (bit 7) and two-bit swizzles for S/T/R/Q at bits 8/10/12/14.
 * The DY description is the same layout moved up by 16 bits: address
 * (bit 16), REL flag (bit 23) and swizzles at bits 24/26/28/30.
 * DY_ADDR_REL is therefore bit 23; the previous value (1 << 17) fell inside
 * the DY address field and corrupted the address instead of setting the flag.
 */
#define R500_US_TEX_ADDR_DXDY_0 0xa000
# define R500_DX_ADDR(x) ((x) << 0)
# define R500_DX_ADDR_REL (1 << 7)
# define R500_DX_S_SWIZ_R (0 << 8)
# define R500_DX_S_SWIZ_G (1 << 8)
# define R500_DX_S_SWIZ_B (2 << 8)
# define R500_DX_S_SWIZ_A (3 << 8)
# define R500_DX_T_SWIZ_R (0 << 10)
# define R500_DX_T_SWIZ_G (1 << 10)
# define R500_DX_T_SWIZ_B (2 << 10)
# define R500_DX_T_SWIZ_A (3 << 10)
# define R500_DX_R_SWIZ_R (0 << 12)
# define R500_DX_R_SWIZ_G (1 << 12)
# define R500_DX_R_SWIZ_B (2 << 12)
# define R500_DX_R_SWIZ_A (3 << 12)
# define R500_DX_Q_SWIZ_R (0 << 14)
# define R500_DX_Q_SWIZ_G (1 << 14)
# define R500_DX_Q_SWIZ_B (2 << 14)
# define R500_DX_Q_SWIZ_A (3 << 14)
# define R500_DY_ADDR(x) ((x) << 16)
# define R500_DY_ADDR_REL (1 << 23)
# define R500_DY_S_SWIZ_R (0 << 24)
# define R500_DY_S_SWIZ_G (1 << 24)
# define R500_DY_S_SWIZ_B (2 << 24)
# define R500_DY_S_SWIZ_A (3 << 24)
# define R500_DY_T_SWIZ_R (0 << 26)
# define R500_DY_T_SWIZ_G (1 << 26)
# define R500_DY_T_SWIZ_B (2 << 26)
# define R500_DY_T_SWIZ_A (3 << 26)
# define R500_DY_R_SWIZ_R (0 << 28)
# define R500_DY_R_SWIZ_G (1 << 28)
# define R500_DY_R_SWIZ_B (2 << 28)
# define R500_DY_R_SWIZ_A (3 << 28)
# define R500_DY_Q_SWIZ_R (0 << 30)
# define R500_DY_Q_SWIZ_G (1 << 30)
# define R500_DY_Q_SWIZ_B (2 << 30)
# define R500_DY_Q_SWIZ_A (3 << 30)
/* Texture instruction word: texture id (from bit 16), texture opcode
 * (from bit 22) and control flags in bits 25-27. */
#define R500_US_TEX_INST_0 0x9000
# define R500_TEX_ID(x) ((x) << 16)
# define R500_TEX_INST_NOP (0 << 22)
# define R500_TEX_INST_LD (1 << 22)
# define R500_TEX_INST_TEXKILL (2 << 22)
# define R500_TEX_INST_PROJ (3 << 22)
# define R500_TEX_INST_LODBIAS (4 << 22)
# define R500_TEX_INST_LOD (5 << 22)
# define R500_TEX_INST_DXDY (6 << 22)
# define R500_TEX_SEM_ACQUIRE_SHIFT 25
# define R500_TEX_SEM_ACQUIRE (1 << R500_TEX_SEM_ACQUIRE_SHIFT)
# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
# define R500_TEX_UNSCALED (1 << 27)
/* W format register: format selector from bit 0, source selector at bit 2. */
#define R300_US_W_FMT 0x46b4
# define R300_W_FMT_W0 (0 << 0)
# define R300_W_FMT_W24 (1 << 0)
# define R300_W_FMT_W24FP (2 << 0)
# define R300_W_SRC_US (0 << 2)
# define R300_W_SRC_RAS (1 << 2)
 
/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
 * Two parameter dwords:
 * 0. VAP_VTX_FMT: The first parameter is not written to hardware
 * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
 */
#define R300_PACKET3_3D_DRAW_VBUF 0x00002800

/* Draw a primitive from immediate vertices in this packet
 * Up to 16382 dwords:
 * 0. VAP_VTX_FMT: The first parameter is not written to hardware
 * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
 * 2 to end: Up to 16380 dwords of vertex data.
 */
#define R300_PACKET3_3D_DRAW_IMMD 0x00002900

/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and
 * immediate vertices in this packet
 * Up to 16382 dwords:
 * 0. VAP_VTX_FMT: The first parameter is not written to hardware
 * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
 * 2 to end: Up to 16380 dwords of vertex data.
 * NOTE(review): this text is nearly identical to the DRAW_IMMD description;
 * for an indexed draw the trailing dwords are presumably indices rather than
 * vertex data - confirm.
 */
#define R300_PACKET3_3D_DRAW_INDX 0x00002A00
 
 
/* Specify the full set of vertex arrays as (address, stride).
 * The first parameter is the number of vertex arrays specified.
 * The rest of the command is a variable length list of blocks, where
 * each block is three dwords long and specifies two arrays.
 * The first dword of a block is split into two words, the lower significant
 * word refers to the first array, the more significant word to the second
 * array in the block.
 * The low byte of each word contains the size of an array entry in dwords,
 * the high byte contains the stride of the array.
 * The second dword of a block contains the pointer to the first array,
 * the third dword of a block contains the pointer to the second array.
 * Note that if the total number of arrays is odd, the third dword of
 * the last block is omitted.
 *
 * The R300_VBPNTR_* helpers divide their argument by four (>> 2) before
 * placing it in the matching byte, i.e. they presumably take sizes and
 * strides in bytes and store them in dwords.
 */
#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
# define R300_VC_FORCE_PREFETCH (1 << 5)
# define R300_VBPNTR_SIZE0(x) ((x) >> 2)
# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8)
# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16)
# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24)
 
#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200
/* INDX_BUFFER packet opcode, followed by the shift amounts and flag bit used
 * to build its first parameter dword. */
#define R300_PACKET3_INDX_BUFFER 0x00003300
# define R300_INDX_BUFFER_DST_SHIFT 0
# define R300_INDX_BUFFER_SKIP_SHIFT 16
/* Unsigned literal: shifting a signed 1 into bit 31 is undefined behaviour. */
# define R300_INDX_BUFFER_ONE_REG_WR (1u << 31)
 
/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */
#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400
/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */
#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500
/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */
#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600
 
/* Clears a portion of hierarchical Z RAM
* 3 dword parameters
* 0. START
* 1. COUNT: 13:0 (max is 0x3FFF)
* 2. CLEAR_VALUE: Value to write into HIZ RAM.
*/
#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
#define R300_PACKET3_3D_CLEAR_CMASK 0x00003800
 
/* Draws a set of primitives using vertex buffers pointed by the state data.
* At least 2 Parameters:
* 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword.
* 2 to end: Data or indices (see other 3D_DRAW_* packets for details)
*/
#define R300_PACKET3_3D_DRAW_128 0x00003900
 
/* END: Packet 3 commands */
 
 
/* Color formats for 2d packets
*/
#define R300_CP_COLOR_FORMAT_CI8 2
#define R300_CP_COLOR_FORMAT_ARGB1555 3
#define R300_CP_COLOR_FORMAT_RGB565 4
#define R300_CP_COLOR_FORMAT_ARGB8888 6
#define R300_CP_COLOR_FORMAT_RGB332 7
#define R300_CP_COLOR_FORMAT_RGB8 9
#define R300_CP_COLOR_FORMAT_ARGB4444 15
 
/*
* CP type-3 packets
*/
#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00
 
/* XXX Corbin's stuff from radeon and r200 */
 
#define RADEON_WAIT_UNTIL 0x1720
# define RADEON_WAIT_CRTC_PFLIP (1 << 0)
# define RADEON_WAIT_2D_IDLECLEAN (1 << 16)
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
 
#define R200_3D_DRAW_IMMD_2 0xC0003500
 
#define RADEON_CP_PACKET0 0x0 /* XXX stolen from radeon_reg.h */
#define RADEON_CP_PACKET3 0xC0000000
 
#define RADEON_ONE_REG_WR (1 << 15)
 
/* Build a type-0 packet header: the count is placed at bit 16 and up, and the
 * register value is shifted right by 2 (presumably turning a byte offset
 * into a dword index -- confirm) to form the low bits. */
#define CP_PACKET0(register, count) \
(RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))

/* Build a type-3 packet header: the opcode value is OR-ed in unshifted and
 * the count is placed at bit 16 and up. */
#define CP_PACKET3(op, count) \
(RADEON_CP_PACKET3 | (op) | ((count) << 16))
 
#endif /* _R300_REG_H */
 
/* *INDENT-ON* */
 
/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_render.c
0,0 → 1,1208
/*
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/* r300_render: Vertex and index buffer primitive emission. Contains both
* HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */
 
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
 
#include "util/u_inlines.h"
 
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/u_prim.h"
 
#include "r300_cs.h"
#include "r300_context.h"
#include "r300_screen_buffer.h"
#include "r300_emit.h"
#include "r300_reg.h"
 
#include <limits.h>
 
#define IMMD_DWORDS 32
 
/**
 * Translate a primitive type into the matching R300_VAP_VF_CNTL__PRIM_*
 * hardware value.
 *
 * \param prim  Index into the conversion table (callers in this file pass
 *              the draw mode, i.e. a PIPE_PRIM_* value).
 * \return The hardware primitive bits to OR into the VAP_VF_CNTL dword.
 *
 * Entries without a hardware equivalent are marked with -1 in the table.
 * Asking for one of those, or for a value beyond the end of the table, is a
 * caller bug and trips an assertion in debug builds.
 */
static uint32_t r300_translate_primitive(unsigned prim)
{
    static const int prim_conv[] = {
        R300_VAP_VF_CNTL__PRIM_POINTS,
        R300_VAP_VF_CNTL__PRIM_LINES,
        R300_VAP_VF_CNTL__PRIM_LINE_LOOP,
        R300_VAP_VF_CNTL__PRIM_LINE_STRIP,
        R300_VAP_VF_CNTL__PRIM_TRIANGLES,
        R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP,
        R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN,
        R300_VAP_VF_CNTL__PRIM_QUADS,
        R300_VAP_VF_CNTL__PRIM_QUAD_STRIP,
        R300_VAP_VF_CNTL__PRIM_POLYGON,
        -1,
        -1,
        -1,
        -1
    };
    int hwprim;

    /* Never index past the end of the table. */
    assert(prim < sizeof(prim_conv) / sizeof(prim_conv[0]));
    hwprim = prim_conv[prim];

    /* Keep the value signed so the -1 sentinel is tested directly instead of
     * relying on an implicit signed/unsigned conversion. */
    assert(hwprim != -1);
    return (uint32_t)hwprim;
}
 
/**
 * Build the GA_COLOR_CONTROL value for a draw by adding the provoking-vertex
 * selection to the rasterizer state's base color_control.
 *
 * \param r300  The context; its current rasterizer state is consulted.
 * \param mode  The primitive type being drawn (PIPE_PRIM_*).
 * \return color_control with one PROVOKING_VERTEX_* selection OR-ed in.
 */
static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
                                            unsigned mode)
{
    struct r300_rs_state *rs = (struct r300_rs_state *)r300->rs_state.state;
    uint32_t provoking;

    /* By default (see r300_state.c:r300_create_rs_state) color_control is
     * initialized to provoking the first vertex.
     *
     * In Gallium flatshade-first mode the hardware needs some help:
     *
     * - Triangle fans must be reduced to the second vertex, not the first,
     *   as per the GL spec.
     *   (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt)
     *
     * - Quads never provoke correctly. The first vertex is never considered
     *   as provoking, so only the second, third, and fourth vertices can be
     *   selected, and both "third" and "last" modes select the fourth
     *   vertex. This is probably due to D3D lacking quads.
     *
     * - Similarly, polygons reduce to the first, not the last, vertex, when
     *   in "last" mode, and all other modes start from the second vertex.
     *
     * ~ C.
     */
    if (!rs->rs.flatshade_first) {
        provoking = R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
    } else if (mode == PIPE_PRIM_TRIANGLE_FAN) {
        provoking = R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
    } else if (mode == PIPE_PRIM_QUADS ||
               mode == PIPE_PRIM_QUAD_STRIP ||
               mode == PIPE_PRIM_POLYGON) {
        provoking = R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
    } else {
        provoking = R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST;
    }

    return rs->color_control | provoking;
}
 
/**
 * Emit the R500_VAP_INDEX_OFFSET register.
 *
 * The low 24 bits of the bias are written unchanged and bit 24 is set when
 * the bias is negative.
 *
 * \param r300        The context whose command stream receives the write.
 * \param index_bias  The signed index bias to program.
 */
void r500_emit_index_bias(struct r300_context *r300, int index_bias)
{
    int negative_flag = 0;
    CS_LOCALS(r300);

    if (index_bias < 0)
        negative_flag = 1 << 24;

    BEGIN_CS(2);
    OUT_CS_REG(R500_VAP_INDEX_OFFSET,
               (index_bias & 0xFFFFFF) | negative_flag);
    END_CS;
}
 
/**
 * Emit the per-draw setup that precedes every HW TCL draw packet: the
 * provoking-vertex color control and the vertex index limits.
 * Always writes exactly 5 dwords.
 *
 * \param r300       The context.
 * \param mode       The primitive type, used to pick the provoking vertex.
 * \param max_index  Largest vertex index allowed; must fit in 24 bits.
 */
static void r300_emit_draw_init(struct r300_context *r300, unsigned mode,
                                unsigned max_index)
{
    uint32_t color_control = r300_provoking_vertex_fixes(r300, mode);
    CS_LOCALS(r300);

    assert(max_index < (1 << 24));

    BEGIN_CS(5);
    OUT_CS_REG(R300_GA_COLOR_CONTROL, color_control);
    /* Two-register sequence starting at VAP_VF_MAX_VTX_INDX; the second
     * value written is 0 (presumably the minimum index -- confirm). */
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(max_index);
    OUT_CS(0);
    END_CS;
}
 
/* This function splits the index bias value into two parts:
 * - buffer_offset: the value that can be safely added to buffer offsets
 *   in r300_emit_vertex_arrays (it must yield a positive offset when added to
 *   a vertex buffer offset)
 * - index_offset: the value that must be manually subtracted from indices
 *   in an index buffer to achieve negative offsets.
 *
 * The two outputs always satisfy buffer_offset + index_offset == index_bias.
 * A non-negative bias is passed through entirely as buffer_offset. */
static void r300_split_index_bias(struct r300_context *r300, int index_bias,
                                  int *buffer_offset, int *index_offset)
{
    struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer;
    struct pipe_vertex_element *velem = r300->velems->velem;
    unsigned i, size;
    int max_neg_bias;

    if (index_bias < 0) {
        /* See how large index bias we may subtract. We must be careful
         * here because negative buffer offsets are not allowed
         * by the DRM API. */
        max_neg_bias = INT_MAX;
        for (i = 0; i < r300->velems->count; i++) {
            vb = &vbufs[velem[i].vertex_buffer_index];

            /* A zero stride would divide by zero below. Such a buffer is
             * skipped: its fetch address presumably does not move with the
             * bias, so it imposes no limit (confirm against
             * r300_emit_vertex_arrays). */
            if (!vb->stride)
                continue;

            /* Number of whole vertices that fit in front of this element. */
            size = (vb->buffer_offset + velem[i].src_offset) / vb->stride;
            max_neg_bias = MIN2(max_neg_bias, size);
        }

        /* Now set the minimum allowed value. */
        *buffer_offset = MAX2(-max_neg_bias, index_bias);
    } else {
        /* A positive index bias is OK. */
        *buffer_offset = index_bias;
    }

    /* Whatever could not be folded into the buffer offset has to be applied
     * to the indices themselves. */
    *index_offset = index_bias - *buffer_offset;
}
 
/* Bit flags selecting which preparation steps run before a draw. */
enum r300_prepare_flags {
    PREP_EMIT_STATES        = 0x01, /* call emit_dirty_state and friends? */
    PREP_VALIDATE_VBOS      = 0x02, /* validate VBOs? */
    PREP_EMIT_VARRAYS       = 0x04, /* call emit_vertex_arrays? */
    PREP_EMIT_VARRAYS_SWTCL = 0x08, /* call emit_vertex_arrays_swtcl? */
    PREP_INDEXED            = 0x10  /* is this draw_elements? */
};
 
/**
 * Make sure the command stream has room for a draw, flushing it if not.
 *
 * The caller's own dword count is increased by everything else that may be
 * emitted around the draw: dirty state, the R500 index offset, vertex
 * arrays, and the dwords needed to close the command stream.
 *
 * \param r300      The context.
 * \param flags     See r300_prepare_flags.
 * \param cs_dwords The number of dwords to reserve in CS.
 * \return TRUE if the CS was flushed
 */
static boolean r300_reserve_cs_dwords(struct r300_context *r300,
                                      enum r300_prepare_flags flags,
                                      unsigned cs_dwords)
{
    unsigned needed = cs_dwords;

    /* Add dirty state, index offset, and AOS. */
    if (flags & PREP_EMIT_STATES)
        needed += r300_get_num_dirty_dwords(r300);

    if (r300->screen->caps.is_r500)
        needed += 2; /* emit_index_offset */

    if (flags & PREP_EMIT_VARRAYS)
        needed += 55; /* emit_vertex_arrays */

    if (flags & PREP_EMIT_VARRAYS_SWTCL)
        needed += 7; /* emit_vertex_arrays_swtcl */

    needed += r300_get_num_cs_end_dwords(r300);

    /* Enough room left: nothing to do. */
    if (needed <= (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw))
        return FALSE;

    r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
    return TRUE;
}
 
/**
 * Validate buffers and emit dirty state.
 * \param r300          The context.
 * \param flags         See r300_prepare_flags.
 * \param index_buffer  The index buffer to validate. The parameter may be NULL.
 * \param buffer_offset The offset passed to emit_vertex_arrays.
 * \param index_bias    The index bias to emit.
 * \param instance_id   Index of instance to render
 * \return TRUE if rendering may proceed, FALSE if buffer validation failed
 *         and the draw should be skipped
 */
static boolean r300_emit_states(struct r300_context *r300,
                                enum r300_prepare_flags flags,
                                struct pipe_resource *index_buffer,
                                int buffer_offset,
                                int index_bias, int instance_id)
{
    boolean indexed = flags & PREP_INDEXED;
    boolean validate_vbos = flags & PREP_VALIDATE_VBOS;

    /* Validate buffers and emit dirty state if needed. */
    if ((flags & PREP_EMIT_STATES) ||
        ((flags & PREP_EMIT_VARRAYS) && validate_vbos)) {
        if (!r300_emit_buffer_validate(r300, validate_vbos,
                                       index_buffer)) {
            fprintf(stderr, "r300: CS space validation failed. "
                    "(not enough memory?) Skipping rendering.\n");
            return FALSE;
        }
    }

    if (flags & PREP_EMIT_STATES)
        r300_emit_dirty_state(r300);

    /* R500 always gets an index offset write; without hardware TCL the
     * offset written is zero. */
    if (r300->screen->caps.is_r500) {
        r500_emit_index_bias(r300,
                             r300->screen->caps.has_tcl ? index_bias : 0);
    }

    if (flags & PREP_EMIT_VARRAYS) {
        /* Re-emit only when something the arrays depend on has changed. */
        boolean stale = r300->vertex_arrays_dirty ||
                        r300->vertex_arrays_indexed != indexed ||
                        r300->vertex_arrays_offset != buffer_offset ||
                        r300->vertex_arrays_instance_id != instance_id;

        if (stale) {
            r300_emit_vertex_arrays(r300, buffer_offset, indexed,
                                    instance_id);

            r300->vertex_arrays_dirty = FALSE;
            r300->vertex_arrays_indexed = indexed;
            r300->vertex_arrays_offset = buffer_offset;
            r300->vertex_arrays_instance_id = instance_id;
        }
    }

    if (flags & PREP_EMIT_VARRAYS_SWTCL)
        r300_emit_vertex_arrays_swtcl(r300, indexed);

    return TRUE;
}
 
/**
* Check if the requested number of dwords is available in the CS and
* if not, flush. Then validate buffers and emit dirty state.
* \param r300 The context.
* \param flags See r300_prepare_flags.
* \param index_buffer The index buffer to validate. The parameter may be NULL.
* \param cs_dwords The number of dwords to reserve in CS.
* \param buffer_offset The offset passed to emit_vertex_arrays.
* \param index_bias The index bias to emit.
* \param instance_id The instance to render.
* \return TRUE if rendering should be skipped
*/
static boolean r300_prepare_for_rendering(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
unsigned cs_dwords,
int buffer_offset,
int index_bias,
int instance_id)
{
/* Make sure there is enough space in the command stream and emit states. */
if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
flags |= PREP_EMIT_STATES;
 
return r300_emit_states(r300, flags, index_buffer, buffer_offset,
index_bias, instance_id);
}
 
/**
 * Decide whether a non-indexed draw is small enough to embed its vertices
 * directly in the command stream.
 *
 * \param r300   The context.
 * \param count  Number of vertices in the draw.
 * \return TRUE if the whole draw needs at most IMMD_DWORDS dwords of vertex
 *         data and immediate mode has not been disabled for debugging.
 */
static boolean immd_is_good_idea(struct r300_context *r300,
                                 unsigned count)
{
    unsigned vertex_size;

    if (DBG_ON(r300, DBG_NO_IMMD)) {
        return FALSE;
    }

    vertex_size = r300->velems->vertex_size_dwords;

    /* Same test as count * vertex_size > IMMD_DWORDS, written as a division
     * so that a huge count cannot wrap around and look small. A zero vertex
     * size never exceeds the limit. */
    if (vertex_size && count > IMMD_DWORDS / vertex_size) {
        return FALSE;
    }

    /* Buffers can only be used for read by r300 (except query buffers, but
     * those can't be bound by a state tracker as vertex buffers). */
    return TRUE;
}
 
/*****************************************************************************
* The HWTCL draw functions. *
****************************************************************************/
 
static void r300_draw_arrays_immediate(struct r300_context *r300,
const struct pipe_draw_info *info)
{
struct pipe_vertex_element* velem;
struct pipe_vertex_buffer* vbuf;
unsigned vertex_element_count = r300->velems->count;
unsigned i, v, vbi;
 
/* Size of the vertex, in dwords. */
unsigned vertex_size = r300->velems->vertex_size_dwords;
 
/* The number of dwords for this draw operation. */
unsigned dwords = 4 + info->count * vertex_size;
 
/* Size of the vertex element, in dwords. */
unsigned size[PIPE_MAX_ATTRIBS];
 
/* Stride to the same attrib in the next vertex in the vertex buffer,
* in dwords. */
unsigned stride[PIPE_MAX_ATTRIBS];
 
/* Mapped vertex buffers. */
uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
uint32_t* mapelem[PIPE_MAX_ATTRIBS];
 
CS_LOCALS(r300);
 
if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
return;
 
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
size[i] = r300->velems->format_size[i] / 4;
vbi = velem->vertex_buffer_index;
vbuf = &r300->vertex_buffer[vbi];
stride[i] = vbuf->stride / 4;
 
/* Map the buffer. */
if (!map[vbi]) {
map[vbi] = (uint32_t*)r300->rws->buffer_map(
r300_resource(vbuf->buffer)->cs_buf,
r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start;
}
mapelem[i] = map[vbi] + (velem->src_offset / 4);
}
 
r300_emit_draw_init(r300, info->mode, info->count-1);
 
BEGIN_CS(dwords);
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) |
r300_translate_primitive(info->mode));
 
/* Emit vertices. */
for (v = 0; v < info->count; v++) {
for (i = 0; i < vertex_element_count; i++) {
OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
}
}
END_CS;
}
 
/**
 * Emit one non-indexed draw from the currently bound vertex arrays.
 *
 * Counts above 65535 are passed through R500_VAP_ALT_NUM_VERTICES, costing
 * two extra dwords. Counts of 2^24 or more are refused with a message.
 *
 * \param r300   The context.
 * \param mode   The primitive type.
 * \param count  Number of vertices to draw.
 */
static void r300_emit_draw_arrays(struct r300_context *r300,
                                  unsigned mode,
                                  unsigned count)
{
    boolean alt_num_verts = count > 65535;
    unsigned packet_dwords = alt_num_verts ? 4 : 2;
    uint32_t vf_cntl;
    CS_LOCALS(r300);

    if (count >= (1 << 24)) {
        fprintf(stderr, "r300: Got a huge number of vertices: %i, "
                "refusing to render.\n", count);
        return;
    }

    r300_emit_draw_init(r300, mode, count-1);

    /* Assemble the primitive emission dword up front. */
    vf_cntl = R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
              r300_translate_primitive(mode);
    if (alt_num_verts)
        vf_cntl |= R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS;

    BEGIN_CS(packet_dwords);
    if (alt_num_verts) {
        OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
    }
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(vf_cntl);
    END_CS;
}
 
/**
 * Emit one indexed draw that fetches its indices from a buffer object.
 *
 * In the worst case this writes 19 dwords: 5 from r300_emit_draw_init,
 * 4 for the embedded first triangle and 10 for the main draw.
 *
 * \param r300          The context.
 * \param indexBuffer   The buffer holding the indices.
 * \param indexSize     Size of one index in bytes; 4 selects 32-bit indices,
 *                      anything else is treated as 16-bit.
 * \param max_index     Largest vertex index allowed.
 * \param mode          The primitive type.
 * \param start         First index to draw.
 * \param count         Number of indices to draw.
 * \param imm_indices3  The first three indices of the draw; read only when
 *                      16-bit indices start at an odd position and the
 *                      primitive type is triangles.
 */
static void r300_emit_draw_elements(struct r300_context *r300,
struct pipe_resource* indexBuffer,
unsigned indexSize,
unsigned max_index,
unsigned mode,
unsigned start,
unsigned count,
uint16_t *imm_indices3)
{
uint32_t count_dwords, offset_dwords;
/* Evaluated on the incoming count, before the odd-start path below
 * subtracts 3 from it. */
boolean alt_num_verts = count > 65535;
CS_LOCALS(r300);

if (count >= (1 << 24)) {
fprintf(stderr, "r300: Got a huge number of vertices: %i, "
"refusing to render (max_index: %i).\n", count, max_index);
return;
}

DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, max %u\n",
count, max_index);

r300_emit_draw_init(r300, mode, max_index);

/* If start is odd, render the first triangle with indices embedded
* in the command stream. This will increase start by 3 and make it
* even. We can then proceed without a fallback. */
if (indexSize == 2 && (start & 1) &&
mode == PIPE_PRIM_TRIANGLES) {
BEGIN_CS(4);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) |
R300_VAP_VF_CNTL__PRIM_TRIANGLES);
/* Two 16-bit indices per dword, the first one in the low half. */
OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]);
OUT_CS(imm_indices3[2]);
END_CS;

start += 3;
count -= 3;
if (!count)
return;
}

/* Byte offset of the first index, rounded down to whole dwords. */
offset_dwords = indexSize * start / sizeof(uint32_t);

BEGIN_CS(8 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
}
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
if (indexSize == 4) {
count_dwords = count;
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
r300_translate_primitive(mode) |
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
} else {
/* Two indices share a dword, so round the count up. */
count_dwords = (count + 1) / 2;
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
r300_translate_primitive(mode) |
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
}

/* Point the hardware at the index data: destination register, byte offset
 * into the buffer, size in dwords, then the buffer relocation. */
OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
OUT_CS(count_dwords);
OUT_CS_RELOC(r300_resource(indexBuffer));
END_CS;
}
 
/**
 * Draw an indexed primitive whose indices live in a user buffer by embedding
 * the indices directly in the command stream.
 *
 * 8-bit and 16-bit indices are packed two per dword, the first index in the
 * low half; 32-bit indices take one dword each. On chips other than R500 a
 * non-zero index bias is added to every index here in software.
 *
 * \param r300  The context; index_buffer.user_buffer supplies the indices.
 * \param info  The draw; count must be at least 1.
 */
static void r300_draw_elements_immediate(struct r300_context *r300,
                                         const struct pipe_draw_info *info)
{
    const uint8_t *ptr1;
    const uint16_t *ptr2;
    const uint32_t *ptr4;
    unsigned index_size = r300->index_buffer.index_size;
    unsigned i, count_dwords = index_size == 4 ? info->count :
                                                 (info->count + 1) / 2;
    CS_LOCALS(r300);

    /* Reserve 5 dwords for r300_emit_draw_init plus 2 + count_dwords for the
     * draw packet emitted below. Give up if the function fails. */
    if (!r300_prepare_for_rendering(r300,
            PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS |
            PREP_INDEXED, NULL, 5 + 2 + count_dwords, 0, info->index_bias, -1))
        return;

    r300_emit_draw_init(r300, info->mode, info->max_index);

    BEGIN_CS(2 + count_dwords);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords);

    switch (index_size) {
    case 1:
        ptr1 = (uint8_t*)r300->index_buffer.user_buffer;
        ptr1 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr1[i+1] + info->index_bias) << 16) |
                        (ptr1[i]   + info->index_bias));

            /* Odd count: the last index gets a dword of its own. */
            if (info->count & 1)
                OUT_CS(ptr1[i] + info->index_bias);
        } else {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr1[i+1]) << 16) |
                        (ptr1[i]  ));

            if (info->count & 1)
                OUT_CS(ptr1[i]);
        }
        break;

    case 2:
        ptr2 = (uint16_t*)r300->index_buffer.user_buffer;
        ptr2 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr2[i+1] + info->index_bias) << 16) |
                        (ptr2[i]   + info->index_bias));

            if (info->count & 1)
                OUT_CS(ptr2[i] + info->index_bias);
        } else {
            /* Bulk-copy only the complete pairs. For an odd count the last
             * index is emitted separately, so nothing is read beyond the
             * final index of the user buffer. */
            OUT_CS_TABLE(ptr2, info->count / 2);

            if (info->count & 1)
                OUT_CS(ptr2[info->count - 1]);
        }
        break;

    case 4:
        ptr4 = (uint32_t*)r300->index_buffer.user_buffer;
        ptr4 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count; i++)
                OUT_CS(ptr4[i] + info->index_bias);
        } else {
            OUT_CS_TABLE(ptr4, count_dwords);
        }
        break;
    }
    END_CS;
}
 
/**
 * Draw an indexed primitive from an index buffer object, translating,
 * uploading or realigning the indices first where the hardware needs it.
 *
 * Draws of more than 65535 indices use the alternate vertex count on R500;
 * on other chips they are split into chunks.
 *
 * \param r300         The context.
 * \param info         The draw.
 * \param instance_id  Instance to render, forwarded to the vertex array
 *                     setup (the non-instanced path in this file passes -1).
 */
static void r300_draw_elements(struct r300_context *r300,
                               const struct pipe_draw_info *info,
                               int instance_id)
{
    struct pipe_resource *indexBuffer = r300->index_buffer.buffer;
    unsigned indexSize = r300->index_buffer.index_size;
    struct pipe_resource* orgIndexBuffer = indexBuffer;
    unsigned start = info->start;
    unsigned count = info->count;
    /* Same threshold as r300_emit_draw_elements and as the test below, so a
     * draw of exactly 65536 indices on R500 is not split. */
    boolean alt_num_verts = r300->screen->caps.is_r500 &&
                            count > 65535;
    unsigned short_count;
    int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
    uint16_t indices3[3];

    if (info->index_bias && !r300->screen->caps.is_r500) {
        r300_split_index_bias(r300, info->index_bias, &buffer_offset,
                              &index_offset);
    }

    r300_translate_index_buffer(r300, &r300->index_buffer, &indexBuffer,
                                &indexSize, index_offset, &start, count);

    /* Fallback for misaligned ushort indices. */
    if (indexSize == 2 && (start & 1) && indexBuffer) {
        /* If we got here, then orgIndexBuffer == indexBuffer. */
        uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->cs_buf,
                                              r300->cs,
                                              PIPE_TRANSFER_READ |
                                              PIPE_TRANSFER_UNSYNCHRONIZED);

        if (info->mode == PIPE_PRIM_TRIANGLES) {
            /* Grab the first triangle; r300_emit_draw_elements embeds it in
             * the command stream, which realigns the remaining indices. */
            memcpy(indices3, ptr + start, 6);
        } else {
            /* Copy the mapped index buffer directly to the upload buffer.
             * The start index will be aligned simply from the fact that
             * every sub-buffer in the upload buffer is aligned. */
            r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start,
                                     count, (uint8_t*)ptr);
        }
    } else {
        if (r300->index_buffer.user_buffer)
            r300_upload_index_buffer(r300, &indexBuffer, indexSize,
                                     &start, count,
                                     r300->index_buffer.user_buffer);
    }

    /* 19 dwords for emit_draw_elements. Give up if the function fails. */
    if (!r300_prepare_for_rendering(r300,
            PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS |
            PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias,
            instance_id))
        goto done;

    if (alt_num_verts || count <= 65535) {
        r300_emit_draw_elements(r300, indexBuffer, indexSize,
                                info->max_index, info->mode, start, count,
                                indices3);
    } else {
        /* NOTE(review): indices3 is captured once, for the original start.
         * If a later chunk also begins on an odd 16-bit index, the same
         * three indices look like they would be replayed -- confirm. */
        do {
            /* The maximum must be divisible by 4 and 3,
             * so that quad and triangle lists are split correctly.
             *
             * Strips, loops, and fans won't work. */
            short_count = MIN2(count, 65532);

            r300_emit_draw_elements(r300, indexBuffer, indexSize,
                                    info->max_index,
                                    info->mode, start, short_count, indices3);

            start += short_count;
            count -= short_count;

            /* 19 dwords for emit_draw_elements */
            if (count) {
                if (!r300_prepare_for_rendering(r300,
                        PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED,
                        indexBuffer, 19, buffer_offset, info->index_bias,
                        instance_id))
                    goto done;
            }
        } while (count);
    }

done:
    /* Drop the reference to any temporary index buffer created above. */
    if (indexBuffer != orgIndexBuffer) {
        pipe_resource_reference( &indexBuffer, NULL );
    }
}
 
/**
 * Draw a non-indexed primitive from the bound vertex buffers.
 *
 * Draws of more than 65535 vertices use the alternate vertex count on R500;
 * on other chips they are split into chunks, re-emitting the vertex arrays
 * at the new start for each chunk.
 *
 * \param r300         The context.
 * \param info         The draw; start, count and mode are used.
 * \param instance_id  Instance to render, forwarded to the vertex array
 *                     setup (the non-instanced path in this file passes -1).
 */
static void r300_draw_arrays(struct r300_context *r300,
                             const struct pipe_draw_info *info,
                             int instance_id)
{
    /* Same threshold as r300_emit_draw_arrays and as the test below, so a
     * draw of exactly 65536 vertices on R500 is not split. */
    boolean alt_num_verts = r300->screen->caps.is_r500 &&
                            info->count > 65535;
    unsigned start = info->start;
    unsigned count = info->count;
    unsigned short_count;

    /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
    if (!r300_prepare_for_rendering(r300,
                                    PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS,
                                    NULL, 9, start, 0, instance_id))
        return;

    if (alt_num_verts || count <= 65535) {
        r300_emit_draw_arrays(r300, info->mode, count);
    } else {
        do {
            /* The maximum must be divisible by 4 and 3,
             * so that quad and triangle lists are split correctly.
             *
             * Strips, loops, and fans won't work. */
            short_count = MIN2(count, 65532);
            r300_emit_draw_arrays(r300, info->mode, short_count);

            start += short_count;
            count -= short_count;

            /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
            if (count) {
                if (!r300_prepare_for_rendering(r300,
                                                PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS, NULL, 9,
                                                start, 0, instance_id))
                    return;
            }
        } while (count);
    }
}
 
/* Draw every instance of a non-indexed draw, one r300_draw_arrays call per
 * instance, passing the instance number along. */
static void r300_draw_arrays_instanced(struct r300_context *r300,
                                       const struct pipe_draw_info *info)
{
    int instance = 0;

    while (instance < info->instance_count) {
        r300_draw_arrays(r300, info, instance);
        instance++;
    }
}
 
/* Draw every instance of an indexed draw, one r300_draw_elements call per
 * instance, passing the instance number along. */
static void r300_draw_elements_instanced(struct r300_context *r300,
                                         const struct pipe_draw_info *info)
{
    int instance = 0;

    while (instance < info->instance_count) {
        r300_draw_elements(r300, info, instance);
        instance++;
    }
}
 
/**
 * Compute how many vertices can be fetched without reading past the end of
 * any bound per-vertex buffer.
 *
 * \param r300  The context.
 * \return The smallest vertex capacity over all per-vertex elements;
 *         0 if some buffer cannot hold even one element;
 *         ~0 if no element limits the count.
 */
static unsigned r300_max_vertex_count(struct r300_context *r300)
{
    unsigned i, nr = r300->velems->count;
    struct pipe_vertex_element *velems = r300->velems->velem;
    unsigned result = ~0;

    for (i = 0; i < nr; i++) {
        struct pipe_vertex_buffer *vb =
            &r300->vertex_buffer[velems[i].vertex_buffer_index];
        unsigned size, max_count, value;

        /* We're not interested in constant and per-instance attribs. */
        if (!vb->buffer ||
            !vb->stride ||
            velems[i].instance_divisor) {
            continue;
        }

        size = vb->buffer->width0;

        /* Subtract buffer_offset. */
        value = vb->buffer_offset;
        if (value >= size) {
            return 0;
        }
        size -= value;

        /* Subtract src_offset. */
        value = velems[i].src_offset;
        if (value >= size) {
            return 0;
        }
        size -= value;

        /* Subtract format_size. The element only fails to fit when it is
         * strictly larger than what is left; an exact fit still leaves room
         * for one vertex. */
        value = r300->velems->format_size[i];
        if (value > size) {
            return 0;
        }
        size -= value;

        /* Compute the max count: the first vertex, plus one more for every
         * full stride that still fits behind it. */
        max_count = 1 + size / vb->stride;
        result = MIN2(result, max_count);
    }
    return result;
}
 
 
/**
 * Draw entry point for the HW TCL path.
 *
 * Trims the vertex count to a whole number of primitives, updates derived
 * state and dispatches to the immediate, regular or instanced variant of the
 * indexed or non-indexed draw.
 *
 * \param pipe   The pipe context.
 * \param dinfo  The draw description; it is copied, never modified.
 */
static void r300_draw_vbo(struct pipe_context* pipe,
                          const struct pipe_draw_info *dinfo)
{
    struct r300_context* r300 = r300_context(pipe);
    /* Private copy: count, max_index and start are adjusted below. */
    struct pipe_draw_info info = *dinfo;

    if (r300->skip_rendering ||
        !u_trim_pipe_prim(info.mode, &info.count)) {
        return;
    }

    r300_update_derived_state(r300);

    /* Draw. */
    if (info.indexed) {
        unsigned max_count = r300_max_vertex_count(r300);

        if (!max_count) {
            fprintf(stderr, "r300: Skipping a draw command. There is a buffer "
                    " which is too small to be used for rendering.\n");
            return;
        }

        if (max_count == ~0) {
            /* There are no per-vertex vertex elements. Use the hardware maximum. */
            max_count = 0xffffff;
        }

        info.max_index = max_count - 1;
        /* Fold the index buffer's byte offset into the start index. */
        info.start += r300->index_buffer.offset / r300->index_buffer.index_size;

        if (info.instance_count <= 1) {
            /* Tiny draws from a user index buffer go into the command stream. */
            if (info.count <= 8 &&
                r300->index_buffer.user_buffer) {
                r300_draw_elements_immediate(r300, &info);
            } else {
                r300_draw_elements(r300, &info, -1);
            }
        } else {
            r300_draw_elements_instanced(r300, &info);
        }
    } else {
        if (info.instance_count <= 1) {
            if (immd_is_good_idea(r300, info.count)) {
                r300_draw_arrays_immediate(r300, &info);
            } else {
                r300_draw_arrays(r300, &info, -1);
            }
        } else {
            r300_draw_arrays_instanced(r300, &info);
        }
    }
}
 
/****************************************************************************
* The rest of this file is for SW TCL rendering only. Please be polite and *
* keep these functions separated so that they are easier to locate. ~C. *
***************************************************************************/
 
/* Draw entry point for the SW TCL path: update derived state, hand the draw
 * to the Draw module and flush it. Does nothing while rendering is being
 * skipped. */
static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
                                const struct pipe_draw_info *info)
{
    struct r300_context* r300 = r300_context(pipe);

    if (!r300->skip_rendering) {
        r300_update_derived_state(r300);

        draw_vbo(r300->draw, info);
        draw_flush(r300->draw);
    }
}
 
/* Object for rendering using Draw.
 * It is handed to the Draw module as a struct vbuf_render and cast back to
 * struct r300_render inside the callbacks, so base must stay the first
 * member. */
struct r300_render {
/* Parent class */
struct vbuf_render base;

/* Pipe context */
struct r300_context* r300;

/* Vertex information */
/* Size of one vertex as passed to allocate_vertices (presumably in bytes,
 * since it is multiplied by the vertex count to size the buffer). */
size_t vertex_size;
/* Primitive type last set through set_primitive, and its hardware value. */
unsigned prim;
unsigned hwprim;

/* VBO */
/* High-water mark of the space used since the last release_vertices, which
 * adds it to the context's draw_vbo_offset and resets it to 0. */
size_t vbo_max_used;
/* Pointer returned by mapping the context's vertex buffer. */
uint8_t *vbo_ptr;
};
 
/* Downcast a Draw vbuf_render pointer to the r300 render object that
 * embeds it. */
static INLINE struct r300_render*
r300_render(struct vbuf_render* render)
{
    struct r300_render* self = (struct r300_render*)render;

    return self;
}
 
static const struct vertex_info*
r300_render_get_vertex_info(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
struct r300_context* r300 = r300render->r300;
 
return &r300->vertex_info;
}
 
/**
 * vbuf_render callback: make sure there is room for count vertices of
 * vertex_size each behind the current draw offset of the context's vertex
 * buffer, replacing the buffer with a fresh mapped one if there is not.
 *
 * \param render       The render object.
 * \param vertex_size  Size of one vertex.
 * \param count        Number of vertices wanted.
 * \return TRUE on success, FALSE if a new buffer could not be created or
 *         mapped.
 */
static boolean r300_render_allocate_vertices(struct vbuf_render* render,
                                             ushort vertex_size,
                                             ushort count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    struct radeon_winsys *rws = r300->rws;
    size_t size = (size_t)vertex_size * (size_t)count;

    /* The product of two ushorts always fits in an unsigned. */
    DBG(r300, DBG_DRAW, "r300: render_allocate_vertices (size: %u)\n",
        (unsigned)size);

    if (!r300->vbo || size + r300->draw_vbo_offset > r300->vbo->size) {
        /* Drop the old buffer and everything derived from it. */
        pb_reference(&r300->vbo, NULL);
        r300->vbo_cs = NULL;
        r300render->vbo_ptr = NULL;

        r300->vbo = rws->buffer_create(rws,
                                       MAX2(R300_MAX_DRAW_VBO_SIZE, size),
                                       R300_BUFFER_ALIGNMENT, TRUE,
                                       RADEON_DOMAIN_GTT);
        if (!r300->vbo) {
            return FALSE;
        }
        r300->vbo_cs = rws->buffer_get_cs_handle(r300->vbo);
        r300->draw_vbo_offset = 0;
        r300render->vbo_ptr = rws->buffer_map(r300->vbo_cs, r300->cs,
                                              PIPE_TRANSFER_WRITE);
        if (!r300render->vbo_ptr) {
            /* An unmapped buffer is useless: release it so the next call
             * starts over instead of reusing it with a NULL mapping. */
            pb_reference(&r300->vbo, NULL);
            r300->vbo_cs = NULL;
            return FALSE;
        }
    }

    r300render->vertex_size = vertex_size;
    return TRUE;
}
 
/* vbuf_render callback: return the write pointer for new vertices, i.e. the
 * mapped vertex buffer advanced by the context's current draw offset. */
static void* r300_render_map_vertices(struct vbuf_render* render)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    uint8_t *vertices;

    DBG(r300, DBG_DRAW, "r300: render_map_vertices\n");

    assert(r300render->vbo_ptr);
    vertices = r300render->vbo_ptr + r300->draw_vbo_offset;
    return vertices;
}
 
/* vbuf_render callback: record how much of the buffer has been used, as the
 * space up to and including vertex number max. Only the high-water mark is
 * kept; min is not used. */
static void r300_render_unmap_vertices(struct vbuf_render* render,
                                       ushort min,
                                       ushort max)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    size_t used;

    DBG(r300, DBG_DRAW, "r300: render_unmap_vertices\n");

    used = r300render->vertex_size * (max + 1);
    if (used > r300render->vbo_max_used)
        r300render->vbo_max_used = used;
}
 
/* vbuf_render callback: advance the context's draw offset past the space
 * used so far and reset the usage mark. */
static void r300_render_release_vertices(struct vbuf_render* render)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    size_t consumed = r300render->vbo_max_used;

    DBG(r300, DBG_DRAW, "r300: render_release_vertices\n");

    r300render->vbo_max_used = 0;
    r300->draw_vbo_offset += consumed;
}
 
/* vbuf_render callback: remember the primitive type for the following draws,
 * together with its hardware translation. */
static void r300_render_set_primitive(struct vbuf_render* render,
                                      unsigned prim)
{
    struct r300_render* r300render = r300_render(render);
    unsigned hwprim = r300_translate_primitive(prim);

    r300render->prim = prim;
    r300render->hwprim = hwprim;
}
 
/**
 * vbuf_render callback: emit a non-indexed draw of the vertices previously
 * written to the vertex buffer.
 *
 * \param render  The render object.
 * \param start   First vertex; must be 0.
 * \param count   Number of vertices; must be below 65536.
 */
static void r300_render_draw_arrays(struct vbuf_render* render,
                                    unsigned start,
                                    unsigned count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    /* Two register writes (2 dwords each) plus a two-dword draw packet. */
    unsigned dwords = 6;

    CS_LOCALS(r300);

    assert(start == 0);
    assert(count < (1 << 16));

    DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);

    if (!r300_prepare_for_rendering(r300,
                                    PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL,
                                    NULL, dwords, 0, 0, -1)) {
        return;
    }

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, r300render->prim));
    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
           r300render->hwprim);
    END_CS;
}
 
/**
 * vbuf_render callback: emit an indexed draw. The 16-bit indices are copied
 * into a buffer through the context's uploader and fetched from there by
 * the hardware.
 *
 * \param render   The render object.
 * \param indices  The 16-bit indices to draw with.
 * \param count    Number of indices.
 */
static void r300_render_draw_elements(struct vbuf_render* render,
const ushort* indices,
uint count)
{
struct r300_render* r300render = r300_render(render);
struct r300_context* r300 = r300render->r300;
/* Index of the last vertex that fits between the current draw offset and
 * the end of the vertex buffer. */
unsigned max_index = (r300->vbo->size - r300->draw_vbo_offset) /
(r300render->r300->vertex_info.size * 4) - 1;
struct pipe_resource *index_buffer = NULL;
unsigned index_buffer_offset;

CS_LOCALS(r300);
DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);

/* Two bytes per index. */
u_upload_data(r300->uploader, 0, count * 2, indices,
&index_buffer_offset, &index_buffer);
/* The upload produced no buffer: nothing can be drawn. */
if (!index_buffer) {
return;
}

if (!r300_prepare_for_rendering(r300,
PREP_EMIT_STATES |
PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
index_buffer, 12, 0, 0, -1)) {
pipe_resource_reference(&index_buffer, NULL);
return;
}

BEGIN_CS(12);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, r300render->prim));
OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index);

OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
r300render->hwprim);

OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
OUT_CS(index_buffer_offset);
/* Size of the index data in dwords: two indices per dword, rounded up. */
OUT_CS((count + 1) / 2);
OUT_CS_RELOC(r300_resource(index_buffer));
END_CS;

/* Drop the local reference taken by the upload. */
pipe_resource_reference(&index_buffer, NULL);
}
 
/* vbuf_render callback: free the render object itself. Nothing else is
 * released here. */
static void r300_render_destroy(struct vbuf_render* render)
{
FREE(render);
}
 
/**
 * Allocate a render object for the given context and fill in its limits and
 * vbuf_render callbacks.
 *
 * \param r300  The context the render object will draw through.
 * \return The embedded vbuf_render, or NULL if the allocation failed.
 */
static struct vbuf_render* r300_render_create(struct r300_context* r300)
{
    struct r300_render* r300render = CALLOC_STRUCT(r300_render);

    /* Out of memory: report it instead of dereferencing NULL below. */
    if (!r300render) {
        return NULL;
    }

    r300render->r300 = r300;

    r300render->base.max_vertex_buffer_bytes = R300_MAX_DRAW_VBO_SIZE;
    r300render->base.max_indices = 16 * 1024;

    r300render->base.get_vertex_info = r300_render_get_vertex_info;
    r300render->base.allocate_vertices = r300_render_allocate_vertices;
    r300render->base.map_vertices = r300_render_map_vertices;
    r300render->base.unmap_vertices = r300_render_unmap_vertices;
    r300render->base.set_primitive = r300_render_set_primitive;
    r300render->base.draw_elements = r300_render_draw_elements;
    r300render->base.draw_arrays = r300_render_draw_arrays;
    r300render->base.release_vertices = r300_render_release_vertices;
    r300render->base.destroy = r300_render_destroy;

    return &r300render->base;
}
 
/* Build the vbuf draw stage that feeds the r300 SW TCL renderer.
 *
 * Creates the renderer, wraps it in a vbuf stage of r300->draw and registers
 * the renderer with the draw module. Returns NULL if either object cannot
 * be created; a renderer that was already created is destroyed again. */
struct draw_stage* r300_draw_stage(struct r300_context* r300)
{
    struct vbuf_render* vbuf = r300_render_create(r300);
    struct draw_stage* result;

    if (vbuf == NULL)
        return NULL;

    result = draw_vbuf_stage(r300->draw, vbuf);
    if (result != NULL) {
        draw_set_render(r300->draw, vbuf);
        return result;
    }

    /* No stage took ownership of the renderer, so release it here. */
    vbuf->destroy(vbuf);
    return NULL;
}
 
/****************************************************************************
* End of SW TCL functions *
***************************************************************************/
 
/* This function is used to draw a rectangle for the blitter module.
 *
 * If we rendered a quad, the pixels on the main diagonal
 * would be computed and stored twice, which makes the clear/copy codepaths
 * somewhat inefficient. Instead we use a rectangular point sprite.
 *
 * x1, y1, x2, y2: rectangle corners; the sprite is (x2 - x1) wide and
 *                 (y2 - y1) high and is centred on the rectangle.
 * depth:          emitted as the Z coordinate of the single vertex.
 * type:           UTIL_BLITTER_ATTRIB_TEXCOORD loads attrib->f into the
 *                 R300_GA_POINT_S0.. registers; UTIL_BLITTER_ATTRIB_COLOR
 *                 selects the 8-dword vertex carrying attrib->f.
 * attrib:         may be NULL on the 8-dword vertex path (zeros are used);
 *                 it is dereferenced unconditionally for TEXCOORD. */
void r300_blitter_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2,
float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib)
{
struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
/* Saved so it can be restored at "done". */
unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
unsigned width = x2 - x1;
unsigned height = y2 - y1;
/* 8 dwords (4 position + 4 attribute floats) for color blits or when
 * r300->draw is NULL, otherwise 4 dwords (position only). */
unsigned vertex_size =
type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
/* Command stream space to reserve; the TEXCOORD setup adds 7 dwords. */
unsigned dwords = 13 + vertex_size +
(type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
static const union pipe_color_union zeros;
CS_LOCALS(r300);

/* XXX workaround for a lockup in MSAA resolve on SWTCL chipsets, this
 * function most probably doesn't handle type=NONE correctly */
if (!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) {
util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
return;
}

/* Nothing has been modified yet, so no state needs restoring here. */
if (r300->skip_rendering)
return;

if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
r300->sprite_coord_enable = 1;

r300_update_derived_state(r300);

/* Mark some states we don't care about as non-dirty. */
r300->viewport_state.dirty = FALSE;

if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
goto done;

DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");

BEGIN_CS(dwords);
/* Set up GA. */
OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));

if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
/* Set up the GA to generate texcoords. */
OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
(R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
/* Note the component order written here: f[0], f[3], f[2], f[1]. */
OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
OUT_CS_32F(attrib->f[0]);
OUT_CS_32F(attrib->f[3]);
OUT_CS_32F(attrib->f[2]);
OUT_CS_32F(attrib->f[1]);
}

/* Set up VAP controls. */
OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
OUT_CS(1);
OUT_CS(0);

/* Draw. */
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
R300_VAP_VF_CNTL__PRIM_POINTS);

/* Vertex position: centre of the rectangle, depth as Z, W = 1. */
OUT_CS_32F(x1 + width * 0.5f);
OUT_CS_32F(y1 + height * 0.5f);
OUT_CS_32F(depth);
OUT_CS_32F(1);

if (vertex_size == 8) {
/* Fall back to an all-zero attribute when the caller passed none. */
if (!attrib)
attrib = &zeros;
OUT_CS_TABLE(attrib->f, 4);
}
END_CS;

done:
/* Restore the state. */
r300_mark_atom_dirty(r300, &r300->rs_state);
r300_mark_atom_dirty(r300, &r300->viewport_state);

r300->sprite_coord_enable = last_sprite_coord_enable;
}
 
/* Install the context's draw_vbo entry point.
 *
 * Chips without hardware TCL get the software TCL path, all others the
 * hardware path. On non-r500 chips the two-sided stencil reference value
 * fallback is then layered on top of whichever function was chosen. */
void r300_init_render_functions(struct r300_context *r300)
{
    if (!r300->screen->caps.has_tcl) {
        r300->context.draw_vbo = r300_swtcl_draw_vbo;
    } else {
        r300->context.draw_vbo = r300_draw_vbo;
    }

    /* r500 needs no stencil reference fallback. */
    if (r300->screen->caps.is_r500)
        return;

    r300_plug_in_stencil_ref_fallback(r300);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_render_stencilref.c
0,0 → 1,129
/*
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/**
* The two-sided stencil reference value fallback for r3xx-r4xx chips.
* These chips support two-sided stencil functions but they do not support
* a two-sided reference value.
*
* The functions below split every draw call which uses the two-sided
* reference value into two draw calls -- the first one renders front faces
* and the second renders back faces with the other reference value.
*/
 
#include "r300_context.h"
#include "r300_reg.h"
 
/* State kept by the two-sided stencil reference fallback. */
struct r300_stencilref_context {
/* The draw function that was installed before the fallback replaced it;
 * saved by r300_plug_in_stencil_ref_fallback(). */
void (*draw_vbo)(struct pipe_context *pipe,
const struct pipe_draw_info *info);

/* Values saved by r300_stencilref_begin() and written back by
 * r300_stencilref_end(): the rasterizer cull-mode word, the DSA
 * stencil_ref_mask and the front-face stencil reference value. */
uint32_t rs_cull_mode;
uint32_t zb_stencilrefmask;
ubyte ref_value_front;
};
 
static boolean r300_stencilref_needed(struct r300_context *r300)
{
struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
 
return dsa->two_sided_stencil_ref ||
(dsa->two_sided &&
r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]);
}
 
/* First half of a split draw: remember the state that will be modified
 * and cull back faces so only front faces are rendered. */
static void r300_stencilref_begin(struct r300_context *r300)
{
    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
    struct r300_stencilref_context *saved = r300->stencilref_fallback;

    /* Snapshot everything the fallback changes. */
    saved->ref_value_front = r300->stencil_ref.ref_value[0];
    saved->zb_stencilrefmask = dsa->stencil_ref_mask;
    saved->rs_cull_mode = rs->cb_main[rs->cull_mode_index];

    /* Culling is additive here, so existing bits need not be cleared. */
    rs->cb_main[rs->cull_mode_index] =
        rs->cb_main[rs->cull_mode_index] | R300_CULL_BACK;

    r300_mark_atom_dirty(r300, &r300->rs_state);
}
 
/* Second half of a split draw: cull front faces instead and make the
 * back-face stencil reference/mask the active one. */
static void r300_stencilref_switch_side(struct r300_context *r300)
{
    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
    struct r300_stencilref_context *saved = r300->stencilref_fallback;

    /* Use the back-face reference value and mask as the front ones. */
    r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1];
    dsa->stencil_ref_mask = dsa->stencil_ref_bf;

    /* Start again from the saved cull word, this time dropping front faces. */
    rs->cb_main[rs->cull_mode_index] = saved->rs_cull_mode | R300_CULL_FRONT;

    r300_mark_atom_dirty(r300, &r300->rs_state);
    r300_mark_atom_dirty(r300, &r300->dsa_state);
}
 
/* Undo the changes made for a split draw by writing back the values
 * captured in r300_stencilref_begin(). */
static void r300_stencilref_end(struct r300_context *r300)
{
    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
    struct r300_stencilref_context *saved = r300->stencilref_fallback;

    r300->stencil_ref.ref_value[0] = saved->ref_value_front;
    dsa->stencil_ref_mask = saved->zb_stencilrefmask;
    rs->cb_main[rs->cull_mode_index] = saved->rs_cull_mode;

    /* Both atoms were modified, so both must be re-emitted. */
    r300_mark_atom_dirty(r300, &r300->rs_state);
    r300_mark_atom_dirty(r300, &r300->dsa_state);
}
 
/* draw_vbo replacement installed by the fallback.
 *
 * Forwards to the saved draw function unchanged when no split is needed;
 * otherwise issues the draw twice, once per face orientation, and then
 * restores the state. */
static void r300_stencilref_draw_vbo(struct pipe_context *pipe,
                                     const struct pipe_draw_info *info)
{
    struct r300_context *r300 = r300_context(pipe);
    struct r300_stencilref_context *fallback = r300->stencilref_fallback;

    if (!r300_stencilref_needed(r300)) {
        fallback->draw_vbo(pipe, info);
        return;
    }

    /* Front faces first ... */
    r300_stencilref_begin(r300);
    fallback->draw_vbo(pipe, info);

    /* ... then back faces with the other reference value. */
    r300_stencilref_switch_side(r300);
    fallback->draw_vbo(pipe, info);

    r300_stencilref_end(r300);
}
 
/* Wrap the context's draw_vbo with the two-sided stencil reference fallback.
 *
 * The current draw function is saved inside a freshly allocated
 * r300_stencilref_context and r300_stencilref_draw_vbo is installed in its
 * place. If the allocation fails, the draw function is left untouched, so
 * the context keeps working without the fallback instead of dereferencing
 * a NULL pointer. */
void r300_plug_in_stencil_ref_fallback(struct r300_context *r300)
{
    r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context);

    /* Out of memory: keep the original draw function. */
    if (!r300->stencilref_fallback)
        return;

    /* Save original draw function. */
    r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo;

    /* Override the draw function. */
    r300->context.draw_vbo = r300_stencilref_draw_vbo;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_render_translate.c
0,0 → 1,79
/*
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_context.h"
#include "util/u_index_modify.h"
#include "util/u_upload_mgr.h"
 
 
/* Re-upload an index buffer in a form the draw code can consume directly.
 *
 * - 1-byte indices are always widened to 2-byte indices; *index_size is
 *   updated to 2.
 * - 2- and 4-byte indices are only rebuilt when index_offset is non-zero.
 *
 * Whenever a new buffer is produced, *out_buffer is reset and then filled
 * by the uploader, and *start becomes the upload offset expressed in
 * indices. Any other index size leaves all outputs untouched. */
void r300_translate_index_buffer(struct r300_context *r300,
                                 struct pipe_index_buffer *ib,
                                 struct pipe_resource **out_buffer,
                                 unsigned *index_size, unsigned index_offset,
                                 unsigned *start, unsigned count)
{
    const unsigned in_size = *index_size;
    unsigned upload_offset;
    void *dst;

    if (in_size == 1) {
        /* Bytes are never usable as-is: always convert to ushorts. */
        *out_buffer = NULL;
        u_upload_alloc(r300->uploader, 0, count * 2,
                       &upload_offset, out_buffer, &dst);

        util_shorten_ubyte_elts_to_userptr(&r300->context, ib, index_offset,
                                           *start, count, dst);

        *index_size = 2;
        *start = upload_offset / 2;
        return;
    }

    /* Wider indices only need rebuilding when an offset has to be applied. */
    if (!index_offset)
        return;

    if (in_size == 2) {
        *out_buffer = NULL;
        u_upload_alloc(r300->uploader, 0, count * 2,
                       &upload_offset, out_buffer, &dst);

        util_rebuild_ushort_elts_to_userptr(&r300->context, ib, index_offset,
                                            *start, count, dst);

        *start = upload_offset / 2;
    } else if (in_size == 4) {
        *out_buffer = NULL;
        u_upload_alloc(r300->uploader, 0, count * 4,
                       &upload_offset, out_buffer, &dst);

        util_rebuild_uint_elts_to_userptr(&r300->context, ib, index_offset,
                                          *start, count, dst);

        *start = upload_offset / 4;
    }
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_resource.c
0,0 → 1,57
/*
* Copyright 2010 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Dave Airlie
*/
 
#include "r300_context.h"
#include "r300_texture.h"
#include "r300_screen_buffer.h"
 
/* resource_create hook: buffers and textures have separate constructors,
 * chosen by the template's target. */
static struct pipe_resource *
r300_resource_create(struct pipe_screen *screen,
                     const struct pipe_resource *templ)
{
    if (templ->target != PIPE_BUFFER)
        return r300_texture_create(screen, templ);

    return r300_buffer_create(screen, templ);
}
 
void r300_init_resource_functions(struct r300_context *r300)
{
r300->context.transfer_map = u_transfer_map_vtbl;
r300->context.transfer_flush_region = u_default_transfer_flush_region;
r300->context.transfer_unmap = u_transfer_unmap_vtbl;
r300->context.transfer_inline_write = u_default_transfer_inline_write;
r300->context.create_surface = r300_create_surface;
r300->context.surface_destroy = r300_surface_destroy;
}
 
void r300_init_screen_resource_functions(struct r300_screen *r300screen)
{
r300screen->screen.resource_create = r300_resource_create;
r300screen->screen.resource_from_handle = r300_texture_from_handle;
r300screen->screen.resource_get_handle = r300_resource_get_handle;
r300screen->screen.resource_destroy = u_resource_destroy_vtbl;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen.c
0,0 → 1,640
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
#include "os/os_time.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
 
#include "r300_context.h"
#include "r300_texture.h"
#include "r300_screen_buffer.h"
#include "r300_state_inlines.h"
#include "r300_public.h"
 
#include "draw/draw_context.h"
 
/* Return the identifier behind whom the brave coders responsible for this
 * amalgamation of code, sweat, and duct tape, routinely obscure their names.
 *
 * ...I should have just put "Corbin Simpson", but I'm not that cool.
 *
 * (Or egotistical. Yet.)
 *
 * The returned string is a constant; pscreen is not used. */
static const char* r300_get_vendor(struct pipe_screen* pscreen)
{
return "X.Org R300 Project";
}
 
/* Human-readable chip names, indexed by caps.family in r300_get_name().
 * NOTE(review): the order presumably has to match the chip family enum
 * declared in r300_chipset.h -- confirm when adding entries. */
static const char* chip_families[] = {
"unknown",
"ATI R300",
"ATI R350",
"ATI RV350",
"ATI RV370",
"ATI RV380",
"ATI RS400",
"ATI RC410",
"ATI RS480",
"ATI R420",
"ATI R423",
"ATI R430",
"ATI R480",
"ATI R481",
"ATI RV410",
"ATI RS600",
"ATI RS690",
"ATI RS740",
"ATI RV515",
"ATI R520",
"ATI RV530",
"ATI R580",
"ATI RV560",
"ATI RV570"
};
 
static const char* r300_get_name(struct pipe_screen* pscreen)
{
struct r300_screen* r300screen = r300_screen(pscreen);
 
return chip_families[r300screen->caps.family];
}
 
/* get_param hook: report integer/boolean capabilities of the screen.
 *
 * Boolean caps return 1 or 0, limits return their numeric value. Several
 * answers depend on whether the chip is an r500 or has hardware TCL.
 * A cap that matches no case label falls out of the switch and returns 0. */
static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
boolean is_r500 = r300screen->caps.is_r500;

switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_TWO_SIDED_STENCIL:
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_POINT_SPRITE:
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return 1;

case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return R300_BUFFER_ALIGNMENT;

case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 16;

case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 120;

/* r300 cannot do swizzling of compressed textures. Supported otherwise. */
case PIPE_CAP_TEXTURE_SWIZZLE:
return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;

/* We don't support color clamping on r500, so that we can use color
 * interpolators for generic varyings. */
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
return !is_r500;

/* Supported on r500 only. */
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_SM3:
return is_r500 ? 1 : 0;

/* Unsupported features. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_SCALED_RESOLVE:
case PIPE_CAP_MIN_TEXEL_OFFSET:
case PIPE_CAP_MAX_TEXEL_OFFSET:
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return 0;

/* SWTCL-only features. */
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_USER_VERTEX_BUFFERS:
return !r300screen->caps.has_tcl;

/* HWTCL-only features / limitations. */
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
return r300screen->caps.has_tcl;
case PIPE_CAP_TGSI_TEXCOORD:
return 0;

/* Texturing. */
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return r300screen->caps.num_tex_units;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
/* 13 == 4096, 12 == 2048 */
return is_r500 ? 13 : 12;

/* Render targets. */
case PIPE_CAP_MAX_RENDER_TARGETS:
return 4;
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
}
/* Caps without a case label above are reported as unsupported. */
return 0;
}
 
/* get_shader_param hook: report per-shader-stage limits.
 *
 * Only the fragment and vertex stages are handled; any other stage, and any
 * cap without a case label, returns 0. For vertex shaders on chips without
 * hardware TCL the answer is delegated to the draw module, except for the
 * two caps that are forced to 0 beforehand. */
static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;

switch (shader) {
case PIPE_SHADER_FRAGMENT:
switch (param)
{
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
return is_r500 || is_r400 ? 512 : 96;
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
return is_r500 || is_r400 ? 512 : 64;
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
return is_r500 || is_r400 ? 512 : 32;
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
return is_r500 ? 511 : 4;
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return is_r500 ? 64 : 0; /* Actually unlimited on r500. */
/* Fragment shader limits. */
case PIPE_SHADER_CAP_MAX_INPUTS:
/* 2 colors + 8 texcoords are always supported
 * (minus fog and wpos).
 *
 * R500 has the ability to turn 3rd and 4th color into
 * additional texcoords but there is no two-sided color
 * selection then. However the facing bit can be used instead. */
return 10;
case PIPE_SHADER_CAP_MAX_CONSTS:
return is_r500 ? 256 : 32;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
return is_r500 ? 128 : is_r400 ? 64 : 32;
case PIPE_SHADER_CAP_MAX_PREDS:
return is_r500 ? 1 : 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return r300screen->caps.num_tex_units;
case PIPE_SHADER_CAP_MAX_ADDRS:
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}
break;
case PIPE_SHADER_VERTEX:
/* These two are 0 regardless of whether HW or SW TCL is used. */
switch (param)
{
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
default:;
}

/* Without hardware TCL, let the draw module answer. */
if (!r300screen->caps.has_tcl) {
return draw_get_shader_param(shader, param);
}

switch (param)
{
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
return is_r500 ? 1024 : 256;
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return is_r500 ? 4 : 0; /* For loops; not sure about conditionals. */
case PIPE_SHADER_CAP_MAX_INPUTS:
return 16;
case PIPE_SHADER_CAP_MAX_CONSTS:
return 256;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 32;
case PIPE_SHADER_CAP_MAX_ADDRS:
return 1; /* XXX guessed */
case PIPE_SHADER_CAP_MAX_PREDS:
return is_r500 ? 4 : 0; /* XXX guessed. */
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}
break;
}
/* Unhandled stage or cap. */
return 0;
}
 
/* get_paramf hook: report floating-point capabilities of the screen.
 *
 * Unknown caps are reported through debug_printf and answered with 0. */
static float r300_get_paramf(struct pipe_screen* pscreen,
                             enum pipe_capf param)
{
    struct r300_screen* r300screen = r300_screen(pscreen);

    switch (param) {
    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
        return 16.0f;

    case PIPE_CAPF_GUARD_BAND_LEFT:
    case PIPE_CAPF_GUARD_BAND_TOP:
    case PIPE_CAPF_GUARD_BAND_RIGHT:
    case PIPE_CAPF_GUARD_BAND_BOTTOM:
        /* The proper values are unknown; answering here at least keeps
         * the "unknown cap" warning below quiet. */
        return 0.0f;

    case PIPE_CAPF_MAX_LINE_WIDTH:
    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
    case PIPE_CAPF_MAX_POINT_WIDTH:
    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
        /* Line and point sizes are bounded by the largest colorbuffer
         * the chip generation can render to. */
        if (r300screen->caps.is_r500)
            return 4096.0f;
        if (r300screen->caps.is_r400)
            return 4021.0f;
        return 2560.0f;

    default:
        debug_printf("r300: Warning: Unknown CAP %d in get_paramf.\n",
                     param);
        return 0.0f;
    }
}
 
/* get_video_param hook: report video decoding capabilities.
 *
 * Profile support and maximum dimensions come from the shared vl helpers;
 * everything else is a fixed answer. Unknown caps return 0. */
static int r300_get_video_param(struct pipe_screen *screen,
                                enum pipe_video_profile profile,
                                enum pipe_video_cap param)
{
    switch (param) {
    case PIPE_VIDEO_CAP_SUPPORTED:
        return vl_profile_supported(screen, profile);

    case PIPE_VIDEO_CAP_MAX_WIDTH:
    case PIPE_VIDEO_CAP_MAX_HEIGHT:
        return vl_video_buffer_max_size(screen);

    case PIPE_VIDEO_CAP_PREFERED_FORMAT:
        return PIPE_FORMAT_NV12;

    case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
        return 1;

    /* Not available: NPOT video textures and interlaced buffers. */
    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
    case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
    case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
        return 0;

    default:
        return 0;
    }
}
 
/**
 * Check for a PIPE_FORMAT_?10?10?10?2_UNORM layout: a 32-bit, 1x1-block
 * format whose four channels are 10, 10, 10 and 2 bits wide and are each
 * either unsigned or void.
 */
static INLINE boolean
util_format_is_rgba1010102_variant(const struct util_format_description *desc)
{
    static const unsigned expected_bits[4] = {10, 10, 10, 2};
    unsigned i;

    if (desc->block.bits != 32 ||
        desc->block.width != 1 ||
        desc->block.height != 1)
        return FALSE;

    for (i = 0; i < 4; i++) {
        const boolean type_ok =
            desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
            desc->channel[i].type == UTIL_FORMAT_TYPE_VOID;

        if (!type_ok || desc->channel[i].size != expected_bits[i])
            return FALSE;
    }

    return TRUE;
}
 
/* is_format_supported hook.
 *
 * Each group of bind flags in `usage` is checked in turn and the supported
 * ones are collected in `retval`. The function returns TRUE only when every
 * requested flag ended up in `retval` (retval == usage), so a usage bit that
 * none of the checks below handles makes the result FALSE.
 *
 * Multisampled resources (sample_count 2, 4 or 6) are subject to extra
 * restrictions; any sample count other than 0, 1, 2, 4 or 6 is rejected. */
static boolean r300_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage)
{
uint32_t retval = 0;
boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8;
boolean is_r500 = r300_screen(screen)->caps.is_r500;
boolean is_r400 = r300_screen(screen)->caps.is_r400;
/* Format classes that get special treatment below. */
boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
format == PIPE_FORMAT_R10G10B10X2_SNORM ||
format == PIPE_FORMAT_B10G10R10A2_UNORM ||
format == PIPE_FORMAT_R10SG10SB10SA2U_NORM;
boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM ||
format == PIPE_FORMAT_RGTC1_SNORM ||
format == PIPE_FORMAT_LATC1_UNORM ||
format == PIPE_FORMAT_LATC1_SNORM;
boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM ||
format == PIPE_FORMAT_RGTC2_SNORM ||
format == PIPE_FORMAT_LATC2_UNORM ||
format == PIPE_FORMAT_LATC2_SNORM;
boolean is_x16f_xy16f = format == PIPE_FORMAT_R16_FLOAT ||
format == PIPE_FORMAT_R16G16_FLOAT ||
format == PIPE_FORMAT_A16_FLOAT ||
format == PIPE_FORMAT_L16_FLOAT ||
format == PIPE_FORMAT_L16A16_FLOAT ||
format == PIPE_FORMAT_R16A16_FLOAT ||
format == PIPE_FORMAT_I16_FLOAT;
boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT ||
format == PIPE_FORMAT_R16G16_FLOAT ||
format == PIPE_FORMAT_R16G16B16_FLOAT ||
format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
format == PIPE_FORMAT_R16G16B16X16_FLOAT;
const struct util_format_description *desc;

if (!util_format_is_supported(format, usage))
return FALSE;

/* Check multisampling support. */
switch (sample_count) {
case 0:
case 1:
break;
case 2:
case 4:
case 6:
/* We need DRM 2.8.0. */
if (!drm_2_8_0) {
return FALSE;
}
/* Only support R500, because I didn't test older chipsets,
 * but MSAA should work there too. */
if (!is_r500 && !debug_get_bool_option("RADEON_MSAA", FALSE)) {
return FALSE;
}
/* No texturing and scanout. */
if (usage & (PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT)) {
return FALSE;
}

desc = util_format_description(format);

if (is_r500) {
/* Only allow depth/stencil, RGBA8, RGBA1010102, RGBA16F. */
if (!util_format_is_depth_or_stencil(format) &&
!util_format_is_rgba8_variant(desc) &&
!util_format_is_rgba1010102_variant(desc) &&
format != PIPE_FORMAT_R16G16B16A16_FLOAT &&
format != PIPE_FORMAT_R16G16B16X16_FLOAT) {
return FALSE;
}
} else {
/* Only allow depth/stencil, RGBA8. */
if (!util_format_is_depth_or_stencil(format) &&
!util_format_is_rgba8_variant(desc)) {
return FALSE;
}
}
break;
default:
return FALSE;
}

/* Check sampler format support. */
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
/* these two are broken for an unknown reason */
format != PIPE_FORMAT_R8G8B8X8_SNORM &&
format != PIPE_FORMAT_R16G16B16X16_SNORM &&
/* ATI1N is r5xx-only. */
(is_r500 || !is_ati1n) &&
/* ATI2N is supported on r4xx-r5xx. */
(is_r400 || is_r500 || !is_ati2n) &&
/* R16F and RG16F texture support was added in as late as DRM 2.8.0 */
(drm_2_8_0 || !is_x16f_xy16f) &&
r300_is_sampler_format_supported(format)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}

/* Check colorbuffer format support. */
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) &&
/* 2101010 cannot be rendered to on non-r5xx. */
(!is_color2101010 || (is_r500 && drm_2_8_0)) &&
r300_is_colorbuffer_format_supported(format)) {
/* Grant exactly those colorbuffer-type flags that were requested. */
retval |= usage &
(PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
}

/* Check depth-stencil format support. */
if (usage & PIPE_BIND_DEPTH_STENCIL &&
r300_is_zs_format_supported(format)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}

/* Check vertex buffer format support. */
if (usage & PIPE_BIND_VERTEX_BUFFER) {
if (r300_screen(screen)->caps.has_tcl) {
/* Half float is supported on >= R400. */
if ((is_r400 || is_r500 || !is_half_float) &&
r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
} else {
/* SW TCL */
if (!util_format_is_pure_integer(format)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
}
}

/* Transfers are always supported. */
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;

/* Supported only if nothing that was asked for is missing. */
return retval == usage;
}
 
static void r300_destroy_screen(struct pipe_screen* pscreen)
{
struct r300_screen* r300screen = r300_screen(pscreen);
struct radeon_winsys *rws = radeon_winsys(pscreen);
 
pipe_mutex_destroy(r300screen->cmask_mutex);
 
if (rws)
rws->destroy(rws);
 
FREE(r300screen);
}
 
/* fence_reference hook: fence handles are treated as pb_buffer pointers,
 * so reference handling is delegated to pb_reference(). */
static void r300_fence_reference(struct pipe_screen *screen,
                                 struct pipe_fence_handle **ptr,
                                 struct pipe_fence_handle *fence)
{
    struct pb_buffer **dst = (struct pb_buffer**)ptr;
    struct pb_buffer *src = (struct pb_buffer*)fence;

    pb_reference(dst, src);
}
 
static boolean r300_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence)
{
struct radeon_winsys *rws = r300_screen(screen)->rws;
struct pb_buffer *rfence = (struct pb_buffer*)fence;
 
return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);
}
 
/* fence_finish hook: wait for a fence.
 *
 * With PIPE_TIMEOUT_INFINITE the call blocks in the winsys and returns TRUE.
 * Otherwise the fence buffer is polled, sleeping between polls, and FALSE is
 * returned once the timeout (divided by 1000 to match os_time_get()) has
 * elapsed while the buffer is still busy. */
static boolean r300_fence_finish(struct pipe_screen *screen,
                                 struct pipe_fence_handle *fence,
                                 uint64_t timeout)
{
    struct radeon_winsys *winsys = r300_screen(screen)->rws;
    struct pb_buffer *fence_buf = (struct pb_buffer*)fence;
    int64_t began;

    if (timeout == PIPE_TIMEOUT_INFINITE) {
        winsys->buffer_wait(fence_buf, RADEON_USAGE_READWRITE);
        return TRUE;
    }

    began = os_time_get();

    /* Convert to microseconds. */
    timeout /= 1000;

    /* Poll until idle or out of time. */
    while (winsys->buffer_is_busy(fence_buf, RADEON_USAGE_READWRITE)) {
        if (os_time_get() - began >= timeout)
            return FALSE;

        os_time_sleep(10);
    }

    return TRUE;
}
 
struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
{
struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen);
 
if (!r300screen) {
FREE(r300screen);
return NULL;
}
 
rws->query_info(rws, &r300screen->info);
 
r300_init_debug(r300screen);
r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps);
 
if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK))
r300screen->caps.zmask_ram = 0;
if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
r300screen->caps.hiz_ram = 0;
 
if (r300screen->info.drm_minor < 8)
r300screen->caps.has_us_format = FALSE;
 
r300screen->rws = rws;
r300screen->screen.destroy = r300_destroy_screen;
r300screen->screen.get_name = r300_get_name;
r300screen->screen.get_vendor = r300_get_vendor;
r300screen->screen.get_param = r300_get_param;
r300screen->screen.get_shader_param = r300_get_shader_param;
r300screen->screen.get_paramf = r300_get_paramf;
r300screen->screen.get_video_param = r300_get_video_param;
r300screen->screen.is_format_supported = r300_is_format_supported;
r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
r300screen->screen.context_create = r300_create_context;
r300screen->screen.fence_reference = r300_fence_reference;
r300screen->screen.fence_signalled = r300_fence_signalled;
r300screen->screen.fence_finish = r300_fence_finish;
 
r300_init_screen_resource_functions(r300screen);
 
util_format_s3tc_init();
pipe_mutex_init(r300screen->cmask_mutex);
 
return &r300screen->screen;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen.h
0,0 → 1,125
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_SCREEN_H
#define R300_SCREEN_H
 
#include "r300_chipset.h"
#include "../../winsys/radeon/drm/radeon_winsys.h"
#include "pipe/p_screen.h"
#include "util/u_slab.h"
#include "os/os_thread.h"
#include <stdio.h>
 
/* Driver-private screen object of the r300 driver.
 *
 * The generic pipe_screen is embedded as the first member, which is what
 * allows a pipe_screen pointer to be cast to a struct r300_screen pointer. */
struct r300_screen {
/* Parent class */
struct pipe_screen screen;

/* Winsys this screen talks to. */
struct radeon_winsys *rws;

/* Chipset info and capabilities. */
struct radeon_info info;
struct r300_capabilities caps;

/** Combination of DBG_xxx flags */
unsigned debug;

/* The MSAA texture with CMASK access.
 * NOTE(review): cmask_mutex presumably serializes access to
 * cmask_resource - confirm against the users of these fields. */
struct pipe_resource *cmask_resource;
pipe_mutex cmask_mutex;
};
 
 
/* Convenience cast wrappers. */
/* Downcast a generic pipe_screen pointer to the r300 screen embedding it.
 * No checking is performed; the pointer is simply reinterpreted. */
static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
    struct r300_screen *r300screen = (struct r300_screen*)screen;

    return r300screen;
}
 
static INLINE struct radeon_winsys *
radeon_winsys(struct pipe_screen *screen) {
return r300_screen(screen)->rws;
}
 
/* Debug functionality. */
 
/**
* Debug flags to disable/enable certain groups of debugging outputs.
*
* \note These may be rather coarse, and the grouping may be impractical.
* If you find, while debugging the driver, that a different grouping
* of these flags would be beneficial, just feel free to change them
* but make sure to update the documentation in r300_debug.c to reflect
* those changes.
*/
/*@{*/
 
/* Logging. */
#define DBG_PSC (1 << 0)
#define DBG_FP (1 << 1)
#define DBG_VP (1 << 2)
#define DBG_SWTCL (1 << 3)
#define DBG_DRAW (1 << 4)
#define DBG_TEX (1 << 5)
#define DBG_TEXALLOC (1 << 6)
#define DBG_RS (1 << 7)
#define DBG_FB (1 << 8)
#define DBG_RS_BLOCK (1 << 9)
#define DBG_CBZB (1 << 10)
#define DBG_HYPERZ (1 << 11)
#define DBG_SCISSOR (1 << 12)
#define DBG_INFO (1 << 13)
#define DBG_MSAA (1 << 14)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
#define DBG_NO_IMMD (1 << 18)
#define DBG_NO_OPT (1 << 19)
#define DBG_NO_CBZB (1 << 20)
#define DBG_NO_ZMASK (1 << 21)
#define DBG_NO_HIZ (1 << 22)
#define DBG_NO_CMASK (1 << 23)
/* Statistics. */
#define DBG_P_STAT (1 << 25)
/*@}*/
 
/* Test whether any of the given DBG_xxx bits is set in the screen's debug
 * mask.  Returns TRUE if at least one bit matches, FALSE otherwise. */
static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
{
    if (screen->debug & flags)
        return TRUE;

    return FALSE;
}
 
/* printf-style debug output to stderr.
 * The message is emitted only when SCREEN_DBG_ON(screen, flags) holds;
 * otherwise the call does nothing. */
static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
                              const char * fmt, ...)
{
    va_list args;

    /* Nothing to print unless one of the requested flags is enabled. */
    if (!SCREEN_DBG_ON(screen, flags))
        return;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
 
void r300_init_debug(struct r300_screen* ctx);
 
void r300_init_screen_resource_functions(struct r300_screen *r300screen);
 
#endif /* R300_SCREEN_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen_buffer.c
0,0 → 1,198
/*
* Copyright 2010 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Dave Airlie
*/
 
#include <stdio.h>
 
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
 
#include "r300_screen_buffer.h"
 
/* Upload user index data through the context's upload manager.
 *
 * r300          context owning the uploader
 * index_buffer  out: set to the buffer holding the uploaded indices; it is
 *               reset to NULL first and stays NULL if nothing was uploaded
 * index_size    size of one index in bytes (must be non-zero)
 * start         in: first index to read from ptr;
 *               out: first index inside *index_buffer.  Left untouched when
 *               the upload did not produce a buffer.
 * count         number of indices to upload
 * ptr           base of the user index array
 */
void r300_upload_index_buffer(struct r300_context *r300,
                              struct pipe_resource **index_buffer,
                              unsigned index_size, unsigned *start,
                              unsigned count, const uint8_t *ptr)
{
    /* Initialized so that a failed upload never exposes an indeterminate
     * value. */
    unsigned index_offset = 0;

    *index_buffer = NULL;

    u_upload_data(r300->uploader,
                  0, count * index_size,
                  ptr + (*start * index_size),
                  &index_offset,
                  index_buffer);

    /* No buffer came back: the offset is meaningless, so do not derive a new
     * start index from it.  Callers detect the failure via *index_buffer. */
    if (!*index_buffer)
        return;

    /* Convert the byte offset inside the upload buffer to an index. */
    *start = index_offset / index_size;
}
 
/* resource_destroy hook for buffers: releases the system-memory storage,
 * drops the winsys buffer reference (if any) and frees the resource
 * structure itself. */
static void r300_buffer_destroy(struct pipe_screen *screen,
                                struct pipe_resource *buf)
{
    struct r300_resource *res = r300_resource(buf);

    /* Only buffers backed by a winsys buffer hold a reference to drop. */
    if (res->buf)
        pb_reference(&res->buf, NULL);

    /* Storage obtained from align_malloc in r300_buffer_create. */
    align_free(res->malloced_buffer);

    FREE(res);
}
 
/* transfer_map hook for buffers.
 *
 * Allocates a pipe_transfer from the context's slab pool, fills it in and
 * returns a CPU pointer to byte box->x of the buffer.  On success the
 * transfer is returned through *ptransfer; if the winsys mapping fails the
 * transfer is returned to the pool and NULL is returned.
 *
 * Buffers living in malloc'ed system memory are returned directly.  For
 * winsys buffers, a synchronized whole-resource discard replaces a busy
 * buffer with a fresh one instead of waiting for the GPU. */
static void *
r300_buffer_transfer_map( struct pipe_context *context,
                          struct pipe_resource *resource,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          struct pipe_transfer **ptransfer )
{
    struct r300_context *r300 = r300_context(context);
    struct radeon_winsys *rws = r300->screen->rws;
    struct r300_resource *rbuf = r300_resource(resource);
    struct pipe_transfer *xfer;
    uint8_t *base;

    xfer = util_slab_alloc(&r300->pool_transfers);
    xfer->resource = resource;
    xfer->level = level;
    xfer->usage = usage;
    xfer->box = *box;
    xfer->stride = 0;
    xfer->layer_stride = 0;

    /* System-memory buffers need no winsys involvement at all. */
    if (rbuf->malloced_buffer) {
        *ptransfer = xfer;
        return rbuf->malloced_buffer + box->x;
    }

    if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
        !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
        assert(usage & PIPE_TRANSFER_WRITE);

        /* Would mapping the current storage make us wait for the GPU? */
        if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, RADEON_USAGE_READWRITE) ||
            r300->rws->buffer_is_busy(rbuf->buf, RADEON_USAGE_READWRITE)) {
            /* Try to give the same pipe_resource brand-new storage. */
            struct pb_buffer *fresh =
                r300->rws->buffer_create(r300->rws, rbuf->b.b.width0,
                                         R300_BUFFER_ALIGNMENT, TRUE,
                                         rbuf->domain);

            if (fresh) {
                unsigned slot = 0;

                /* The old storage is discarded; the new one takes over. */
                pb_reference(&rbuf->buf, NULL);
                rbuf->buf = fresh;
                rbuf->cs_buf = r300->rws->buffer_get_cs_handle(rbuf->buf);

                /* If the resource is bound as a vertex buffer, the vertex
                 * arrays must be re-emitted with the new storage. */
                while (slot < r300->nr_vertex_buffers) {
                    if (r300->vertex_buffer[slot].buffer == &rbuf->b.b) {
                        r300->vertex_arrays_dirty = TRUE;
                        break;
                    }
                    slot++;
                }
            }
        }
    }

    /* Buffers are never used for write, so a read-only mapping does not
     * have to be synchronized. */
    if (!(usage & PIPE_TRANSFER_WRITE))
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

    base = rws->buffer_map(rbuf->cs_buf, r300->cs, usage);

    if (base != NULL) {
        *ptransfer = xfer;
        return base + box->x;
    }

    /* Mapping failed: give the transfer back to the pool. */
    util_slab_free(&r300->pool_transfers, xfer);
    return NULL;
}
 
static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
struct r300_context *r300 = r300_context(pipe);
 
util_slab_free(&r300->pool_transfers, transfer);
}
 
/* Resource function table for buffers; r300_buffer_create stores its address
 * in the vtbl field of every buffer resource it creates.  Entries are
 * positional; the trailing comments name the slot each one fills.  Slots set
 * to NULL have no buffer implementation in this file. */
static const struct u_resource_vtbl r300_buffer_vtbl =
{
NULL, /* get_handle */
r300_buffer_destroy, /* resource_destroy */
r300_buffer_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r300_buffer_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
 
/* Create a buffer resource from a template.
 *
 * screen  the r300 screen the buffer belongs to
 * templ   description of the buffer; copied into the new resource
 *
 * Returns the new resource with a reference count of 1, or NULL if any
 * allocation (resource structure, system-memory storage or winsys buffer)
 * fails.
 *
 * Constant buffers, and on chips without TCL every buffer that is not
 * flagged PIPE_BIND_CUSTOM, are backed by malloc'ed system memory; all other
 * buffers are backed by a winsys buffer in the GTT domain. */
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templ)
{
    struct r300_screen *r300screen = r300_screen(screen);
    struct r300_resource *rbuf;

    rbuf = MALLOC_STRUCT(r300_resource);
    if (!rbuf)
        return NULL;

    rbuf->b.b = *templ;
    rbuf->b.vtbl = &r300_buffer_vtbl;
    pipe_reference_init(&rbuf->b.b.reference, 1);
    rbuf->b.b.screen = screen;
    rbuf->domain = RADEON_DOMAIN_GTT;
    rbuf->buf = NULL;
    /* MALLOC_STRUCT does not clear memory; never leave the handle
     * indeterminate for buffers that have no winsys storage. */
    rbuf->cs_buf = NULL;
    rbuf->malloced_buffer = NULL;

    /* Allocate constant buffers and SWTCL vertex and index buffers in RAM.
     * Note that uploaded index buffers use the flag PIPE_BIND_CUSTOM, so that
     * we can distinguish them from user-created buffers.
     */
    if (templ->bind & PIPE_BIND_CONSTANT_BUFFER ||
        (!r300screen->caps.has_tcl && !(templ->bind & PIPE_BIND_CUSTOM))) {
        rbuf->malloced_buffer = align_malloc(templ->width0, 64);
        if (!rbuf->malloced_buffer) {
            /* A resource with neither storage kind cannot be mapped. */
            FREE(rbuf);
            return NULL;
        }
        return &rbuf->b.b;
    }

    rbuf->buf =
        r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0,
                                       R300_BUFFER_ALIGNMENT, TRUE,
                                       rbuf->domain);
    if (!rbuf->buf) {
        FREE(rbuf);
        return NULL;
    }

    rbuf->cs_buf =
        r300screen->rws->buffer_get_cs_handle(rbuf->buf);

    return &rbuf->b.b;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_screen_buffer.h
0,0 → 1,54
/*
* Copyright 2010 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Dave Airlie
*/
 
#ifndef R300_SCREEN_BUFFER_H
#define R300_SCREEN_BUFFER_H
 
#include <stdio.h>
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "util/u_transfer.h"
 
#include "r300_screen.h"
#include "r300_context.h"
 
/* Functions. */
 
void r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size, unsigned *start,
unsigned count, const uint8_t *ptr);
 
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
 
/* Inline functions. */
 
/* Reinterpret a pipe_resource pointer as a struct r300_buffer pointer.
 * NOTE(review): no definition of struct r300_buffer is visible in this part
 * of the driver (buffers are handled as struct r300_resource) - confirm
 * whether this helper is still used. */
static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
{
    struct r300_buffer *rbuf = (struct r300_buffer *)buffer;

    return rbuf;
}
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_shader_semantics.h
0,0 → 1,72
/*
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_SHADER_SEMANTICS_H
#define R300_SHADER_SEMANTICS_H
 
#define ATTR_UNUSED (-1)
#define ATTR_COLOR_COUNT 2
#define ATTR_GENERIC_COUNT 32
 
/* This structure contains information about what attributes are written by VS
* or read by FS. (but not both) It's much easier to work with than
* tgsi_shader_info.
*
* The variables contain indices to tgsi_shader_info semantics and those
* indices are nothing else than input/output register numbers. */
/* Per-semantic register indices of one shader.  Every index field holds
 * ATTR_UNUSED after r300_shader_semantics_reset(). */
struct r300_shader_semantics {
int pos;
int psize;
/* One slot per color / back-color attribute. */
int color[ATTR_COLOR_COUNT];
int bcolor[ATTR_COLOR_COUNT];
int face;
/* One slot per generic attribute. */
int generic[ATTR_GENERIC_COUNT];
int fog;
int wpos;

/* Reset to 0; presumably the number of generic[] slots in use - confirm
 * against the code that fills this structure. */
int num_generic;
};
 
/* Put a semantics table into its empty state: every index becomes
 * ATTR_UNUSED and the generic counter is cleared. */
static INLINE void r300_shader_semantics_reset(
    struct r300_shader_semantics* info)
{
    int slot;

    info->num_generic = 0;

    /* Array-valued semantics. */
    for (slot = 0; slot < ATTR_GENERIC_COUNT; slot++)
        info->generic[slot] = ATTR_UNUSED;

    for (slot = 0; slot < ATTR_COLOR_COUNT; slot++)
        info->color[slot] = ATTR_UNUSED;

    for (slot = 0; slot < ATTR_COLOR_COUNT; slot++)
        info->bcolor[slot] = ATTR_UNUSED;

    /* Scalar semantics. */
    info->wpos = ATTR_UNUSED;
    info->fog = ATTR_UNUSED;
    info->face = ATTR_UNUSED;
    info->psize = ATTR_UNUSED;
    info->pos = ATTR_UNUSED;
}
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_state.c
0,0 → 1,2189
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "draw/draw_context.h"
 
#include "util/u_framebuffer.h"
#include "util/u_half.h"
#include "util/u_helpers.h"
#include "util/u_math.h"
#include "util/u_mm.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_transfer.h"
 
#include "tgsi/tgsi_parse.h"
 
#include "pipe/p_config.h"
 
#include "r300_cb.h"
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_reg.h"
#include "r300_screen.h"
#include "r300_screen_buffer.h"
#include "r300_state_inlines.h"
#include "r300_fs.h"
#include "r300_texture.h"
#include "r300_vs.h"
 
/* r300_state: Functions used to initialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
 
/* Bind a CSO to a state atom: when `cso` differs from the object currently
 * stored in `atom`, store it and mark the atom dirty.
 *
 * A variable named `r300` (the context) must be in scope where the macro is
 * expanded.  Both arguments are parenthesized so that arbitrary expressions
 * can be passed, but each is evaluated more than once and must therefore be
 * free of side effects.
 *
 * NOTE(review): this expands to a bare `if` block rather than
 * do { } while (0), so it must not be used as the body of an unbraced
 * if/else. */
#define UPDATE_STATE(cso, atom) \
    if ((cso) != (atom).state) { \
        (atom).state = (cso); \
        r300_mark_atom_dirty(r300, &(atom)); \
    }
 
/* Tell whether, for an ADD or REVERSE_SUBTRACT equation, a source pixel with
 * SRC_ALPHA == 0 leaves the colorbuffer untouched under the given factors.
 * The accepted dst factors are the inverses of the accepted src factors. */
static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
                                            unsigned dstRGB, unsigned dstA)
{
    const boolean src_rgb_ok =
        srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
        srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
        srcRGB == PIPE_BLENDFACTOR_ZERO;
    const boolean src_a_ok =
        srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
        srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
        srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
        srcA == PIPE_BLENDFACTOR_ZERO;
    const boolean dst_rgb_ok =
        dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        dstRGB == PIPE_BLENDFACTOR_ONE;
    const boolean dst_a_ok =
        dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        dstA == PIPE_BLENDFACTOR_ONE;

    return src_rgb_ok && src_a_ok && dst_rgb_ok && dst_a_ok;
}
 
/* Tell whether, for an ADD or REVERSE_SUBTRACT equation, a source pixel with
 * SRC_ALPHA == 1 leaves the colorbuffer untouched under the given factors.
 * The accepted dst factors are the inverses of the accepted src factors. */
static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
                                            unsigned dstRGB, unsigned dstA)
{
    const boolean src_rgb_ok =
        srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        srcRGB == PIPE_BLENDFACTOR_ZERO;
    const boolean src_a_ok =
        srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        srcA == PIPE_BLENDFACTOR_ZERO;
    const boolean dst_rgb_ok =
        dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
        dstRGB == PIPE_BLENDFACTOR_ONE;
    const boolean dst_a_ok =
        dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
        dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
        dstA == PIPE_BLENDFACTOR_ONE;

    return src_rgb_ok && src_a_ok && dst_rgb_ok && dst_a_ok;
}
 
/* Tell whether, for an ADD or REVERSE_SUBTRACT equation, a source pixel with
 * SRC_COLOR == (0,0,0) leaves the colorbuffer untouched under the given
 * factors.  The accepted dst factors are the inverses of the src factors. */
static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
                                            unsigned dstRGB, unsigned dstA)
{
    const boolean src_rgb_ok =
        srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
        srcRGB == PIPE_BLENDFACTOR_ZERO;
    const boolean src_a_ok = srcA == PIPE_BLENDFACTOR_ZERO;
    const boolean dst_rgb_ok =
        dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        dstRGB == PIPE_BLENDFACTOR_ONE;
    const boolean dst_a_ok = dstA == PIPE_BLENDFACTOR_ONE;

    return src_rgb_ok && src_a_ok && dst_rgb_ok && dst_a_ok;
}
 
/* Tell whether, for an ADD or REVERSE_SUBTRACT equation, a source pixel with
 * SRC_COLOR == (1,1,1) leaves the colorbuffer untouched under the given
 * factors.  The accepted dst factors are the inverses of the src factors. */
static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
                                            unsigned dstRGB, unsigned dstA)
{
    const boolean src_rgb_ok =
        srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        srcRGB == PIPE_BLENDFACTOR_ZERO;
    const boolean src_a_ok = srcA == PIPE_BLENDFACTOR_ZERO;
    const boolean dst_rgb_ok =
        dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
        dstRGB == PIPE_BLENDFACTOR_ONE;
    const boolean dst_a_ok = dstA == PIPE_BLENDFACTOR_ONE;

    return src_rgb_ok && src_a_ok && dst_rgb_ok && dst_a_ok;
}
 
/* Tell whether, for an ADD or REVERSE_SUBTRACT equation, a source pixel with
 * SRC_ALPHA_COLOR == (0,0,0,0) leaves the colorbuffer untouched under the
 * given factors.  The accepted dst factors are the inverses of the src
 * factors. */
static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
                                                  unsigned dstRGB, unsigned dstA)
{
    const boolean src_rgb_ok =
        srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
        srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
        srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
        srcRGB == PIPE_BLENDFACTOR_ZERO;
    const boolean src_a_ok =
        srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
        srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
        srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
        srcA == PIPE_BLENDFACTOR_ZERO;
    const boolean dst_rgb_ok =
        dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        dstRGB == PIPE_BLENDFACTOR_ONE;
    const boolean dst_a_ok =
        dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        dstA == PIPE_BLENDFACTOR_ONE;

    return src_rgb_ok && src_a_ok && dst_rgb_ok && dst_a_ok;
}
 
/* Tell whether, for an ADD or REVERSE_SUBTRACT equation, a source pixel with
 * SRC_ALPHA_COLOR == (1,1,1,1) leaves the colorbuffer untouched under the
 * given factors.  The accepted dst factors are the inverses of the src
 * factors. */
static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
                                                  unsigned dstRGB, unsigned dstA)
{
    const boolean src_rgb_ok =
        srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        srcRGB == PIPE_BLENDFACTOR_ZERO;
    const boolean src_a_ok =
        srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
        srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
        srcA == PIPE_BLENDFACTOR_ZERO;
    const boolean dst_rgb_ok =
        dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
        dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
        dstRGB == PIPE_BLENDFACTOR_ONE;
    const boolean dst_a_ok =
        dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
        dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
        dstA == PIPE_BLENDFACTOR_ONE;

    return src_rgb_ok && src_a_ok && dst_rgb_ok && dst_a_ok;
}
 
static unsigned blend_discard_conditionally(unsigned eqRGB, unsigned eqA,
unsigned dstRGB, unsigned dstA,
unsigned srcRGB, unsigned srcA)
{
unsigned blend_control = 0;
 
/* Optimization: discard pixels which don't change the colorbuffer.
*
* The code below is non-trivial and some math is involved.
*
* Discarding pixels must be disabled when FP16 AA is enabled.
* This is a hardware bug. Also, this implementation wouldn't work
* with FP blending enabled and equation clamping disabled.
*
* Equations other than ADD are rarely used and therefore won't be
* optimized. */
if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
(eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
/* ADD: X+Y
* REVERSE_SUBTRACT: Y-X
*
* The idea is:
* If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
* then CB will not be changed.
*
* Given the srcFactor and dstFactor variables, we can derive
* what src and dst should be equal to and discard appropriate
* pixels.
*/
if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
} else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
dstRGB, dstA)) {
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
} else if (blend_discard_if_src_color_0(srcRGB, srcA,
dstRGB, dstA)) {
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
} else if (blend_discard_if_src_color_1(srcRGB, srcA,
dstRGB, dstA)) {
blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
} else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
dstRGB, dstA)) {
blend_control |=
R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
} else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
dstRGB, dstA)) {
blend_control |=
R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
}
}
return blend_control;
}
 
/* The hardware colormask is clunky and must be swizzled depending on the
* format. This was figured out by trial-and-error. */
/* Colormask for BGRA-ordered formats: the R bit is moved up by two
 * positions, the B bit down by two, and G and A stay where they are. */
static unsigned bgra_cmask(unsigned mask)
{
    const unsigned r_moved_up = (mask & PIPE_MASK_R) << 2;
    const unsigned b_moved_down = (mask & PIPE_MASK_B) >> 2;
    const unsigned g_and_a = mask & (PIPE_MASK_G | PIPE_MASK_A);

    return r_moved_up | b_moved_down | g_and_a;
}
 
/* Colormask for RGBA-ordered formats: the mask is passed through, limited
 * to the four channel bits. */
static unsigned rgba_cmask(unsigned mask)
{
    const unsigned channels = mask & PIPE_MASK_RGBA;

    return channels;
}
 
/* Colormask that replicates the R bit into itself and the three bit
 * positions above it. */
static unsigned rrrr_cmask(unsigned mask)
{
    const unsigned r = mask & PIPE_MASK_R;

    return r | (r << 1) | (r << 2) | (r << 3);
}
 
/* Colormask that replicates the A bit into itself and the three bit
 * positions below it. */
static unsigned aaaa_cmask(unsigned mask)
{
    const unsigned a = mask & PIPE_MASK_A;

    return (a >> 3) | (a >> 2) | (a >> 1) | a;
}
 
/* Colormask built from the R and G bits only: R is copied one and two
 * positions up, G one position down and two positions up. */
static unsigned grrg_cmask(unsigned mask)
{
    const unsigned r = mask & PIPE_MASK_R;
    const unsigned g = mask & PIPE_MASK_G;

    return (r << 1) | (r << 2) | (g >> 1) | (g << 2);
}
 
/* Colormask built from the R and A bits only: R is copied one and two
 * positions up, A is kept in place and copied three positions down. */
static unsigned arra_cmask(unsigned mask)
{
    const unsigned r = mask & PIPE_MASK_R;
    const unsigned a = mask & PIPE_MASK_A;

    return (r << 1) | (r << 2) | (a >> 3) | a;
}
 
/* Compute the colorbuffer-read bits of the blend control register.
 *
 * Optimization: some blend setups do not need the destination color, in
 * which case reading the colorbuffer stays disabled (0 is returned).
 * SRC_ALPHA_SATURATE as the RGB source factor always forces reads, otherwise
 * blending gives incorrect results; it seems to be a hardware bug.
 *
 * src_alpha_optz  when set, also request that the hardware skips the read
 *                 for pixels whose source alpha is 0 and/or 1, if the
 *                 factors make the destination irrelevant for them.
 *
 * Returns 0, or R300_READ_ENABLE optionally combined with
 * R500_SRC_ALPHA_0_NO_READ / R500_SRC_ALPHA_1_NO_READ. */
static unsigned blend_read_enable(unsigned eqRGB, unsigned eqA,
                                  unsigned dstRGB, unsigned dstA,
                                  unsigned srcRGB, unsigned srcA,
                                  boolean src_alpha_optz)
{
    const boolean uses_min_max =
        eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
        eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX;
    const boolean src_rgb_reads_dst =
        srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
        srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
        srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
        srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA;
    const boolean src_a_reads_dst =
        srcA == PIPE_BLENDFACTOR_DST_COLOR ||
        srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
        srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
        srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA;
    const boolean needs_dst =
        uses_min_max ||
        dstRGB != PIPE_BLENDFACTOR_ZERO ||
        dstA != PIPE_BLENDFACTOR_ZERO ||
        src_rgb_reads_dst ||
        src_a_reads_dst ||
        srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
    unsigned bits;

    /* The destination never enters the equation: no reads at all. */
    if (!needs_dst)
        return 0;

    /* Enable reading from the colorbuffer. */
    bits = R300_READ_ENABLE;

    /* The conditional no-read modes are only requested on demand, never
     * for MIN/MAX equations, and never when the RGB source factor itself
     * depends on the destination. */
    if (!src_alpha_optz || uses_min_max || src_rgb_reads_dst)
        return bits;

    /* Disable reading if SRC_ALPHA == 0. */
    if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
         dstRGB == PIPE_BLENDFACTOR_ZERO) &&
        (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
         dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
         dstA == PIPE_BLENDFACTOR_ZERO))
        bits |= R500_SRC_ALPHA_0_NO_READ;

    /* Disable reading if SRC_ALPHA == 1. */
    if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
         dstRGB == PIPE_BLENDFACTOR_ZERO) &&
        (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
         dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
         dstA == PIPE_BLENDFACTOR_ZERO))
        bits |= R500_SRC_ALPHA_1_NO_READ;

    return bits;
}
 
/* Create a new blend state based on the CSO blend state.
*
* This encompasses alpha blending, logic/raster ops, and blend dithering. */
static void* r300_create_blend_state(struct pipe_context* pipe,
const struct pipe_blend_state* state)
{
struct r300_screen* r300screen = r300_screen(pipe->screen);
struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t blend_control_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t blend_control_noalpha = 0; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t blend_control_noalpha_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
uint32_t alpha_blend_control_noalpha = 0; /* R300_RB3D_ABLEND: 0x4e08 */
uint32_t alpha_blend_control_noalpha_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */
uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */
int i;
 
const unsigned eqRGB = state->rt[0].rgb_func;
const unsigned srcRGB = state->rt[0].rgb_src_factor;
const unsigned dstRGB = state->rt[0].rgb_dst_factor;
 
const unsigned eqA = state->rt[0].alpha_func;
const unsigned srcA = state->rt[0].alpha_src_factor;
const unsigned dstA = state->rt[0].alpha_dst_factor;
 
unsigned srcRGBX = srcRGB;
unsigned dstRGBX = dstRGB;
CB_LOCALS;
 
blend->state = *state;
 
/* force DST_ALPHA to ONE where we can */
switch (srcRGBX) {
case PIPE_BLENDFACTOR_DST_ALPHA:
srcRGBX = PIPE_BLENDFACTOR_ONE;
break;
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
srcRGBX = PIPE_BLENDFACTOR_ZERO;
break;
}
 
switch (dstRGBX) {
case PIPE_BLENDFACTOR_DST_ALPHA:
dstRGBX = PIPE_BLENDFACTOR_ONE;
break;
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
dstRGBX = PIPE_BLENDFACTOR_ZERO;
break;
}
 
/* Get blending register values. */
if (state->rt[0].blend_enable) {
unsigned blend_eq, blend_eq_noclamp;
 
/* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
* this is just the crappy D3D naming */
blend_control = blend_control_noclamp =
R300_ALPHA_BLEND_ENABLE |
( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
 
blend_control_noalpha = blend_control_noalpha_noclamp =
R300_ALPHA_BLEND_ENABLE |
( r300_translate_blend_factor(srcRGBX) << R300_SRC_BLEND_SHIFT) |
( r300_translate_blend_factor(dstRGBX) << R300_DST_BLEND_SHIFT);
 
blend_eq = r300_translate_blend_function(eqRGB, TRUE);
blend_eq_noclamp = r300_translate_blend_function(eqRGB, FALSE);
 
blend_control |= blend_eq;
blend_control_noalpha |= blend_eq;
blend_control_noclamp |= blend_eq_noclamp;
blend_control_noalpha_noclamp |= blend_eq_noclamp;
 
/* Optimization: some operations do not require the destination color. */
blend_control |= blend_read_enable(eqRGB, eqA, dstRGB, dstA,
srcRGB, srcA, r300screen->caps.is_r500);
blend_control_noclamp |= blend_read_enable(eqRGB, eqA, dstRGB, dstA,
srcRGB, srcA, FALSE);
blend_control_noalpha |= blend_read_enable(eqRGB, eqA, dstRGBX, dstA,
srcRGBX, srcA, r300screen->caps.is_r500);
blend_control_noalpha_noclamp |= blend_read_enable(eqRGB, eqA, dstRGBX, dstA,
srcRGBX, srcA, FALSE);
 
/* Optimization: discard pixels which don't change the colorbuffer.
* It cannot be used with FP16 AA. */
blend_control |= blend_discard_conditionally(eqRGB, eqA, dstRGB, dstA,
srcRGB, srcA);
blend_control_noalpha |= blend_discard_conditionally(eqRGB, eqA, dstRGBX, dstA,
srcRGBX, srcA);
 
/* separate alpha */
if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
blend_control |= R300_SEPARATE_ALPHA_ENABLE;
blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
 
alpha_blend_control = alpha_blend_control_noclamp =
(r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
(r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
alpha_blend_control |= r300_translate_blend_function(eqA, TRUE);
alpha_blend_control_noclamp |= r300_translate_blend_function(eqA, FALSE);
}
if (srcA != srcRGBX || dstA != dstRGBX || eqA != eqRGB) {
blend_control_noalpha |= R300_SEPARATE_ALPHA_ENABLE;
blend_control_noalpha_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
 
alpha_blend_control_noalpha = alpha_blend_control_noalpha_noclamp =
(r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
(r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
alpha_blend_control_noalpha |= r300_translate_blend_function(eqA, TRUE);
alpha_blend_control_noalpha_noclamp |= r300_translate_blend_function(eqA, FALSE);
}
}
 
/* PIPE_LOGICOP_* don't need to be translated, fortunately. */
if (state->logicop_enable) {
rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
(state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
}
 
/* Neither fglrx nor classic r300 ever set this, regardless of dithering
* state. Since it's an optional implementation detail, we can leave it
* out and never dither.
*
* This could be revisited if we ever get quality or conformance hints.
*
if (state->dither) {
dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
}
*/
 
/* Build a command buffer. */
{
unsigned (*func[COLORMASK_NUM_SWIZZLES])(unsigned) = {
bgra_cmask,
rgba_cmask,
rrrr_cmask,
aaaa_cmask,
grrg_cmask,
arra_cmask,
bgra_cmask,
rgba_cmask
};
 
for (i = 0; i < COLORMASK_NUM_SWIZZLES; i++) {
boolean has_alpha = i != COLORMASK_RGBX && i != COLORMASK_BGRX;
 
BEGIN_CB(blend->cb_clamp[i], 8);
OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
OUT_CB(has_alpha ? blend_control : blend_control_noalpha);
OUT_CB(has_alpha ? alpha_blend_control : alpha_blend_control_noalpha);
OUT_CB(func[i](state->rt[0].colormask));
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
END_CB;
}
}
 
/* Build a command buffer (for RGBA16F). */
BEGIN_CB(blend->cb_noclamp, 8);
OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
OUT_CB(blend_control_noclamp);
OUT_CB(alpha_blend_control_noclamp);
OUT_CB(rgba_cmask(state->rt[0].colormask));
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
END_CB;
 
/* Build a command buffer (for RGB16F). */
BEGIN_CB(blend->cb_noclamp_noalpha, 8);
OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
OUT_CB(blend_control_noalpha_noclamp);
OUT_CB(alpha_blend_control_noalpha_noclamp);
OUT_CB(rgba_cmask(state->rt[0].colormask));
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
END_CB;
 
/* The same as above, but with no colorbuffer reads and writes. */
BEGIN_CB(blend->cb_no_readwrite, 8);
OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
OUT_CB(0);
OUT_CB(0);
OUT_CB(0);
OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
END_CB;
 
return (void*)blend;
}
 
/* Bind blend state.
 *
 * Stores the new CSO in the blend atom (marking it dirty if it changed) and,
 * for a non-NULL state, refreshes the context's alpha-to-one and
 * alpha-to-coverage flags.  While MSAA is enabled, a change of alpha-to-one
 * downgrades a valid fragment shader to "maybe dirty", and a change of
 * alpha-to-coverage marks the DSA atom dirty. */
static void r300_bind_blend_state(struct pipe_context* pipe,
                                  void* state)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_blend_state *new_blend = (struct r300_blend_state*)state;
    const boolean prev_alpha_to_one = r300->alpha_to_one;
    const boolean prev_alpha_to_coverage = r300->alpha_to_coverage;

    UPDATE_STATE(state, r300->blend_state);

    /* Unbinding: the derived flags keep their previous values. */
    if (new_blend == NULL)
        return;

    r300->alpha_to_one = new_blend->state.alpha_to_one;
    r300->alpha_to_coverage = new_blend->state.alpha_to_coverage;

    /* Both follow-ups below only matter while MSAA is on. */
    if (r300->msaa_enable) {
        if (r300->alpha_to_one != prev_alpha_to_one &&
            r300->fs_status == FRAGMENT_SHADER_VALID) {
            r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
        }

        if (r300->alpha_to_coverage != prev_alpha_to_coverage) {
            r300_mark_atom_dirty(r300, &r300->dsa_state);
        }
    }
}
 
/* Free blend state.
 * The object is the single allocation made by r300_create_blend_state, so
 * releasing it is all there is to do; the context is not needed. */
static void r300_delete_blend_state(struct pipe_context* pipe,
                                    void* state)
{
    (void)pipe;

    FREE(state);
}
 
/* Convert float to 10bit integer.
 *
 * Maps f from [0, 1] to [0, 1023] by scaling with 1023.9 and truncating.
 * Inputs outside that range are saturated: anything that is not greater
 * than zero (negative values, -0 and NaN) yields 0, anything >= 1 yields
 * 1023.  The range is checked on the float, before the conversion, because
 * converting an out-of-range float to unsigned is undefined behaviour. */
static unsigned float_to_fixed10(float f)
{
    /* Written as a negated comparison so that NaN also ends up here. */
    if (!(f > 0.0f))
        return 0;

    if (f >= 1.0f)
        return 1023;

    /* 0 < f < 1, so the product is below 1023.9 and truncates to <= 1023. */
    return (unsigned)(f * 1023.9f);
}
 
/* Set blend color.
 *
 * Saves the color in the blend-color state object, builds the command buffer
 * that programs it and marks the blend-color atom dirty.  R500 chips get the
 * two R500_RB3D_CONSTANT_COLOR registers; all other chips get the single
 * packed R300_RB3D_BLEND_COLOR register.  The channels written depend on
 * the format of colorbuffer 0. */
static void r300_set_blend_color(struct pipe_context* pipe,
const struct pipe_blend_color* color)
{
struct r300_context* r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb = r300->fb_state.state;
struct r300_blend_color_state *state =
(struct r300_blend_color_state*)r300->blend_color_state.state;
struct pipe_blend_color c;
/* Format of colorbuffer 0, or 0 when no colorbuffer is bound. */
enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
float tmp;
CB_LOCALS;

state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
/* Work on a copy; the saved color stays as the caller passed it. */
c = *color;

/* The blend color is dependent on the colorbuffer format. */
if (fb->nr_cbufs) {
switch (format) {
/* Single-channel formats: copy channel 0 into channel 1. */
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
c.color[1] = c.color[0];
break;

/* Alpha-only format: copy channel 3 into channel 1. */
case PIPE_FORMAT_A8_UNORM:
c.color[1] = c.color[3];
break;

/* Two-channel formats: copy the second stored channel into channel 2. */
case PIPE_FORMAT_R8G8_UNORM:
c.color[2] = c.color[1];
break;

case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_R8A8_UNORM:
c.color[2] = c.color[3];
break;

/* RGBA-ordered formats: swap channels 0 and 2. */
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
tmp = c.color[0];
c.color[0] = c.color[2];
c.color[2] = tmp;
break;

default:;
}
}

if (r300->screen->caps.is_r500) {
/* R500: a two-register sequence starting at CONSTANT_COLOR_AR. */
BEGIN_CB(state->cb, 3);
OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);

switch (format) {
/* Half-float colorbuffers take the constant as two packed halves per
 * register. */
case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R16G16B16X16_FLOAT:
OUT_CB(util_float_to_half(c.color[2]) |
(util_float_to_half(c.color[3]) << 16));
OUT_CB(util_float_to_half(c.color[0]) |
(util_float_to_half(c.color[1]) << 16));
break;

/* Everything else: two 10-bit fixed-point values per register, the second
 * one shifted up by 16 bits. */
default:
OUT_CB(float_to_fixed10(c.color[0]) |
(float_to_fixed10(c.color[3]) << 16));
OUT_CB(float_to_fixed10(c.color[2]) |
(float_to_fixed10(c.color[1]) << 16));
}

END_CB;
} else {
/* Non-R500: one register holding the color packed as B8G8R8A8_UNORM. */
union util_color uc;
util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);

BEGIN_CB(state->cb, 2);
OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
END_CB;
}

r300_mark_atom_dirty(r300, &r300->blend_color_state);
}
 
/* Set the user clip planes.
 *
 * Without hardware TCL the planes are handed to the draw module. With
 * hardware TCL they are written into the clip atom's command buffer and
 * the atom is marked dirty. */
static void r300_set_clip_state(struct pipe_context* pipe,
                                const struct pipe_clip_state* state)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_clip_state *clip =
        (struct r300_clip_state*)r300->clip_state.state;
    CB_LOCALS;

    /* Software TCL path: let the draw module do the clipping. */
    if (!r300->screen->caps.has_tcl) {
        draw_set_clip_state(r300->draw, state);
        return;
    }

    BEGIN_CB(clip->cb, r300->clip_state.size);
    /* The start index of the clip-plane vectors differs on R500. */
    OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
               (r300->screen->caps.is_r500 ?
                R500_PVS_UCP_START : R300_PVS_UCP_START));
    /* 6 planes of 4 components each. */
    OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
    OUT_CB_TABLE(state->ucp, 6 * 4);
    END_CB;

    r300_mark_atom_dirty(r300, &r300->clip_state);
}
 
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
*
* This contains the depth buffer, stencil buffer, alpha test, and such.
* On the Radeon, depth and stencil buffer setup are intertwined, which is
* the reason for some of the strange-looking assignments across registers. */
static void* r300_create_dsa_state(struct pipe_context* pipe,
const struct pipe_depth_stencil_alpha_state* state)
{
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
CB_LOCALS;
uint32_t alpha_value_fp16 = 0;
uint32_t z_buffer_control = 0;
uint32_t z_stencil_control = 0;
uint32_t stencil_ref_mask = 0;
uint32_t stencil_ref_bf = 0;
 
dsa->dsa = *state;
 
/* Depth test setup. - separate write mask depth for decomp flush */
if (state->depth.writemask) {
z_buffer_control |= R300_Z_WRITE_ENABLE;
}
 
if (state->depth.enabled) {
z_buffer_control |= R300_Z_ENABLE;
 
z_stencil_control |=
(r300_translate_depth_stencil_function(state->depth.func) <<
R300_Z_FUNC_SHIFT);
}
 
/* Stencil buffer setup. */
if (state->stencil[0].enabled) {
z_buffer_control |= R300_STENCIL_ENABLE;
z_stencil_control |=
(r300_translate_depth_stencil_function(state->stencil[0].func) <<
R300_S_FRONT_FUNC_SHIFT) |
(r300_translate_stencil_op(state->stencil[0].fail_op) <<
R300_S_FRONT_SFAIL_OP_SHIFT) |
(r300_translate_stencil_op(state->stencil[0].zpass_op) <<
R300_S_FRONT_ZPASS_OP_SHIFT) |
(r300_translate_stencil_op(state->stencil[0].zfail_op) <<
R300_S_FRONT_ZFAIL_OP_SHIFT);
 
stencil_ref_mask =
(state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) |
(state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
 
if (state->stencil[1].enabled) {
dsa->two_sided = TRUE;
 
z_buffer_control |= R300_STENCIL_FRONT_BACK;
z_stencil_control |=
(r300_translate_depth_stencil_function(state->stencil[1].func) <<
R300_S_BACK_FUNC_SHIFT) |
(r300_translate_stencil_op(state->stencil[1].fail_op) <<
R300_S_BACK_SFAIL_OP_SHIFT) |
(r300_translate_stencil_op(state->stencil[1].zpass_op) <<
R300_S_BACK_ZPASS_OP_SHIFT) |
(r300_translate_stencil_op(state->stencil[1].zfail_op) <<
R300_S_BACK_ZFAIL_OP_SHIFT);
 
stencil_ref_bf =
(state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
(state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
 
if (is_r500) {
z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
} else {
dsa->two_sided_stencil_ref =
(state->stencil[0].valuemask != state->stencil[1].valuemask ||
state->stencil[0].writemask != state->stencil[1].writemask);
}
}
}
 
/* Alpha test setup. */
if (state->alpha.enabled) {
dsa->alpha_function =
r300_translate_alpha_function(state->alpha.func) |
R300_FG_ALPHA_FUNC_ENABLE;
 
dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
alpha_value_fp16 = util_float_to_half(state->alpha.ref_value);
}
 
BEGIN_CB(&dsa->cb_begin, 8);
OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
OUT_CB(z_buffer_control);
OUT_CB(z_stencil_control);
OUT_CB(stencil_ref_mask);
OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, stencil_ref_bf);
OUT_CB_REG(R500_FG_ALPHA_VALUE, alpha_value_fp16);
END_CB;
 
BEGIN_CB(dsa->cb_zb_no_readwrite, 8);
OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
OUT_CB(0);
OUT_CB(0);
OUT_CB(0);
OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
OUT_CB_REG(R500_FG_ALPHA_VALUE, alpha_value_fp16);
END_CB;
 
return (void*)dsa;
}
 
/* Merge the context's current stencil reference values into the bound DSA
 * state: the front value goes into stencil_ref_mask and the back value into
 * stencil_ref_bf, replacing the bits covered by R300_STENCILREF_MASK.
 * Does nothing when no DSA state is bound. */
static void r300_dsa_inject_stencilref(struct r300_context *r300)
{
    struct r300_dsa_state *dsa =
        (struct r300_dsa_state*)r300->dsa_state.state;

    if (dsa != NULL) {
        /* Front face. */
        dsa->stencil_ref_mask =
            r300->stencil_ref.ref_value[0] |
            (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK);

        /* Back face. */
        dsa->stencil_ref_bf =
            r300->stencil_ref.ref_value[1] |
            (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK);
    }
}
 
/* Make a DSA CSO current. A NULL CSO is ignored, so the previously bound
 * one stays in place. */
static void r300_bind_dsa_state(struct pipe_context* pipe,
                                void* state)
{
    struct r300_context* r300 = r300_context(pipe);

    if (state != NULL) {
        UPDATE_STATE(state, r300->dsa_state);

        /* Will be updated before the emission. */
        r300_mark_atom_dirty(r300, &r300->hyperz_state);

        /* The new CSO needs the current stencil reference values. */
        r300_dsa_inject_stencilref(r300);
    }
}
 
/* Release the memory of a DSA CSO. */
static void r300_delete_dsa_state(struct pipe_context* pipe,
                                  void* state)
{
    (void)pipe; /* The context is not needed to free the CSO. */

    FREE(state);
}
 
/* Store new stencil reference values in the context, merge them into the
 * bound DSA state and mark the DSA atom dirty. */
static void r300_set_stencil_ref(struct pipe_context* pipe,
                                 const struct pipe_stencil_ref* sr)
{
    struct r300_context* r300 = r300_context(pipe);

    /* Keep a copy; it is re-applied whenever a DSA CSO is bound. */
    r300->stencil_ref = *sr;
    r300_dsa_inject_stencilref(r300);

    r300_mark_atom_dirty(r300, &r300->dsa_state);
}
 
/* Update the buffer's tiling flags for the given mip level, but only when
 * that level's macrotile flag differs from the one of the level recorded
 * in tex->surface_level. */
static void r300_tex_set_tiling_flags(struct r300_context *r300,
                                      struct r300_resource *tex,
                                      unsigned level)
{
    /* Same macrotile flag as the current level: nothing to change. */
    if (tex->tex.macrotile[tex->surface_level] == tex->tex.macrotile[level])
        return;

    r300->rws->buffer_set_tiling(tex->buf, r300->cs,
                                 tex->tex.microtile,
                                 tex->tex.macrotile[level],
                                 0, 0, 0, 0, 0,
                                 tex->tex.stride_in_bytes[0]);

    /* Remember which level the flags now describe. */
    tex->surface_level = level;
}
 
/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
static void r300_fb_set_tiling_flags(struct r300_context *r300,
const struct pipe_framebuffer_state *state)
{
unsigned i;
 
/* Set tiling flags for new surfaces. */
for (i = 0; i < state->nr_cbufs; i++) {
r300_tex_set_tiling_flags(r300,
r300_resource(state->cbufs[i]->texture),
state->cbufs[i]->u.tex.level);
}
if (state->zsbuf) {
r300_tex_set_tiling_flags(r300,
r300_resource(state->zsbuf->texture),
state->zsbuf->u.tex.level);
}
}
 
/* Print a two-line debug description of a framebuffer surface to stderr.
 *
 * The first line describes the surface itself (dimensions, layer range,
 * mip level, surface format); the second ("TEX:") line describes the
 * texture behind it (tiling flags, dimensions, last level, texture format).
 *
 * surf:    surface to describe
 * index:   slot number printed next to the binding name
 * binding: short label for the binding point (callers pass "CB" or "ZB") */
static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
                                    const char *binding)
{
    struct pipe_resource *tex = surf->texture;
    struct r300_resource *rtex = r300_resource(tex);

    fprintf(stderr,
            "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, "
            "Lastlayer: %i, Level: %i, Format: %s\n"

            "r300: TEX: Macro: %s, Micro: %s, "
            "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",

            binding, index, surf->width, surf->height,
            surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
            util_format_short_name(surf->format),

            rtex->tex.macrotile[0] ? "YES" : " NO",
            rtex->tex.microtile ? "YES" : " NO",
            tex->width0, tex->height0, tex->depth0,
            /* The TEX line reports the texture's own format, which may
             * differ from the format the surface views it with. */
            tex->last_level, util_format_short_name(tex->format));
}
 
void r300_mark_fb_state_dirty(struct r300_context *r300,
enum r300_fb_state_change change)
{
struct pipe_framebuffer_state *state = r300->fb_state.state;
 
r300_mark_atom_dirty(r300, &r300->gpu_flush);
r300_mark_atom_dirty(r300, &r300->fb_state);
 
/* What is marked as dirty depends on the enum r300_fb_state_change. */
if (change == R300_CHANGED_FB_STATE) {
r300_mark_atom_dirty(r300, &r300->aa_state);
r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */
r300_set_blend_color(&r300->context, r300->blend_color_state.state);
}
 
if (change == R300_CHANGED_FB_STATE ||
change == R300_CHANGED_HYPERZ_FLAG) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
 
if (change == R300_CHANGED_FB_STATE ||
change == R300_CHANGED_MULTIWRITE) {
r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
}
 
/* Now compute the fb_state atom size. */
r300->fb_state.size = 2 + (8 * state->nr_cbufs);
 
if (r300->cbzb_clear)
r300->fb_state.size += 10;
else if (state->zsbuf) {
r300->fb_state.size += 10;
if (r300->hyperz_enabled)
r300->fb_state.size += 8;
}
 
if (r300->cmask_in_use) {
r300->fb_state.size += 6;
if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) {
r300->fb_state.size += 3;
}
}
 
/* The size of the rest of atoms stays the same. */
}
 
/* Return the sample count of the current framebuffer: the smallest
 * nr_samples among all bound colorbuffers and the depth/stencil buffer,
 * capped at 6. An empty framebuffer or a result of 0 yields 1. */
static unsigned r300_get_num_samples(struct r300_context *r300)
{
    struct pipe_framebuffer_state* fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    unsigned samples = 6; /* Start from the cap and only go down. */
    unsigned cb;

    /* Nothing bound at all. */
    if (fb->nr_cbufs == 0 && fb->zsbuf == NULL)
        return 1;

    for (cb = 0; cb < fb->nr_cbufs; cb++) {
        samples = MIN2(samples, fb->cbufs[cb]->texture->nr_samples);
    }

    if (fb->zsbuf != NULL) {
        samples = MIN2(samples, fb->zsbuf->texture->nr_samples);
    }

    /* A count of 0 is reported as 1. */
    return samples ? samples : 1;
}
 
/* Bind a new framebuffer state.
 *
 * Rejects framebuffers larger than the per-chip limit, deals with a
 * compressed (zmask) depth buffer that is being replaced or unbound,
 * copies the new state into the context, marks the dependent atoms dirty,
 * and recomputes the zbuffer bit depth, the sample count and the AA
 * configuration. With DBG_FB enabled, the bound surfaces are printed. */
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
struct pipe_framebuffer_state *old_state = r300->fb_state.state;
unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
boolean unlock_zbuffer = FALSE;

/* Maximum render target size, selected by chip family. */
if (r300->screen->caps.is_r500) {
max_width = max_height = 4096;
} else if (r300->screen->caps.is_r400) {
max_width = max_height = 4021;
} else {
max_width = max_height = 2560;
}

/* Too big: keep the previously bound framebuffer and only complain. */
if (state->width > max_width || state->height > max_height) {
fprintf(stderr, "r300: Implementation error: Render targets are too "
"big in %s, refusing to bind framebuffer state!\n", __FUNCTION__);
return;
}

if (old_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) {
/* There is a zmask in use, what are we gonna do? */
if (state->zsbuf) {
if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
/* Decompress the currently bound zbuffer before we bind another one. */
r300_decompress_zmask(r300);
r300->hiz_in_use = FALSE;
}
} else {
/* We don't bind another zbuffer, so lock the current one. */
pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
}
} else if (r300->locked_zbuffer) {
/* We have a locked zbuffer now, what are we gonna do? */
if (state->zsbuf) {
if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
/* We are binding some other zbuffer, so decompress the locked one,
* it gets unlocked automatically. */
r300_decompress_zmask_locked_unsafe(r300);
r300->hiz_in_use = FALSE;
} else {
/* We are binding the locked zbuffer again, so unlock it. */
unlock_zbuffer = TRUE;
}
}
}
/* After the handling above, a zmask may only still be in use if a zbuffer
 * is being bound or one stays locked. */
assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use);

/* Set whether CMASK can be used. */
r300->cmask_in_use =
state->nr_cbufs == 1 &&
r300->screen->cmask_resource == state->cbufs[0]->texture;

/* Need to reset clamping or colormask. */
r300_mark_atom_dirty(r300, &r300->blend_state);

/* Re-swizzle the blend color. */
/* NOTE(review): r300->fb_state.state still holds the previous framebuffer
 * here (it is overwritten by util_copy_framebuffer_state below), and
 * r300_mark_fb_state_dirty(R300_CHANGED_FB_STATE) calls
 * r300_set_blend_color again after the copy - confirm whether this earlier
 * call is still needed. */
r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state);

/* If zsbuf is set from NULL to non-NULL or vice versa.. */
if (!!old_state->zsbuf != !!state->zsbuf) {
r300_mark_atom_dirty(r300, &r300->dsa_state);
}

if (r300->screen->info.drm_minor < 12) {
/* The tiling flags are dependent on the surface miplevel, unfortunately.
* This workarounds a bad design decision in old kernels which were
* rewriting tile fields in registers. */
r300_fb_set_tiling_flags(r300, state);
}

/* From here on old_state refers to the new contents, because it aliases
 * r300->fb_state.state. */
util_copy_framebuffer_state(r300->fb_state.state, state);

if (unlock_zbuffer) {
pipe_surface_reference(&r300->locked_zbuffer, NULL);
}

r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);

if (state->zsbuf) {
/* 2-byte formats count as 16 bits and 4-byte formats as 24 bits; any
 * other block size leaves zbuffer_bpp at 0. */
switch (util_format_get_blocksize(state->zsbuf->format)) {
case 2:
zbuffer_bpp = 16;
break;
case 4:
zbuffer_bpp = 24;
break;
}

/* Polygon offset depends on the zbuffer bit depth. */
if (r300->zbuffer_bpp != zbuffer_bpp) {
r300->zbuffer_bpp = zbuffer_bpp;

if (r300->polygon_offset_enabled)
r300_mark_atom_dirty(r300, &r300->rs_state);
}
}

r300->num_samples = r300_get_num_samples(r300);

/* Set up AA config. */
if (r300->num_samples > 1) {
/* Only 2, 4 and 6 samples are handled; any other count above 1 leaves
 * aa_config unchanged. */
switch (r300->num_samples) {
case 2:
aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
break;
case 4:
aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
break;
case 6:
aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
break;
}
} else {
aa->aa_config = 0;
}

/* Optional debug dump of every bound surface. */
if (DBG_ON(r300, DBG_FB)) {
fprintf(stderr, "r300: set_framebuffer_state:\n");
for (i = 0; i < state->nr_cbufs; i++) {
r300_print_fb_surf_info(state->cbufs[i], i, "CB");
}
if (state->zsbuf) {
r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
}
}
}
 
/* Create fragment shader state.
 *
 * Allocates an r300_fragment_shader, copies the pipe_shader_state into it
 * and gives it its own duplicate of the TGSI tokens.
 *
 * Returns the new shader as an opaque pointer, or NULL when the allocation
 * fails. */
static void* r300_create_fs_state(struct pipe_context* pipe,
                                  const struct pipe_shader_state* shader)
{
    struct r300_fragment_shader* fs = CALLOC_STRUCT(r300_fragment_shader);

    /* Out of memory: report failure instead of dereferencing NULL below. */
    if (!fs)
        return NULL;

    /* Copy state directly into shader. */
    fs->state = *shader;
    /* The copy above shares the caller's token pointer; replace it with a
     * private duplicate (freed again in r300_delete_fs_state). */
    fs->state.tokens = tgsi_dup_tokens(shader->tokens);

    return (void*)fs;
}
 
void r300_mark_fs_code_dirty(struct r300_context *r300)
{
struct r300_fragment_shader* fs = r300_fs(r300);
 
r300_mark_atom_dirty(r300, &r300->fs);
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
r300_mark_atom_dirty(r300, &r300->fs_constants);
r300->fs.size = fs->shader->cb_code_size;
 
if (r300->screen->caps.is_r500) {
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
} else {
r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
}
 
((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table =
fs->shader->code.constants_remap_table;
}
 
/* Make a fragment shader current. Binding NULL only clears the pointer;
 * binding a shader additionally marks it dirty and schedules an RS block
 * update. */
static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;

    r300->fs.state = fs;

    if (fs != NULL) {
        r300->fs_status = FRAGMENT_SHADER_DIRTY;

        /* Will be updated before the emission. */
        r300_mark_atom_dirty(r300, &r300->rs_block_state);
    }
}
 
/* Destroy a fragment shader: every entry of the code list starting at
 * fs->first is released (constants, command buffer, the entry itself),
 * followed by the duplicated tokens and the shader object. */
static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
{
    struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
    struct r300_fragment_shader_code *entry = fs->first;

    while (entry != NULL) {
        /* Fetch the successor before the entry is freed. */
        struct r300_fragment_shader_code *following = entry->next;

        rc_constants_destroy(&entry->code.constants);
        FREE(entry->cb_code);
        FREE(entry);

        entry = following;
    }

    FREE((void*)fs->state.tokens);
    FREE(shader);
}
 
/* Polygon stipple hook. Not implemented: the pattern is accepted and
 * ignored. */
static void r300_set_polygon_stipple(struct pipe_context* pipe,
                                     const struct pipe_poly_stipple* state)
{
    /* XXX no idea how to set this up, but not terribly important */
    (void)pipe;
    (void)state;
}
 
/* Create a new rasterizer state based on the CSO rasterizer state.
 *
 * This is a very large chunk of state, and covers most of the graphics
 * backend (GB), geometry assembly (GA), and setup unit (SU) blocks.
 *
 * In a not entirely unironic sidenote, this state has nearly nothing to do
 * with the actual block on the Radeon called the rasterizer (RS).
 *
 * The register values are translated into locals and then written into
 * rs->cb_main. When polygon offset is enabled, two more command buffers
 * are built, one for 16-bit and one for 24-bit zbuffers.
 *
 * Returns the new r300_rs_state as an opaque pointer, or NULL when the
 * allocation fails. */
static void* r300_create_rs_state(struct pipe_context* pipe,
                                  const struct pipe_rasterizer_state* state)
{
    struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
    uint32_t vap_control_status;    /* R300_VAP_CNTL_STATUS: 0x2140 */
    uint32_t vap_clip_cntl;         /* R300_VAP_CLIP_CNTL: 0x221C */
    uint32_t point_size;            /* R300_GA_POINT_SIZE: 0x421c */
    uint32_t point_minmax;          /* R300_GA_POINT_MINMAX: 0x4230 */
    uint32_t line_control;          /* R300_GA_LINE_CNTL: 0x4234 */
    uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
    uint32_t cull_mode;             /* R300_SU_CULL_MODE: 0x42b8 */
    uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
    uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
    uint32_t polygon_mode;          /* R300_GA_POLY_MODE: 0x4288 */
    uint32_t clip_rule;             /* R300_SC_CLIP_RULE: 0x43D0 */
    uint32_t round_mode;            /* R300_GA_ROUND_MODE: 0x428c */

    /* Point sprites texture coordinates, 0: lower left, 1: upper right */
    float point_texcoord_left = 0;  /* R300_GA_POINT_S0: 0x4200 */
    float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
    float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
    float point_texcoord_top = 0;   /* R300_GA_POINT_T1: 0x420c */
    boolean vclamp = !r300_context(pipe)->screen->caps.is_r500;
    CB_LOCALS;

    /* Out of memory: report failure instead of dereferencing NULL below. */
    if (!rs)
        return NULL;

    /* Copy rasterizer state. */
    rs->rs = *state;
    rs->rs_draw = *state;

    /* Sprite coords only apply with point-quad rasterization. */
    rs->rs.sprite_coord_enable = state->point_quad_rasterization *
                                 state->sprite_coord_enable;

    /* Override some states for Draw. */
    rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
    rs->rs_draw.offset_point = 0;
    rs->rs_draw.offset_line = 0;
    rs->rs_draw.offset_tri = 0;
    rs->rs_draw.offset_clamp = 0;

#ifdef PIPE_ARCH_LITTLE_ENDIAN
    vap_control_status = R300_VC_NO_SWAP;
#else
    vap_control_status = R300_VC_32BIT_SWAP;
#endif

    /* If no TCL engine is present, turn off the HW TCL. */
    if (!r300_screen(pipe->screen)->caps.has_tcl) {
        vap_control_status |= R300_VAP_TCL_BYPASS;
    }

    /* Point size width and height. */
    point_size =
        pack_float_16_6x(state->point_size) |
        (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);

    /* Point size clamping. */
    if (state->point_size_per_vertex) {
        /* Per-vertex point size.
         * Clamp to [0, max FB size] */
        float min_psiz = util_get_min_point_size(state);
        float max_psiz = pipe->screen->get_paramf(pipe->screen,
                                        PIPE_CAPF_MAX_POINT_WIDTH);
        point_minmax =
            (pack_float_16_6x(min_psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
            (pack_float_16_6x(max_psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
    } else {
        /* We cannot disable the point-size vertex output,
         * so clamp it. */
        float psiz = state->point_size;
        point_minmax =
            (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
            (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
    }

    /* Line control. */
    line_control = pack_float_16_6x(state->line_width) |
        R300_GA_LINE_CNTL_END_TYPE_COMP;

    /* Enable polygon mode */
    polygon_mode = 0;
    if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
        state->fill_back != PIPE_POLYGON_MODE_FILL) {
        polygon_mode = R300_GA_POLY_MODE_DUAL;
    }

    /* Front face */
    if (state->front_ccw)
        cull_mode = R300_FRONT_FACE_CCW;
    else
        cull_mode = R300_FRONT_FACE_CW;

    /* Polygon offset */
    polygon_offset_enable = 0;
    if (util_get_offset(state, state->fill_front)) {
        polygon_offset_enable |= R300_FRONT_ENABLE;
    }
    if (util_get_offset(state, state->fill_back)) {
        polygon_offset_enable |= R300_BACK_ENABLE;
    }

    rs->polygon_offset_enable = polygon_offset_enable != 0;

    /* Polygon mode */
    if (polygon_mode) {
        polygon_mode |=
            r300_translate_polygon_mode_front(state->fill_front);
        polygon_mode |=
            r300_translate_polygon_mode_back(state->fill_back);
    }

    if (state->cull_face & PIPE_FACE_FRONT) {
        cull_mode |= R300_CULL_FRONT;
    }
    if (state->cull_face & PIPE_FACE_BACK) {
        cull_mode |= R300_CULL_BACK;
    }

    if (state->line_stipple_enable) {
        line_stipple_config =
            R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
            (fui((float)state->line_stipple_factor) &
                R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
        /* XXX this might need to be scaled up */
        line_stipple_value = state->line_stipple_pattern;
    } else {
        line_stipple_config = 0;
        line_stipple_value = 0;
    }

    if (state->flatshade) {
        rs->color_control = R300_SHADE_MODEL_FLAT;
    } else {
        rs->color_control = R300_SHADE_MODEL_SMOOTH;
    }

    clip_rule = state->scissor ? 0xAAAA : 0xFFFF;

    /* Point sprites coord mode */
    if (rs->rs.sprite_coord_enable) {
        switch (state->sprite_coord_mode) {
            case PIPE_SPRITE_COORD_UPPER_LEFT:
                point_texcoord_top = 0.0f;
                point_texcoord_bottom = 1.0f;
                break;
            case PIPE_SPRITE_COORD_LOWER_LEFT:
                point_texcoord_top = 1.0f;
                point_texcoord_bottom = 0.0f;
                break;
        }
    }

    if (r300_screen(pipe->screen)->caps.has_tcl) {
        /* Low 6 bits: one enable bit per user clip plane. */
        vap_clip_cntl = (state->clip_plane_enable & 63) |
                        R300_PS_UCP_MODE_CLIP_AS_TRIFAN;
    } else {
        vap_clip_cntl = R300_CLIP_DISABLE;
    }

    /* Vertex color clamping. FP20 means no clamping. */
    round_mode =
        R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST |
        (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 |
                    R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0);

    /* Build the main command buffer. */
    BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE);
    OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
    OUT_CB_REG(R300_VAP_CLIP_CNTL, vap_clip_cntl);
    OUT_CB_REG(R300_GA_POINT_SIZE, point_size);
    OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2);
    OUT_CB(point_minmax);
    OUT_CB(line_control);
    OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
    OUT_CB(polygon_offset_enable);
    /* Record the position of the cull mode dword written next. */
    rs->cull_mode_index = 11;
    OUT_CB(cull_mode);
    OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
    OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
    OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
    OUT_CB_REG(R300_GA_ROUND_MODE, round_mode);
    OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
    OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
    OUT_CB_32F(point_texcoord_left);
    OUT_CB_32F(point_texcoord_bottom);
    OUT_CB_32F(point_texcoord_right);
    OUT_CB_32F(point_texcoord_top);
    END_CB;

    /* Build the two command buffers for polygon offset setup. */
    if (polygon_offset_enable) {
        float scale = state->offset_scale * 12;
        float offset = state->offset_units * 4;

        /* Front and back faces get the same scale/offset pair. */
        BEGIN_CB(rs->cb_poly_offset_zb16, 5);
        OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
        OUT_CB_32F(scale);
        OUT_CB_32F(offset);
        OUT_CB_32F(scale);
        OUT_CB_32F(offset);
        END_CB;

        /* The 24-bit variant uses half the offset multiplier. */
        offset = state->offset_units * 2;

        BEGIN_CB(rs->cb_poly_offset_zb24, 5);
        OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
        OUT_CB_32F(scale);
        OUT_CB_32F(offset);
        OUT_CB_32F(scale);
        OUT_CB_32F(offset);
        END_CB;
    }

    return (void*)rs;
}
 
/* Make a rasterizer CSO current.
 *
 * The draw module (if present) receives the draw-specific copy of the
 * state. A handful of flags are cached in the context (all cleared when
 * NULL is bound), the rs_state atom size is recomputed, and dependent
 * state is flagged when one of the cached flags changed. */
static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_rs_state* rs = (struct r300_rs_state*)state;
    const int prev_sprite_coord_enable = r300->sprite_coord_enable;
    const boolean prev_two_sided_color = r300->two_sided_color;
    const boolean prev_msaa_enable = r300->msaa_enable;
    const boolean prev_flatshade = r300->flatshade;

    if (rs == NULL) {
        /* Nothing bound: fall back to all-off defaults. */
        r300->polygon_offset_enabled = FALSE;
        r300->sprite_coord_enable = 0;
        r300->two_sided_color = FALSE;
        r300->msaa_enable = FALSE;
        r300->flatshade = FALSE;
    } else {
        if (r300->draw) {
            draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
        }

        r300->polygon_offset_enabled = rs->polygon_offset_enable;
        r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
        r300->two_sided_color = rs->rs.light_twoside;
        r300->msaa_enable = rs->rs.multisample;
        r300->flatshade = rs->rs.flatshade;
    }

    UPDATE_STATE(state, r300->rs_state);

    /* The polygon offset registers add 5 dwords when enabled. */
    r300->rs_state.size = RS_STATE_MAIN_SIZE;
    if (r300->polygon_offset_enabled) {
        r300->rs_state.size += 5;
    }

    /* These three flags feed the RS block setup. */
    if (prev_sprite_coord_enable != r300->sprite_coord_enable ||
        prev_two_sided_color != r300->two_sided_color ||
        prev_flatshade != r300->flatshade) {
        r300_mark_atom_dirty(r300, &r300->rs_block_state);
    }

    /* No MSAA toggle, nothing more to do. */
    if (prev_msaa_enable == r300->msaa_enable)
        return;

    if (r300->alpha_to_coverage) {
        r300_mark_atom_dirty(r300, &r300->dsa_state);
    }

    if (r300->alpha_to_one &&
        r300->fs_status == FRAGMENT_SHADER_VALID) {
        r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
    }
}
 
/* Release the memory of a rasterizer CSO. */
static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
{
    (void)pipe; /* The context is not needed to free the CSO. */

    FREE(state);
}
 
/* Create a sampler state object from the CSO sampler state.
 *
 * Keeps a (possibly adjusted) copy of the pipe state and precomputes the
 * filter0/filter1 register bits as well as the integer LOD clamp range.
 *
 * Returns the new r300_sampler_state as an opaque pointer, or NULL when
 * the allocation fails. */
static void*
        r300_create_sampler_state(struct pipe_context* pipe,
                                  const struct pipe_sampler_state* state)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
    boolean is_r500 = r300->screen->caps.is_r500;
    int lod_bias;

    /* Out of memory: report failure instead of dereferencing NULL below. */
    if (!sampler)
        return NULL;

    sampler->state = *state;

    /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG
     * or MIN filter is NEAREST. Since texwrap produces same results
     * for CLAMP and CLAMP_TO_EDGE, we use them instead. */
    if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST ||
        sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
        /* Wrap S. */
        if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP)
            sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
        else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
            sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;

        /* Wrap T. */
        if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP)
            sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
        else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
            sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;

        /* Wrap R. */
        if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP)
            sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
        else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
            sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
    }

    /* Wrap modes come from the adjusted copy, not from the caller's state. */
    sampler->filter0 |=
        (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) |
        (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) |
        (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT);

    sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
                                                   state->mag_img_filter,
                                                   state->min_mip_filter,
                                                   state->max_anisotropy > 1);

    sampler->filter0 |= r300_anisotropy(state->max_anisotropy);

    /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
    /* We must pass these to the merge function to clamp them properly. */
    sampler->min_lod = (unsigned)MAX2(state->min_lod, 0);
    sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0);

    /* Fixed-point LOD bias, clamped to a signed 10-bit range. */
    lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);

    sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;

    /* This is very high quality anisotropic filtering for R5xx.
     * It's good for benchmarking the performance of texturing but
     * in practice we don't want to slow down the driver because it's
     * a pretty good performance killer. Feel free to play with it. */
    if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) {
        sampler->filter1 |= r500_anisotropy(state->max_anisotropy);
    }

    /* R500-specific fixups and optimizations */
    if (is_r500) {
        sampler->filter1 |= R500_BORDER_FIX;
    }

    return (void*)sampler;
}
 
/* Bind "count" fragment sampler states. A request for more samplers than
 * the chip has texture units is ignored entirely. */
static void r300_bind_sampler_states(struct pipe_context* pipe,
                                     unsigned count,
                                     void** states)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_textures_state* state =
        (struct r300_textures_state*)r300->textures_state.state;
    const unsigned max_units = r300->screen->caps.num_tex_units;

    if (count <= max_units) {
        /* Only the pointers are stored; the CSOs stay owned by the caller. */
        memcpy(state->sampler_states, states, sizeof(void*) * count);
        state->sampler_state_count = count;

        r300_mark_atom_dirty(r300, &r300->textures_state);
    }
}
 
/* Deliberately empty sampler-binding hook: all arguments are ignored. */
static void r300_lacks_vertex_textures(struct pipe_context* pipe,
                                       unsigned count,
                                       void** states)
{
    (void)pipe;
    (void)count;
    (void)states;
}
 
/* Release the memory of a sampler CSO. */
static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
{
    (void)pipe; /* The context is not needed to free the CSO. */

    FREE(state);
}
 
/* Pick the texture cache region for the index-th of num bound textures.
 *
 * This looks like a hack, but it is believed to be how the hardware is
 * supposed to be used. As an illustration, assume there are 5 textures.
 * According to the docs, 5 and the numbers following it are:
 *
 *      FOURTH_1 = 5
 *      FOURTH_2 = 6
 *      FOURTH_3 = 7
 *      EIGHTH_0 = 8
 *      EIGHTH_1 = 9
 *
 * The first 3 textures get 3/4 of the cache, divided evenly between them.
 * The remaining 1/4 has to be shared by the last 2 textures, so each of
 * them gets 1/8 of the cache. Hence simply using "5 + texture_index".
 *
 * This simple trick works for all "num" <= 16. With at most one texture,
 * the whole cache is used. */
static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num)
{
    if (num > 1)
        return R300_TX_CACHE(num + index);

    return R300_TX_CACHE(R300_TX_CACHE_WHOLE);
}
 
/* Bind "count" fragment sampler views.
 *
 * References are taken on the new views, slots from "count" up to the
 * number of texture units are released, and every non-NULL view gets a
 * texture cache region assigned. A request for more views than the chip
 * has texture units is ignored entirely. */
static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
                                            unsigned count,
                                            struct pipe_sampler_view** views)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_textures_state* state =
        (struct r300_textures_state*)r300->textures_state.state;
    struct r300_resource *texture;
    const unsigned max_units = r300->screen->caps.num_tex_units;
    unsigned slot, bound_views = 0, next_region = 0;
    boolean any_bound = FALSE;

    if (count > max_units) {
        return;
    }

    /* The number of non-NULL views must be known before regions are
     * assigned. */
    for (slot = 0; slot < count; slot++) {
        if (views[slot] != NULL)
            bound_views++;
    }

    for (slot = 0; slot < count; slot++) {
        pipe_sampler_view_reference(
                (struct pipe_sampler_view**)&state->sampler_views[slot],
                views[slot]);

        if (views[slot] == NULL) {
            continue;
        }

        /* A new sampler view (= texture)... */
        any_bound = TRUE;

        /* Set the texrect factor in the fragment shader.
         * Needed for RECT and NPOT fallback. */
        texture = r300_resource(views[slot]->texture);
        if (texture->tex.is_npot) {
            r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
        }

        /* Regions are numbered over the non-NULL views only. */
        state->sampler_views[slot]->texcache_region =
            r300_assign_texture_cache_region(next_region, bound_views);
        next_region++;
    }

    /* Release whatever is still bound in the slots that are now unused. */
    for (slot = count; slot < max_units; slot++) {
        if (state->sampler_views[slot] != NULL) {
            pipe_sampler_view_reference(
                    (struct pipe_sampler_view**)&state->sampler_views[slot],
                    NULL);
        }
    }

    state->sampler_view_count = count;

    r300_mark_atom_dirty(r300, &r300->textures_state);

    if (any_bound) {
        r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
    }
}
 
/* Create a sampler view with explicit width0/height0 overrides.
 *
 * The view starts as a copy of the template with a reference count of 1,
 * holds its own reference on the texture, and has its hardware format
 * state precomputed. Returns NULL when the allocation fails. */
struct pipe_sampler_view *
r300_create_sampler_view_custom(struct pipe_context *pipe,
                                struct pipe_resource *texture,
                                const struct pipe_sampler_view *templ,
                                unsigned width0_override,
                                unsigned height0_override)
{
    struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
    struct r300_resource *tex = r300_resource(texture);
    boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
    boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
    unsigned hwformat;

    if (view == NULL)
        return (struct pipe_sampler_view*)view;

    view->base = *templ;
    view->base.reference.count = 1;
    view->base.context = pipe;
    /* Clear the pointer copied from the template before taking our own
     * reference. */
    view->base.texture = NULL;
    pipe_resource_reference(&view->base.texture, texture);

    view->width0_override = width0_override;
    view->height0_override = height0_override;
    view->swizzle[0] = templ->swizzle_r;
    view->swizzle[1] = templ->swizzle_g;
    view->swizzle[2] = templ->swizzle_b;
    view->swizzle[3] = templ->swizzle_a;

    hwformat = r300_translate_texformat(templ->format,
                                        view->swizzle,
                                        is_r500,
                                        dxtc_swizzle);

    /* ~0 signals that the format could not be translated. */
    if (hwformat == ~0) {
        fprintf(stderr, "r300: Ooops. Got unsupported format %s in %s.\n",
                util_format_short_name(templ->format), __func__);
    }
    assert(hwformat != ~0);

    r300_texture_setup_format_state(r300_screen(pipe->screen), tex,
                                    templ->format, 0,
                                    width0_override, height0_override,
                                    &view->format);
    view->format.format1 |= hwformat;
    if (is_r500) {
        view->format.format2 |= r500_tx_format_msb_bit(templ->format);
    }

    return (struct pipe_sampler_view*)view;
}
 
static struct pipe_sampler_view *
r300_create_sampler_view(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_sampler_view *templ)
{
return r300_create_sampler_view_custom(pipe, texture, templ,
r300_resource(texture)->tex.width0,
r300_resource(texture)->tex.height0);
}
 
 
/* Destroy a sampler view: drop its texture reference, then free it. */
static void
r300_sampler_view_destroy(struct pipe_context *pipe,
                          struct pipe_sampler_view *view)
{
    (void)pipe; /* The context is not needed here. */

    pipe_resource_reference(&view->texture, NULL);
    FREE(view);
}
 
/* Store a new sample mask in the sample_mask atom and mark it dirty. */
static void r300_set_sample_mask(struct pipe_context *pipe,
                                 unsigned mask)
{
    struct r300_context* r300 = r300_context(pipe);
    unsigned *stored_mask = (unsigned*)r300->sample_mask.state;

    *stored_mask = mask;

    r300_mark_atom_dirty(r300, &r300->sample_mask);
}
 
static void r300_set_scissor_states(struct pipe_context* pipe,
unsigned start_slot,
unsigned num_scissors,
const struct pipe_scissor_state* state)
{
struct r300_context* r300 = r300_context(pipe);
 
memcpy(r300->scissor_state.state, state,
sizeof(struct pipe_scissor_state));
 
r300_mark_atom_dirty(r300, &r300->scissor_state);
}
 
static void r300_set_viewport_states(struct pipe_context* pipe,
unsigned start_slot,
unsigned num_viewports,
const struct pipe_viewport_state* state)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_viewport_state* viewport =
(struct r300_viewport_state*)r300->viewport_state.state;
 
r300->viewport = *state;
 
if (r300->draw) {
draw_set_viewport_states(r300->draw, start_slot, num_viewports, state);
viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT;
return;
}
 
/* Do the transform in HW. */
viewport->vte_control = R300_VTX_W0_FMT;
 
if (state->scale[0] != 1.0f) {
viewport->xscale = state->scale[0];
viewport->vte_control |= R300_VPORT_X_SCALE_ENA;
}
if (state->scale[1] != 1.0f) {
viewport->yscale = state->scale[1];
viewport->vte_control |= R300_VPORT_Y_SCALE_ENA;
}
if (state->scale[2] != 1.0f) {
viewport->zscale = state->scale[2];
viewport->vte_control |= R300_VPORT_Z_SCALE_ENA;
}
if (state->translate[0] != 0.0f) {
viewport->xoffset = state->translate[0];
viewport->vte_control |= R300_VPORT_X_OFFSET_ENA;
}
if (state->translate[1] != 0.0f) {
viewport->yoffset = state->translate[1];
viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA;
}
if (state->translate[2] != 0.0f) {
viewport->zoffset = state->translate[2];
viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
}
 
r300_mark_atom_dirty(r300, &r300->viewport_state);
if (r300->fs.state && r300_fs(r300)->shader &&
r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
}
}
 
/* set_vertex_buffers hook for chips with hardware TCL: update the bound
 * vertex buffers and fall back to the dummy buffer if none remain. */
static void r300_set_vertex_buffers_hwtcl(struct pipe_context* pipe,
                                          unsigned start_slot, unsigned count,
                                          const struct pipe_vertex_buffer* buffers)
{
    struct r300_context *r300 = r300_context(pipe);
    struct pipe_vertex_buffer *slots = r300->vertex_buffer;
    unsigned *num_slots = &r300->nr_vertex_buffers;

    util_set_vertex_buffers_count(slots, num_slots,
                                  buffers, start_slot, count);

    /* There must be at least one vertex buffer set, otherwise it locks up. */
    if (*num_slots == 0) {
        util_set_vertex_buffers_count(slots, num_slots,
                                      &r300->dummy_vb, 0, 1);
    }

    r300->vertex_arrays_dirty = TRUE;
}
 
static void r300_set_vertex_buffers_swtcl(struct pipe_context* pipe,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer* buffers)
{
struct r300_context* r300 = r300_context(pipe);
unsigned i;
 
util_set_vertex_buffers_count(r300->vertex_buffer,
&r300->nr_vertex_buffers,
buffers, start_slot, count);
draw_set_vertex_buffers(r300->draw, start_slot, count, buffers);
 
if (!buffers)
return;
 
for (i = 0; i < count; i++) {
if (buffers[i].user_buffer) {
draw_set_mapped_vertex_buffer(r300->draw, start_slot + i,
buffers[i].user_buffer, ~0);
} else if (buffers[i].buffer) {
draw_set_mapped_vertex_buffer(r300->draw, start_slot + i,
r300_resource(buffers[i].buffer)->malloced_buffer, ~0);
}
}
}
 
static void r300_set_index_buffer_hwtcl(struct pipe_context* pipe,
const struct pipe_index_buffer *ib)
{
struct r300_context* r300 = r300_context(pipe);
 
if (ib) {
pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
memcpy(&r300->index_buffer, ib, sizeof(*ib));
} else {
pipe_resource_reference(&r300->index_buffer.buffer, NULL);
}
}
 
/* set_index_buffer hook for software TCL: give Draw a CPU pointer to the
 * indices (user memory preferred, else the malloc'ed backing store). */
static void r300_set_index_buffer_swtcl(struct pipe_context* pipe,
                                        const struct pipe_index_buffer *ib)
{
    struct r300_context *r300 = r300_context(pipe);
    const void *indices;

    if (!ib)
        return;

    indices = ib->user_buffer;
    if (!indices && ib->buffer) {
        indices = r300_resource(ib->buffer)->malloced_buffer;
    }

    draw_set_indexes(r300->draw,
                     (const ubyte *) indices + ib->offset,
                     ib->index_size, ~0);
}
 
/* Initialize the PSC tables. */
static void r300_vertex_psc(struct r300_vertex_element_state *velems)
{
struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
uint16_t type, swizzle;
enum pipe_format format;
unsigned i;
 
/* Vertex shaders have no semantics on their inputs,
* so PSC should just route stuff based on the vertex elements,
* and not on attrib information. */
for (i = 0; i < velems->count; i++) {
format = velems->velem[i].src_format;
 
type = r300_translate_vertex_data_type(format);
if (type == R300_INVALID_FORMAT) {
fprintf(stderr, "r300: Bad vertex format %s.\n",
util_format_short_name(format));
assert(0);
abort();
}
 
type |= i << R300_DST_VEC_LOC_SHIFT;
swizzle = r300_translate_vertex_data_swizzle(format);
 
if (i & 1) {
vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
} else {
vstream->vap_prog_stream_cntl[i >> 1] |= type;
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
}
}
 
/* Set the last vector in the PSC. */
if (i) {
i -= 1;
}
vstream->vap_prog_stream_cntl[i >> 1] |=
(R300_LAST_VEC << (i & 1 ? 16 : 0));
 
vstream->count = (i >> 1) + 1;
}
 
static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
unsigned count,
const struct pipe_vertex_element* attribs)
{
struct r300_vertex_element_state *velems;
unsigned i;
struct pipe_vertex_element dummy_attrib = {0};
 
/* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */
if (!count) {
dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
attribs = &dummy_attrib;
count = 1;
} else if (count > 16) {
fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
" requested %i, using 16.\n", count);
count = 16;
}
 
velems = CALLOC_STRUCT(r300_vertex_element_state);
if (!velems)
return NULL;
 
velems->count = count;
memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
 
if (r300_screen(pipe->screen)->caps.has_tcl) {
/* Setup PSC.
* The unused components will be replaced by (..., 0, 1). */
r300_vertex_psc(velems);
 
for (i = 0; i < count; i++) {
velems->format_size[i] =
align(util_format_get_blocksize(velems->velem[i].src_format), 4);
velems->vertex_size_dwords += velems->format_size[i] / 4;
}
}
 
return velems;
}
 
/* Bind a vertex-elements CSO. A NULL state is ignored. With a Draw context
 * the elements are handed to Draw; otherwise the vertex-stream atom is
 * updated and the vertex arrays are flagged dirty. */
static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
                                            void *state)
{
    struct r300_context *r300 = r300_context(pipe);
    struct r300_vertex_element_state *velems = state;

    if (!velems)
        return;

    r300->velems = velems;

    if (r300->draw) {
        draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
    } else {
        UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
        r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
        r300->vertex_arrays_dirty = TRUE;
    }
}
 
/* Release a vertex-elements CSO; it owns no further allocations here. */
static void r300_delete_vertex_elements_state(struct pipe_context *pipe,
                                              void *state)
{
    struct r300_vertex_element_state *velems = state;

    FREE(velems);
}
 
static void* r300_create_vs_state(struct pipe_context* pipe,
const struct pipe_shader_state* shader)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
 
/* Copy state directly into shader. */
vs->state = *shader;
vs->state.tokens = tgsi_dup_tokens(shader->tokens);
 
if (r300->screen->caps.has_tcl) {
r300_init_vs_outputs(r300, vs);
r300_translate_vertex_shader(r300, vs);
} else {
r300_draw_init_vertex_shader(r300, vs);
}
 
return vs;
}
 
static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
 
if (vs == NULL) {
r300->vs_state.state = NULL;
return;
}
if (vs == r300->vs_state.state) {
return;
}
r300->vs_state.state = vs;
 
/* The majority of the RS block bits is dependent on the vertex shader. */
r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
 
if (r300->screen->caps.has_tcl) {
unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
r300_mark_atom_dirty(r300, &r300->vs_state);
r300->vs_state.size = vs->code.length + 9 +
(R300_VS_MAX_FC_OPS * fc_op_dwords + 4);
 
r300_mark_atom_dirty(r300, &r300->vs_constants);
r300->vs_constants.size =
2 +
(vs->externals_count ? vs->externals_count * 4 + 3 : 0) +
(vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
 
((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
vs->code.constants_remap_table;
 
r300_mark_atom_dirty(r300, &r300->pvs_flush);
} else {
draw_bind_vertex_shader(r300->draw,
(struct draw_vertex_shader*)vs->draw_vs);
}
}
 
static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
 
if (r300->screen->caps.has_tcl) {
rc_constants_destroy(&vs->code.constants);
FREE(vs->code.constants_remap_table);
} else {
draw_delete_vertex_shader(r300->draw,
(struct draw_vertex_shader*)vs->draw_vs);
}
 
FREE((void*)vs->state.tokens);
FREE(shader);
}
 
/* Bind a constant buffer for the vertex or fragment stage.
 *
 * Only CPU-visible storage is used: the user pointer if given, else the
 * resource's malloc'ed backing store. Other shader stages, missing buffers
 * and resources without a malloc'ed store are silently ignored. The 'index'
 * argument is not consulted. */
static void r300_set_constant_buffer(struct pipe_context *pipe,
                                     uint shader, uint index,
                                     struct pipe_constant_buffer *cb)
{
    struct r300_context *r300 = r300_context(pipe);
    struct r300_constant_buffer *target;
    struct r300_vertex_shader *bound_vs;
    uint32_t *data;

    if (!cb || (!cb->buffer && !cb->user_buffer))
        return;

    /* Pick the destination atom; anything but VS/FS is unsupported. */
    if (shader == PIPE_SHADER_VERTEX) {
        target = (struct r300_constant_buffer*)r300->vs_constants.state;
    } else if (shader == PIPE_SHADER_FRAGMENT) {
        target = (struct r300_constant_buffer*)r300->fs_constants.state;
    } else {
        return;
    }

    /* Locate the CPU pointer to the constants. */
    if (cb->user_buffer) {
        data = (uint32_t*)cb->user_buffer;
    } else {
        struct r300_resource *res = r300_resource(cb->buffer);

        if (!res || !res->malloced_buffer)
            return;
        data = (uint32_t*)res->malloced_buffer;
    }

    /* Fragment constants: just remember the pointer and re-emit. */
    if (shader == PIPE_SHADER_FRAGMENT) {
        target->ptr = data;
        r300_mark_atom_dirty(r300, &r300->fs_constants);
        return;
    }

    /* Vertex constants without hardware TCL go to Draw, if present. */
    if (!r300->screen->caps.has_tcl) {
        if (r300->draw) {
            draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
                0, data, cb->buffer_size);
        }
        return;
    }

    /* Vertex constants with hardware TCL. */
    target->ptr = data;

    bound_vs = (struct r300_vertex_shader*)r300->vs_state.state;
    if (!bound_vs) {
        target->buffer_base = 0;
        return;
    }

    /* Place this upload after the previous one; once the running base
     * exceeds R500_MAX_PVS_CONST_VECS, restart at 0 and flush PVS. */
    target->buffer_base = r300->vs_const_base;
    r300->vs_const_base += bound_vs->code.constants.Count;
    if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) {
        r300->vs_const_base = bound_vs->code.constants.Count;
        target->buffer_base = 0;
        r300_mark_atom_dirty(r300, &r300->pvs_flush);
    }
    r300_mark_atom_dirty(r300, &r300->vs_constants);
}
 
/* texture_barrier hook: mark the GPU-flush and texture-cache-invalidate
 * atoms dirty. */
static void r300_texture_barrier(struct pipe_context *pipe)
{
    struct r300_context *ctx = r300_context(pipe);

    r300_mark_atom_dirty(ctx, &ctx->gpu_flush);
    r300_mark_atom_dirty(ctx, &ctx->texture_cache_inval);
}
 
void r300_init_state_functions(struct r300_context* r300)
{
r300->context.create_blend_state = r300_create_blend_state;
r300->context.bind_blend_state = r300_bind_blend_state;
r300->context.delete_blend_state = r300_delete_blend_state;
 
r300->context.set_blend_color = r300_set_blend_color;
 
r300->context.set_clip_state = r300_set_clip_state;
r300->context.set_sample_mask = r300_set_sample_mask;
 
r300->context.set_constant_buffer = r300_set_constant_buffer;
 
r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state;
r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
 
r300->context.set_stencil_ref = r300_set_stencil_ref;
 
r300->context.set_framebuffer_state = r300_set_framebuffer_state;
 
r300->context.create_fs_state = r300_create_fs_state;
r300->context.bind_fs_state = r300_bind_fs_state;
r300->context.delete_fs_state = r300_delete_fs_state;
 
r300->context.set_polygon_stipple = r300_set_polygon_stipple;
 
r300->context.create_rasterizer_state = r300_create_rs_state;
r300->context.bind_rasterizer_state = r300_bind_rs_state;
r300->context.delete_rasterizer_state = r300_delete_rs_state;
 
r300->context.create_sampler_state = r300_create_sampler_state;
r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
r300->context.delete_sampler_state = r300_delete_sampler_state;
 
r300->context.set_fragment_sampler_views = r300_set_fragment_sampler_views;
r300->context.create_sampler_view = r300_create_sampler_view;
r300->context.sampler_view_destroy = r300_sampler_view_destroy;
 
r300->context.set_scissor_states = r300_set_scissor_states;
 
r300->context.set_viewport_states = r300_set_viewport_states;
 
if (r300->screen->caps.has_tcl) {
r300->context.set_vertex_buffers = r300_set_vertex_buffers_hwtcl;
r300->context.set_index_buffer = r300_set_index_buffer_hwtcl;
} else {
r300->context.set_vertex_buffers = r300_set_vertex_buffers_swtcl;
r300->context.set_index_buffer = r300_set_index_buffer_swtcl;
}
 
r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
 
r300->context.create_vs_state = r300_create_vs_state;
r300->context.bind_vs_state = r300_bind_vs_state;
r300->context.delete_vs_state = r300_delete_vs_state;
 
r300->context.texture_barrier = r300_texture_barrier;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_state_derived.c
0,0 → 1,1089
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "draw/draw_context.h"
 
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
 
#include "r300_context.h"
#include "r300_fs.h"
#include "r300_screen.h"
#include "r300_shader_semantics.h"
#include "r300_state_inlines.h"
#include "r300_texture.h"
#include "r300_vs.h"
 
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
 
/* Component selection applied when a rasterizer input is set up. */
enum r300_rs_swizzle {
    SWIZ_XYZW = 0,  /* pass all four components through */
    SWIZ_X001 = 1,  /* (x, 0, 0, 1) */
    SWIZ_XY01 = 2,  /* (x, y, 0, 1) */
    SWIZ_0001 = 3   /* constant (0, 0, 0, 1) */
};
 
/* What a rasterized color slot writes into the fragment shader input. */
enum r300_rs_col_write_type {
    WRITE_COLOR = 0,  /* the interpolated color */
    WRITE_FACE  = 1   /* the face (back-face) value; R500 only */
};
 
/* Append one attribute to the Draw vertex layout. 'index' selects a vertex
 * shader output; its semantic name/index are used to look up the matching
 * Draw shader output slot. */
static void r300_draw_emit_attrib(struct r300_context* r300,
                                  enum attrib_emit emit,
                                  enum interp_mode interp,
                                  int index)
{
    struct r300_vertex_shader *shader = r300->vs_state.state;
    struct tgsi_shader_info *vs_info = &shader->info;
    int slot = draw_find_shader_output(r300->draw,
                                       vs_info->output_semantic_name[index],
                                       vs_info->output_semantic_index[index]);

    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, slot);
}
 
/* Build the Draw vertex layout from the bound vertex shader's outputs, in
 * the order: position, point size, colors, back-face colors, generics
 * (at most 8, skipping sprite coords), fog and finally WPOS. */
static void r300_draw_emit_all_attribs(struct r300_context* r300)
{
    struct r300_vertex_shader *shader = r300->vs_state.state;
    struct r300_shader_semantics *outs = &shader->outputs;
    int idx;
    int generics_emitted = 0;

    /* Position. */
    if (outs->pos == ATTR_UNUSED) {
        assert(0);
    } else {
        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, outs->pos);
    }

    /* Point size. */
    if (outs->psize != ATTR_UNUSED) {
        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, outs->psize);
    }

    /* Colors. */
    for (idx = 0; idx < ATTR_COLOR_COUNT; idx++) {
        if (outs->color[idx] == ATTR_UNUSED)
            continue;
        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, outs->color[idx]);
    }

    /* Back-face colors. */
    for (idx = 0; idx < ATTR_COLOR_COUNT; idx++) {
        if (outs->bcolor[idx] == ATTR_UNUSED)
            continue;
        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, outs->bcolor[idx]);
    }

    /* Texture coordinates. */
    /* Only 8 generic vertex attributes can be used. If there are more,
     * they won't be rasterized. */
    for (idx = 0; idx < ATTR_GENERIC_COUNT && generics_emitted < 8; idx++) {
        if (outs->generic[idx] == ATTR_UNUSED)
            continue;
        if (r300->sprite_coord_enable & (1 << idx))
            continue;

        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
                              outs->generic[idx]);
        generics_emitted++;
    }

    /* Fog coordinates. */
    if (generics_emitted < 8 && outs->fog != ATTR_UNUSED) {
        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, outs->fog);
        generics_emitted++;
    }

    /* WPOS. */
    if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED &&
        generics_emitted < 8) {
        DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n",
            outs->wpos);
        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, outs->wpos);
    }
}
 
/* Update the PSC tables for SW TCL, using Draw. */
static void r300_swtcl_vertex_psc(struct r300_context *r300)
{
struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
struct vertex_info *vinfo = &r300->vertex_info;
uint16_t type, swizzle;
enum pipe_format format;
unsigned i, attrib_count;
int* vs_output_tab = r300->stream_loc_notcl;
 
memset(vstream, 0, sizeof(struct r300_vertex_stream_state));
 
/* For each Draw attribute, route it to the fragment shader according
* to the vs_output_tab. */
attrib_count = vinfo->num_attribs;
DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count);
for (i = 0; i < attrib_count; i++) {
if (vs_output_tab[i] == -1) {
assert(0);
abort();
}
 
format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
 
DBG(r300, DBG_SWTCL,
"r300: swtcl_vertex_psc [%i] <- %s\n",
vs_output_tab[i], util_format_short_name(format));
 
/* Obtain the type of data in this attribute. */
type = r300_translate_vertex_data_type(format);
if (type == R300_INVALID_FORMAT) {
fprintf(stderr, "r300: Bad vertex format %s.\n",
util_format_short_name(format));
assert(0);
abort();
}
 
type |= vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
 
/* Obtain the swizzle for this attribute. Note that the default
* swizzle in the hardware is not XYZW! */
swizzle = r300_translate_vertex_data_swizzle(format);
 
/* Add the attribute to the PSC table. */
if (i & 1) {
vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
} else {
vstream->vap_prog_stream_cntl[i >> 1] |= type;
vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
}
}
 
/* Set the last vector in the PSC. */
if (i) {
i -= 1;
}
vstream->vap_prog_stream_cntl[i >> 1] |=
(R300_LAST_VEC << (i & 1 ? 16 : 0));
 
vstream->count = (i >> 1) + 1;
r300_mark_atom_dirty(r300, &r300->vertex_stream_state);
r300->vertex_stream_state.size = (1 + vstream->count) * 2;
}
 
/* R300: set up color interpolator 'id' to read from 'ptr'. SWIZ_0001 selects
 * the 0001 color format; every other swizzle selects RGBA. */
static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
                        enum r300_rs_swizzle swiz)
{
    unsigned col_fmt;

    if (swiz == SWIZ_0001)
        col_fmt = R300_RS_COL_FMT_0001;
    else
        col_fmt = R300_RS_COL_FMT_RGBA;

    rs->ip[id] |= R300_RS_COL_PTR(ptr);
    rs->ip[id] |= R300_RS_COL_FMT(col_fmt);
    rs->inst[id] |= R300_RS_INST_COL_ID(id);
}
 
/* R300: make RS instruction 'id' write its color to fragment shader input
 * 'fp_offset'. Only WRITE_COLOR is accepted on this path. */
static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
                              enum r300_rs_col_write_type type)
{
    assert(type == WRITE_COLOR);

    rs->inst[id] |= R300_RS_INST_COL_CN_WRITE;
    rs->inst[id] |= R300_RS_INST_COL_ADDR(fp_offset);
}
 
/* R300: set up texcoord interpolator 'id' to read from 'ptr'.
 * S always comes from component 0; T/R/Q come from the source components
 * or the K0/K1 constants depending on the swizzle. */
static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
                        enum r300_rs_swizzle swiz)
{
    unsigned sel_t, sel_r, sel_q;

    switch (swiz) {
    case SWIZ_X001:
        sel_t = R300_RS_SEL_K0;
        sel_r = R300_RS_SEL_K0;
        sel_q = R300_RS_SEL_K1;
        break;
    case SWIZ_XY01:
        sel_t = R300_RS_SEL_C1;
        sel_r = R300_RS_SEL_K0;
        sel_q = R300_RS_SEL_K1;
        break;
    default:
        sel_t = R300_RS_SEL_C1;
        sel_r = R300_RS_SEL_C2;
        sel_q = R300_RS_SEL_C3;
        break;
    }

    rs->ip[id] |= R300_RS_TEX_PTR(ptr) |
                  R300_RS_SEL_S(R300_RS_SEL_C0) |
                  R300_RS_SEL_T(sel_t) |
                  R300_RS_SEL_R(sel_r) |
                  R300_RS_SEL_Q(sel_q);
    rs->inst[id] |= R300_RS_INST_TEX_ID(id);
}
 
/* R300: make RS instruction 'id' write its texcoord to fragment shader
 * input 'fp_offset'. */
static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
{
    rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE;
    rs->inst[id] |= R300_RS_INST_TEX_ADDR(fp_offset);
}
 
/* R500: set up color interpolator 'id' to read from 'ptr'. SWIZ_0001 selects
 * the 0001 color format; every other swizzle selects RGBA. */
static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
                        enum r300_rs_swizzle swiz)
{
    unsigned col_fmt;

    if (swiz == SWIZ_0001)
        col_fmt = R300_RS_COL_FMT_0001;
    else
        col_fmt = R300_RS_COL_FMT_RGBA;

    rs->ip[id] |= R500_RS_COL_PTR(ptr);
    rs->ip[id] |= R500_RS_COL_FMT(col_fmt);
    rs->inst[id] |= R500_RS_INST_COL_ID(id);
}
 
/* R500: make RS instruction 'id' write to fragment shader input
 * 'fp_offset', using the back-face write mode for WRITE_FACE and the plain
 * color write otherwise. */
static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
                              enum r300_rs_col_write_type type)
{
    unsigned write_mode;

    if (type == WRITE_FACE) {
        write_mode = R500_RS_INST_COL_CN_WRITE_BACKFACE;
    } else {
        write_mode = R500_RS_INST_COL_CN_WRITE;
    }

    rs->inst[id] |= write_mode | R500_RS_INST_COL_ADDR(fp_offset);
}
 
/* R500: set up texcoord interpolator 'id'. Each of S/T/R/Q gets its own
 * source pointer: consecutive components starting at 'ptr', or the K0/K1
 * constant pointers for components the swizzle replaces with 0 / 1. */
static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
                        enum r300_rs_swizzle swiz)
{
    int src_t, src_r, src_q;

    switch (swiz) {
    case SWIZ_X001:
        src_t = R500_RS_IP_PTR_K0;
        src_r = R500_RS_IP_PTR_K0;
        src_q = R500_RS_IP_PTR_K1;
        break;
    case SWIZ_XY01:
        src_t = ptr + 1;
        src_r = R500_RS_IP_PTR_K0;
        src_q = R500_RS_IP_PTR_K1;
        break;
    default:
        src_t = ptr + 1;
        src_r = ptr + 2;
        src_q = ptr + 3;
        break;
    }

    rs->ip[id] |= R500_RS_SEL_S(ptr) |
                  R500_RS_SEL_T(src_t) |
                  R500_RS_SEL_R(src_r) |
                  R500_RS_SEL_Q(src_q);
    rs->inst[id] |= R500_RS_INST_TEX_ID(id);
}
 
/* R500: make RS instruction 'id' write its texcoord to fragment shader
 * input 'fp_offset'. */
static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
{
    rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE;
    rs->inst[id] |= R500_RS_INST_TEX_ADDR(fp_offset);
}
 
/* Set up the RS block.
*
* This is the part of the chipset that is responsible for linking vertex
* and fragment shaders and stuffed texture coordinates.
*
* The rasterizer reads data from VAP, which produces vertex shader outputs,
* and GA, which produces stuffed texture coordinates. VAP outputs have
* precedence over GA. All outputs must be rasterized otherwise it locks up.
* If there are more outputs rasterized than is set in VAP/GA, it locks up
* too. The funky part is that this info has been pretty much obtained by trial
* and error. */
static void r300_update_rs_block(struct r300_context *r300)
{
struct r300_vertex_shader *vs = r300->vs_state.state;
struct r300_shader_semantics *vs_outputs = &vs->outputs;
struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs;
struct r300_rs_block rs = {0};
int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0;
int gen_offset = 0;
void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type);
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
vs_outputs->bcolor[1] != ATTR_UNUSED;
int *stream_loc_notcl = r300->stream_loc_notcl;
uint32_t stuffing_enable = 0;
 
if (r300->screen->caps.is_r500) {
rX00_rs_col = r500_rs_col;
rX00_rs_col_write = r500_rs_col_write;
rX00_rs_tex = r500_rs_tex;
rX00_rs_tex_write = r500_rs_tex_write;
} else {
rX00_rs_col = r300_rs_col;
rX00_rs_col_write = r300_rs_col_write;
rX00_rs_tex = r300_rs_tex;
rX00_rs_tex_write = r300_rs_tex_write;
}
 
/* 0x5555 copied from classic, which means:
* Select user color 0 for COLOR0 up to COLOR7.
* What the hell does that mean? */
rs.vap_vtx_state_cntl = 0x5555;
 
/* The position is always present in VAP. */
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
stream_loc_notcl[loc++] = 0;
 
/* Set up the point size in VAP. */
if (vs_outputs->psize != ATTR_UNUSED) {
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
stream_loc_notcl[loc++] = 1;
}
 
/* Set up and rasterize colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
vs_outputs->color[1] != ATTR_UNUSED) {
/* Set up the color in VAP. */
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
rs.vap_out_vtx_fmt[0] |=
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
stream_loc_notcl[loc++] = 2 + i;
 
/* Rasterize it. */
rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
 
/* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR);
fp_offset++;
 
DBG(r300, DBG_RS,
"r300: Rasterized color %i written to FS.\n", i);
} else {
DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i);
}
col_count++;
} else {
/* Skip the FS input register, leave it uninitialized. */
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
fp_offset++;
 
DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n",
i);
}
}
}
 
/* Set up back-face colors. The rasterizer will do the color selection
* automatically. */
if (any_bcolor_used) {
if (r300->two_sided_color) {
/* Rasterize as back-face colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
stream_loc_notcl[loc++] = 4 + i;
}
} else {
/* Rasterize two fake texcoords to prevent from the two-sided color
* selection. */
/* XXX Consider recompiling the vertex shader to save 2 RS units. */
for (i = 0; i < 2; i++) {
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
stream_loc_notcl[loc++] = 6 + tex_count;
 
/* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW);
tex_count++;
tex_ptr += 4;
}
}
}
 
/* gl_FrontFacing.
* Note that we can use either the two-sided color selection based on
* the front and back vertex shader colors, or gl_FrontFacing,
* but not both! It locks up otherwise.
*
* In Direct3D 9, the two-sided color selection can be used
* with shaders 2.0 only, while gl_FrontFacing can be used
* with shaders 3.0 only. The hardware apparently hasn't been designed
* to support both at the same time. */
if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED &&
!(any_bcolor_used && r300->two_sided_color)) {
rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE);
fp_offset++;
col_count++;
DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n");
} else if (fs_inputs->face != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input FACE unassigned.\n");
}
 
/* Re-use color varyings for texcoords if possible.
*
* The colors are interpolated as 20-bit floats (reduced precision),
* Use this hack only if there are too many generic varyings.
* (number of generic varyings + fog + wpos > 8) */
if (r300->screen->caps.is_r500 && !any_bcolor_used && !r300->flatshade &&
fs_inputs->face == ATTR_UNUSED &&
vs_outputs->num_generic + (vs_outputs->fog != ATTR_UNUSED) +
(fs_inputs->wpos != ATTR_UNUSED) > 8) {
for (i = 0; i < ATTR_GENERIC_COUNT && col_count < 2; i++) {
/* Cannot use color varyings for sprite coords. */
if (fs_inputs->generic[i] != ATTR_UNUSED &&
(r300->sprite_coord_enable & (1 << i))) {
break;
}
 
if (vs_outputs->generic[i] != ATTR_UNUSED) {
/* Set up the color in VAP. */
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
rs.vap_out_vtx_fmt[0] |=
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << col_count;
stream_loc_notcl[loc++] = 2 + col_count;
 
/* Rasterize it. */
rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
 
/* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR);
fp_offset++;
 
DBG(r300, DBG_RS,
"r300: Rasterized generic %i redirected to color %i and written to FS.\n",
i, col_count);
} else {
DBG(r300, DBG_RS, "r300: Rasterized generic %i redirected to color %i unused.\n",
i, col_count);
}
col_count++;
} else {
/* Skip the FS input register, leave it uninitialized. */
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fp_offset++;
 
DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n", i);
}
}
}
gen_offset = i;
}
 
/* Rasterize texture coordinates. */
for (i = gen_offset; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
boolean sprite_coord = false;
 
if (fs_inputs->generic[i] != ATTR_UNUSED) {
sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
}
 
if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) {
if (!sprite_coord) {
/* Set up the texture coordinates in VAP. */
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
stream_loc_notcl[loc++] = 6 + tex_count;
} else
stuffing_enable |=
R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (tex_count*2));
 
/* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_ptr,
sprite_coord ? SWIZ_XY01 : SWIZ_XYZW);
 
/* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
rX00_rs_tex_write(&rs, tex_count, fp_offset);
fp_offset++;
 
DBG(r300, DBG_RS,
"r300: Rasterized generic %i written to FS%s in texcoord %d.\n",
i, sprite_coord ? " (sprite coord)" : "", tex_count);
} else {
DBG(r300, DBG_RS,
"r300: Rasterized generic %i unused%s.\n",
i, sprite_coord ? " (sprite coord)" : "");
}
tex_count++;
tex_ptr += sprite_coord ? 2 : 4;
} else {
/* Skip the FS input register, leave it uninitialized. */
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fp_offset++;
 
DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n",
i, sprite_coord ? " (sprite coord)" : "");
}
}
}
 
for (; i < ATTR_GENERIC_COUNT; i++) {
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
"not enough hardware slots (it's not a bug, do not "
"report it).\n", i);
}
}
 
/* Rasterize fog coordinates. */
if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
/* Set up the fog coordinates in VAP. */
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
stream_loc_notcl[loc++] = 6 + tex_count;
 
/* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001);
 
/* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->fog != ATTR_UNUSED) {
rX00_rs_tex_write(&rs, tex_count, fp_offset);
fp_offset++;
 
DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n");
} else {
DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n");
}
tex_count++;
tex_ptr += 4;
} else {
/* Skip the FS input register, leave it uninitialized. */
/* If we try to set it to (0,0,0,1), it will lock up. */
if (fs_inputs->fog != ATTR_UNUSED) {
fp_offset++;
 
if (tex_count < 8) {
DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n");
} else {
fprintf(stderr, "r300: ERROR: FS input fog unassigned, "
"not enough hardware slots. (it's not a bug, "
"do not report it)\n");
}
}
}
 
/* Rasterize WPOS. */
/* Don't set it in VAP if the FS doesn't need it. */
if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) {
/* Set up the WPOS coordinates in VAP. */
rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
stream_loc_notcl[loc++] = 6 + tex_count;
 
/* Rasterize it. */
rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW);
 
/* Write it to the FS input register. */
rX00_rs_tex_write(&rs, tex_count, fp_offset);
 
DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n");
 
fp_offset++;
tex_count++;
tex_ptr += 4;
} else {
if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) {
fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, "
"not enough hardware slots. (it's not a bug, do not "
"report it)\n");
}
}
 
/* Invalidate the rest of the no-TCL (GA) stream locations. */
for (; loc < 16;) {
stream_loc_notcl[loc++] = -1;
}
 
/* Rasterize at least one color, or bad things happen. */
if (col_count == 0 && tex_count == 0) {
rX00_rs_col(&rs, 0, 0, SWIZ_0001);
col_count++;
 
DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n");
}
 
DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, "
"generics: %i.\n", col_count, tex_count);
 
rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) |
R300_HIRES_EN;
 
count = MAX3(col_count, tex_count, 1);
rs.inst_count = count - 1;
 
/* set the GB enable flags */
if (r300->sprite_coord_enable)
stuffing_enable |= R300_GB_POINT_STUFF_ENABLE;
 
rs.gb_enable = stuffing_enable;
 
/* Now, after all that, see if we actually need to update the state. */
if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) {
memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block));
r300->rs_block_state.size = 13 + count*2;
}
}
 
/* Swap the first and third components of a 4-component color in place
 * (RGBA <-> BGRA). Components 1 and 3 are left untouched. */
static void rgba_to_bgra(float color[4])
{
    const float third = color[2];

    color[2] = color[0];
    color[0] = third;
}
 
/* Pack a sampler border color into the 32-bit value that is stored in the
* texture state for a view of the given format.
*
* format:  format of the sampler view the border color is used with.
* border:  the four float border color components.
* is_r500: selects how the 24-bit depth formats are packed.
*
* Returns the packed value. For a depth/stencil format that is not listed
* below, assert(0) fires and 0 is returned. */
static uint32_t r300_get_border_color(enum pipe_format format,
const float border[4],
boolean is_r500)
{
const struct util_format_description *desc;
float border_swizzled[4] = {0};
union util_color uc = {0};

desc = util_format_description(format);

/* Do depth formats first. */
/* Only border[0] is used for depth formats. */
if (util_format_is_depth_or_stencil(format)) {
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]);
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
if (is_r500) {
return util_pack_z(PIPE_FORMAT_X8Z24_UNORM, border[0]);
} else {
/* Without is_r500 the value is packed as Z16 and moved into
* the upper 16 bits. */
return util_pack_z(PIPE_FORMAT_Z16_UNORM, border[0]) << 16;
}
default:
assert(0);
return 0;
}
}

/* Apply inverse swizzle of the format. */
util_format_unswizzle_4f(border_swizzled, border, desc->swizzle);

/* Compressed formats. */
if (util_format_is_compressed(format)) {
switch (format) {
case PIPE_FORMAT_RGTC1_SNORM:
case PIPE_FORMAT_LATC1_SNORM:
/* Remap the signed value: negative inputs become x*0.5+1,
* non-negative inputs become x*0.5. */
border_swizzled[0] = border_swizzled[0] < 0 ?
border_swizzled[0]*0.5+1 :
border_swizzled[0]*0.5;
/* Fall through to the UNORM handling below. */

case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_LATC1_UNORM:
/* Add 1/32 to round the border color instead of truncating. */
/* The Y component is used for the border color. */
border_swizzled[1] = border_swizzled[0] + 1.0f/32;
util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc);
return uc.ui;
case PIPE_FORMAT_RGTC2_SNORM:
case PIPE_FORMAT_LATC2_SNORM:
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc);
return uc.ui;
case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_LATC2_UNORM:
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
return uc.ui;
case PIPE_FORMAT_DXT1_SRGB:
case PIPE_FORMAT_DXT1_SRGBA:
case PIPE_FORMAT_DXT3_SRGBA:
case PIPE_FORMAT_DXT5_SRGBA:
util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_SRGB, &uc);
return uc.ui;
default:
/* Every other compressed format is packed as B8G8R8A8_UNORM. */
util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
return uc.ui;
}
}

/* Uncompressed color formats: the packing is chosen from the size of the
* first channel. */
switch (desc->channel[0].size) {
case 2:
rgba_to_bgra(border_swizzled);
util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc);
break;

case 4:
rgba_to_bgra(border_swizzled);
util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc);
break;

case 5:
rgba_to_bgra(border_swizzled);
/* The size of the second channel tells 5551 apart from 565. */
if (desc->channel[1].size == 5) {
util_pack_color(border_swizzled, PIPE_FORMAT_B5G5R5A1_UNORM, &uc);
} else if (desc->channel[1].size == 6) {
util_pack_color(border_swizzled, PIPE_FORMAT_B5G6R5_UNORM, &uc);
} else {
assert(0);
}
break;

/* Any channel size not listed explicitly is handled like size 8. */
default:
case 8:
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc);
} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
if (desc->nr_channels == 2) {
/* Two-channel sRGB: the second component is copied to the
* fourth slot before packing as L8A8_SRGB. */
border_swizzled[3] = border_swizzled[1];
util_pack_color(border_swizzled, PIPE_FORMAT_L8A8_SRGB, &uc);
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SRGB, &uc);
}
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
}
break;

case 10:
util_pack_color(border_swizzled, PIPE_FORMAT_R10G10B10A2_UNORM, &uc);
break;

case 16:
if (desc->nr_channels <= 2) {
if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc);
} else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_SNORM, &uc);
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc);
}
} else {
/* More than two 16-bit channels are packed as 8 bits per channel. */
if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc);
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
}
}
break;

case 32:
if (desc->nr_channels == 1) {
util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc);
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
}
break;
}

return uc.ui;
}
 
/* Combine the bound sampler views and sampler states into the per-unit
* texture register state (state->regs[]).
*
* Also recomputes state->tx_enable, state->count and
* r300->textures_state.size, and downgrades a VALID fragment shader status
* to MAYBE_DIRTY when at least one unit was processed. */
static void r300_merge_textures_and_samplers(struct r300_context* r300)
{
struct r300_textures_state *state =
(struct r300_textures_state*)r300->textures_state.state;
struct r300_texture_sampler_state *texstate;
struct r300_sampler_state *sampler;
struct r300_sampler_view *view;
struct r300_resource *tex;
unsigned base_level, min_level, level_count, i, j, size;
/* Only units that have both a view slot and a state slot are visited. */
unsigned count = MIN2(state->sampler_view_count,
state->sampler_state_count);
boolean has_us_format = r300->screen->caps.has_us_format;

/* The KIL opcode fix, see below. */
if (!count && !r300->screen->caps.is_r500)
count = 1;

state->tx_enable = 0;
state->count = 0;
/* Base size; every enabled unit adds 16, plus 2 more with has_us_format. */
size = 2;

for (i = 0; i < count; i++) {
if (state->sampler_views[i] && state->sampler_states[i]) {
state->tx_enable |= 1 << i;

view = state->sampler_views[i];
tex = r300_resource(view->base.texture);
sampler = state->sampler_states[i];

/* Start from the values precomputed in the view and the sampler. */
texstate = &state->regs[i];
texstate->format = view->format;
texstate->filter0 = sampler->filter0;
texstate->filter1 = sampler->filter1;

/* Set the border color. */
texstate->border_color =
r300_get_border_color(view->base.format,
sampler->state.border_color.f,
r300->screen->caps.is_r500);

/* determine min/max levels */
base_level = view->base.u.tex.first_level;
min_level = sampler->min_lod;
level_count = MIN3(sampler->max_lod,
tex->b.b.last_level - base_level,
view->base.u.tex.last_level - base_level);

/* A non-zero base or minimum level requires the format state to be
* set up again for that level. */
if (base_level + min_level) {
unsigned offset;

if (tex->tex.is_npot) {
/* Even though we do not implement mipmapping for NPOT
* textures, we should at least honor the minimum level
* which is allowed to be displayed. We do this by setting up
* an i-th mipmap level as the zero level. */
base_level += min_level;
}
offset = tex->tex.offset_in_bytes[base_level];

r300_texture_setup_format_state(r300->screen, tex,
view->base.format,
base_level,
view->width0_override,
view->height0_override,
&texstate->format);
/* The level offset is OR'd into tile_config; its low 5 bits must be
* zero (asserted below). */
texstate->format.tile_config |= offset & 0xffffffe0;
assert((offset & 0x1f) == 0);
}

/* Assign a texture cache region. */
texstate->format.format1 |= view->texcache_region;

/* Depth textures are kinda special. */
if (util_format_is_depth_or_stencil(view->base.format)) {
unsigned char depth_swizzle[4];

if (!r300->screen->caps.is_r500 &&
util_format_get_blocksizebits(view->base.format) == 32) {
/* X24x8 is sampled as Y16X16 on r3xx-r4xx.
* The depth here is at the Y component. */
for (j = 0; j < 4; j++)
depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y;
} else {
for (j = 0; j < 4; j++)
depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X;
}

/* If compare mode is disabled, sampler view swizzles
* are stored in the format.
* Otherwise, the swizzles must be applied after the compare
* mode in the fragment shader. */
if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) {
texstate->format.format1 |=
r300_get_swizzle_combined(depth_swizzle,
view->swizzle, FALSE);
} else {
/* Passing 0 as the view swizzle means "no view swizzle". */
texstate->format.format1 |=
r300_get_swizzle_combined(depth_swizzle, 0, FALSE);
}
}

if (r300->screen->caps.dxtc_swizzle &&
util_format_is_compressed(view->base.format)) {
texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE;
}

/* to emulate 1D textures through 2D ones correctly */
if (tex->b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}

/* The hardware doesn't like CLAMP and CLAMP_TO_BORDER
* for the 3rd coordinate if the texture isn't 3D. */
if (tex->b.b.target != PIPE_TEXTURE_3D) {
texstate->filter0 &= ~R300_TX_WRAP_R_MASK;
}

if (tex->tex.is_npot) {
/* NPOT textures don't support mip filter, unfortunately.
* This prevents incorrect rendering. */
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;

/* Mask out the mirrored flag. */
if (texstate->filter0 & R300_TX_WRAP_S(R300_TX_MIRRORED)) {
texstate->filter0 &= ~R300_TX_WRAP_S(R300_TX_MIRRORED);
}
if (texstate->filter0 & R300_TX_WRAP_T(R300_TX_MIRRORED)) {
texstate->filter0 &= ~R300_TX_WRAP_T(R300_TX_MIRRORED);
}

/* Change repeat to clamp-to-edge.
* (the repeat bit has a value of 0, no masking needed). */
if ((texstate->filter0 & R300_TX_WRAP_S_MASK) ==
R300_TX_WRAP_S(R300_TX_REPEAT)) {
texstate->filter0 |= R300_TX_WRAP_S(R300_TX_CLAMP_TO_EDGE);
}
if ((texstate->filter0 & R300_TX_WRAP_T_MASK) ==
R300_TX_WRAP_T(R300_TX_REPEAT)) {
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
} else {
/* the MAX_MIP level is the largest (finest) one */
texstate->format.format0 |= R300_TX_NUM_LEVELS(level_count);
texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
}

/* Float textures only support nearest and mip-nearest filtering. */
if (util_format_is_float(view->base.format)) {
/* No MAG linear filtering. */
if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) ==
R300_TX_MAG_FILTER_LINEAR) {
texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK;
texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST;
}
/* No MIN linear filtering. */
if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) ==
R300_TX_MIN_FILTER_LINEAR) {
texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK;
texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST;
}
/* No mipmap linear filtering. */
if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) ==
R300_TX_MIN_FILTER_MIP_LINEAR) {
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST;
}
/* No anisotropic filtering. */
texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK;
texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK;
texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY;
}

/* The unit index is stored in the top bits of filter0. */
texstate->filter0 |= i << 28;

size += 16 + (has_us_format ? 2 : 0);
/* count ends up as the index of the last enabled unit plus one. */
state->count = i+1;
} else {
/* For the KIL opcode to work on r3xx-r4xx, the texture unit
* assigned to this opcode (it's always the first one) must be
* enabled. Otherwise the opcode doesn't work.
*
* In order to not depend on the fragment shader, we just make
* the first unit enabled all the time. */
if (i == 0 && !r300->screen->caps.is_r500) {
/* Bind the context's texkill sampler view to unit 0. */
pipe_sampler_view_reference(
(struct pipe_sampler_view**)&state->sampler_views[i],
&r300->texkill_sampler->base);

state->tx_enable |= 1 << i;

texstate = &state->regs[i];

/* Just set some valid state. */
texstate->format = r300->texkill_sampler->format;
texstate->filter0 =
r300_translate_tex_filters(PIPE_TEX_FILTER_NEAREST,
PIPE_TEX_FILTER_NEAREST,
PIPE_TEX_FILTER_NEAREST,
FALSE);
texstate->filter1 = 0;
texstate->border_color = 0;

texstate->filter0 |= i << 28;
size += 16 + (has_us_format ? 2 : 0);
state->count = i+1;
}
}
}

r300->textures_state.size = size;

/* Pick a fragment shader based on either the texture compare state
* or the uses_pitch flag or some other external state. */
if (count &&
r300->fs_status == FRAGMENT_SHADER_VALID) {
r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
}
}
 
/* If the texture behind the locked zbuffer is bound to a texture unit that
 * has both a sampler view and a sampler state, call
 * r300_decompress_zmask_locked() once. Does nothing when no zbuffer is
 * locked. */
static void r300_decompress_depth_textures(struct r300_context *r300)
{
    struct r300_textures_state *texs =
        (struct r300_textures_state*)r300->textures_state.state;
    struct pipe_resource *bound;
    unsigned num_units = MIN2(texs->sampler_view_count,
                              texs->sampler_state_count);
    unsigned unit;

    if (!r300->locked_zbuffer)
        return;

    for (unit = 0; unit < num_units; unit++) {
        /* Skip units that are not fully set up. */
        if (!texs->sampler_views[unit] || !texs->sampler_states[unit])
            continue;

        bound = texs->sampler_views[unit]->base.texture;
        if (bound != r300->locked_zbuffer->texture)
            continue;

        /* One match is enough; no need to look at the remaining units. */
        r300_decompress_zmask_locked(r300);
        return;
    }
}
 
/* Bring the fragment shader status back to FRAGMENT_SHADER_VALID.
 *
 * Nothing happens when no fragment shader is bound or the status is already
 * VALID. Otherwise a shader is picked; if the pick reports a change or the
 * status is DIRTY, the FS code is marked dirty and, with more than one
 * color buffer, the multiwrite flag is refreshed. */
static void r300_validate_fragment_shader(struct r300_context *r300)
{
    struct pipe_framebuffer_state *fb = r300->fb_state.state;

    if (!r300->fs.state || r300->fs_status == FRAGMENT_SHADER_VALID)
        return;

    /* r300_pick_fragment_shader() must run first and unconditionally;
     * the status is only consulted when it reports no change. */
    if (r300_pick_fragment_shader(r300) ||
        r300->fs_status == FRAGMENT_SHADER_DIRTY) {
        /* Mark the state atom as dirty. */
        r300_mark_fs_code_dirty(r300);

        /* Does Multiwrite need to be changed? */
        if (fb->nr_cbufs > 1) {
            boolean writes_all =
                r300_fragment_shader_writes_all(r300_fs(r300));

            if (r300->fb_multiwrite != writes_all) {
                r300->fb_multiwrite = writes_all;
                r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE);
            }
        }
    }

    r300->fs_status = FRAGMENT_SHADER_VALID;
}
 
/* Recompute the state that is derived from other state objects.
*
* The steps run in a fixed order: texture state (only when dirty), fragment
* shader validation, the RS block (only when dirty), and finally the HyperZ
* state, which is updated on every call. */
void r300_update_derived_state(struct r300_context* r300)
{
if (r300->textures_state.dirty) {
/* Decompression runs before the texture/sampler merge. */
r300_decompress_depth_textures(r300);
r300_merge_textures_and_samplers(r300);
}

r300_validate_fragment_shader(r300);

if (r300->rs_block_state.dirty) {
r300_update_rs_block(r300);

/* When r300->draw is set, vertex_info is cleared and then presumably
* repopulated by the calls below (TODO confirm against the callees). */
if (r300->draw) {
memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
r300_draw_emit_all_attribs(r300);
draw_compute_vertex_size(&r300->vertex_info);
r300_swtcl_vertex_psc(r300);
}
}

r300_update_hyperz_state(r300);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_state_inlines.h
0,0 → 1,439
/*
* Copyright 2009 Joakim Sindholt <opensource@zhasha.com>
* Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_STATE_INLINES_H
#define R300_STATE_INLINES_H
 
#include "draw/draw_vertex.h"
#include "pipe/p_format.h"
#include "util/u_format.h"
#include "r300_reg.h"
#include <stdio.h>
 
/* Some maths. These should probably find their way to u_math, if needed. */
 
/* Scale f by 6, truncate to int and keep only the low 16 bits. */
static INLINE int pack_float_16_6x(float f) {
    const int scaled = (int)(f * 6.0);

    return scaled & 0xffff;
}
 
/* Blend state. */
 
/* Translate a PIPE_BLEND_* function into the R300_COMB_FCN_* value.
 *
 * clamp selects the CLAMP or NOCLAMP variant for add, subtract and reverse
 * subtract; MIN and MAX have a single encoding. An unknown function is
 * reported on stderr, hits assert(0) and yields 0. */
static INLINE uint32_t r300_translate_blend_function(int blend_func,
                                                     boolean clamp)
{
    if (blend_func == PIPE_BLEND_ADD) {
        return clamp ? R300_COMB_FCN_ADD_CLAMP : R300_COMB_FCN_ADD_NOCLAMP;
    }
    if (blend_func == PIPE_BLEND_SUBTRACT) {
        return clamp ? R300_COMB_FCN_SUB_CLAMP : R300_COMB_FCN_SUB_NOCLAMP;
    }
    if (blend_func == PIPE_BLEND_REVERSE_SUBTRACT) {
        return clamp ? R300_COMB_FCN_RSUB_CLAMP : R300_COMB_FCN_RSUB_NOCLAMP;
    }
    if (blend_func == PIPE_BLEND_MIN) {
        return R300_COMB_FCN_MIN;
    }
    if (blend_func == PIPE_BLEND_MAX) {
        return R300_COMB_FCN_MAX;
    }

    fprintf(stderr, "r300: Unknown blend function %d\n", blend_func);
    assert(0);
    return 0;
}
 
/* XXX we can also offer the D3D versions of some of these... */
/* Translate a PIPE_BLENDFACTOR_* value into the R300_BLEND_GL_* factor.
 *
 * The SRC1 (dual-source) factors are rejected with an "implementation
 * error" message; any other unknown value gets an "unknown" message. Both
 * error paths hit assert(0) and yield 0. */
static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
{
    uint32_t hw_factor = 0;

    switch (blend_fact) {
        case PIPE_BLENDFACTOR_ONE:
            hw_factor = R300_BLEND_GL_ONE;
            break;
        case PIPE_BLENDFACTOR_SRC_COLOR:
            hw_factor = R300_BLEND_GL_SRC_COLOR;
            break;
        case PIPE_BLENDFACTOR_SRC_ALPHA:
            hw_factor = R300_BLEND_GL_SRC_ALPHA;
            break;
        case PIPE_BLENDFACTOR_DST_ALPHA:
            hw_factor = R300_BLEND_GL_DST_ALPHA;
            break;
        case PIPE_BLENDFACTOR_DST_COLOR:
            hw_factor = R300_BLEND_GL_DST_COLOR;
            break;
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
            hw_factor = R300_BLEND_GL_SRC_ALPHA_SATURATE;
            break;
        case PIPE_BLENDFACTOR_CONST_COLOR:
            hw_factor = R300_BLEND_GL_CONST_COLOR;
            break;
        case PIPE_BLENDFACTOR_CONST_ALPHA:
            hw_factor = R300_BLEND_GL_CONST_ALPHA;
            break;
        case PIPE_BLENDFACTOR_ZERO:
            hw_factor = R300_BLEND_GL_ZERO;
            break;
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
            hw_factor = R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
            break;
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
            hw_factor = R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
            break;
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
            hw_factor = R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
            break;
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
            hw_factor = R300_BLEND_GL_ONE_MINUS_DST_COLOR;
            break;
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
            hw_factor = R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
            break;
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
            hw_factor = R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
            break;

        /* Dual-source factors: known values, but not handled here. */
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
            fprintf(stderr, "r300: Implementation error: "
                "Bad blend factor %d not supported!\n", blend_fact);
            assert(0);
            break;

        default:
            fprintf(stderr, "r300: Unknown blend factor %d\n", blend_fact);
            assert(0);
            break;
    }

    return hw_factor;
}
 
/* DSA state. */
 
/* Translate a PIPE_FUNC_* comparison into the R300_ZS_* depth/stencil
 * function. An unknown value is reported on stderr, hits assert(0) and
 * yields 0. */
static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
{
    uint32_t hw_func = 0;

    switch (zs_func) {
        case PIPE_FUNC_NEVER:
            hw_func = R300_ZS_NEVER;
            break;
        case PIPE_FUNC_LESS:
            hw_func = R300_ZS_LESS;
            break;
        case PIPE_FUNC_EQUAL:
            hw_func = R300_ZS_EQUAL;
            break;
        case PIPE_FUNC_LEQUAL:
            hw_func = R300_ZS_LEQUAL;
            break;
        case PIPE_FUNC_GREATER:
            hw_func = R300_ZS_GREATER;
            break;
        case PIPE_FUNC_NOTEQUAL:
            hw_func = R300_ZS_NOTEQUAL;
            break;
        case PIPE_FUNC_GEQUAL:
            hw_func = R300_ZS_GEQUAL;
            break;
        case PIPE_FUNC_ALWAYS:
            hw_func = R300_ZS_ALWAYS;
            break;
        default:
            fprintf(stderr, "r300: Unknown depth/stencil function %d\n",
                    zs_func);
            assert(0);
            break;
    }

    return hw_func;
}
 
/* Translate a PIPE_STENCIL_OP_* value into the R300_ZS_* stencil operation.
 *
 * An unknown value is reported on stderr, hits assert(0) and yields 0. */
static INLINE uint32_t r300_translate_stencil_op(int s_op)
{
    switch (s_op) {
        case PIPE_STENCIL_OP_KEEP:
            return R300_ZS_KEEP;
        case PIPE_STENCIL_OP_ZERO:
            return R300_ZS_ZERO;
        case PIPE_STENCIL_OP_REPLACE:
            return R300_ZS_REPLACE;
        case PIPE_STENCIL_OP_INCR:
            return R300_ZS_INCR;
        case PIPE_STENCIL_OP_DECR:
            return R300_ZS_DECR;
        case PIPE_STENCIL_OP_INCR_WRAP:
            return R300_ZS_INCR_WRAP;
        case PIPE_STENCIL_OP_DECR_WRAP:
            return R300_ZS_DECR_WRAP;
        case PIPE_STENCIL_OP_INVERT:
            return R300_ZS_INVERT;
        default:
            /* The message ends with a newline so that it does not run into
             * whatever is printed to stderr next. */
            fprintf(stderr, "r300: Unknown stencil op %d\n", s_op);
            assert(0);
            break;
    }
    return 0;
}
 
/* Translate a PIPE_FUNC_* comparison into the R300_FG_ALPHA_FUNC_* value
 * used for the alpha test.
 *
 * An unknown value is reported on stderr, hits assert(0) and yields 0. */
static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
{
    switch (alpha_func) {
        case PIPE_FUNC_NEVER:
            return R300_FG_ALPHA_FUNC_NEVER;
        case PIPE_FUNC_LESS:
            return R300_FG_ALPHA_FUNC_LESS;
        case PIPE_FUNC_EQUAL:
            return R300_FG_ALPHA_FUNC_EQUAL;
        case PIPE_FUNC_LEQUAL:
            return R300_FG_ALPHA_FUNC_LE;
        case PIPE_FUNC_GREATER:
            return R300_FG_ALPHA_FUNC_GREATER;
        case PIPE_FUNC_NOTEQUAL:
            return R300_FG_ALPHA_FUNC_NOTEQUAL;
        case PIPE_FUNC_GEQUAL:
            return R300_FG_ALPHA_FUNC_GE;
        case PIPE_FUNC_ALWAYS:
            return R300_FG_ALPHA_FUNC_ALWAYS;
        default:
            /* The message ends with a newline so that it does not run into
             * whatever is printed to stderr next. */
            fprintf(stderr, "r300: Unknown alpha function %d\n", alpha_func);
            assert(0);
            break;
    }
    return 0;
}
 
/* Translate a PIPE_POLYGON_MODE_* value into the front-face
 * R300_GA_POLY_MODE_FRONT_PTYPE_* value. An unrecognized mode is reported
 * on stderr and treated as FILL (triangles). */
static INLINE uint32_t
r300_translate_polygon_mode_front(unsigned mode) {
    if (mode == PIPE_POLYGON_MODE_FILL) {
        return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
    }
    if (mode == PIPE_POLYGON_MODE_LINE) {
        return R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
    }
    if (mode == PIPE_POLYGON_MODE_POINT) {
        return R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
    }

    fprintf(stderr, "r300: Bad polygon mode %i in %s\n", mode,
            __FUNCTION__);
    return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
}
 
/* Translate a PIPE_POLYGON_MODE_* value into the back-face
 * R300_GA_POLY_MODE_BACK_PTYPE_* value. An unrecognized mode is reported
 * on stderr and treated as FILL (triangles). */
static INLINE uint32_t
r300_translate_polygon_mode_back(unsigned mode) {
    if (mode == PIPE_POLYGON_MODE_FILL) {
        return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
    }
    if (mode == PIPE_POLYGON_MODE_LINE) {
        return R300_GA_POLY_MODE_BACK_PTYPE_LINE;
    }
    if (mode == PIPE_POLYGON_MODE_POINT) {
        return R300_GA_POLY_MODE_BACK_PTYPE_POINT;
    }

    fprintf(stderr, "r300: Bad polygon mode %i in %s\n", mode,
            __FUNCTION__);
    return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
}
 
/* Texture sampler state. */
 
/* Translate a PIPE_TEX_WRAP_* mode into the R300_TX_* wrap value.
 *
 * The mirrored modes are the plain mode OR'd with R300_TX_MIRRORED.
 * An unknown value is reported on stderr, hits assert(0) and yields 0. */
static INLINE uint32_t r300_translate_wrap(int wrap)
{
    switch (wrap) {
        case PIPE_TEX_WRAP_REPEAT:
            return R300_TX_REPEAT;
        case PIPE_TEX_WRAP_CLAMP:
            return R300_TX_CLAMP;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
            return R300_TX_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
            return R300_TX_CLAMP_TO_BORDER;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
            return R300_TX_REPEAT | R300_TX_MIRRORED;
        case PIPE_TEX_WRAP_MIRROR_CLAMP:
            return R300_TX_CLAMP | R300_TX_MIRRORED;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
            return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
            return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
        default:
            /* The message ends with a newline so that it does not run into
             * whatever is printed to stderr next. */
            fprintf(stderr, "r300: Unknown texture wrap %d\n", wrap);
            assert(0);
            return 0;
    }
}
 
/* Build the filter bits from the minification, magnification and mipmap
 * filter selections.
 *
 * With is_anisotropic set, a LINEAR min/mag filter is replaced by the ANISO
 * variant. An unknown selector is reported on stderr, hits assert(0) and
 * contributes no bits. Returns the OR of the three parts. */
static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
                                                  boolean is_anisotropic)
{
    uint32_t min_bits = 0;
    uint32_t mag_bits = 0;
    uint32_t mip_bits = 0;

    /* Minification filter. */
    if (min == PIPE_TEX_FILTER_NEAREST) {
        min_bits = R300_TX_MIN_FILTER_NEAREST;
    } else if (min == PIPE_TEX_FILTER_LINEAR) {
        min_bits = is_anisotropic ? R300_TX_MIN_FILTER_ANISO :
                                    R300_TX_MIN_FILTER_LINEAR;
    } else {
        fprintf(stderr, "r300: Unknown texture filter %d\n", min);
        assert(0);
    }

    /* Magnification filter. */
    if (mag == PIPE_TEX_FILTER_NEAREST) {
        mag_bits = R300_TX_MAG_FILTER_NEAREST;
    } else if (mag == PIPE_TEX_FILTER_LINEAR) {
        mag_bits = is_anisotropic ? R300_TX_MAG_FILTER_ANISO :
                                    R300_TX_MAG_FILTER_LINEAR;
    } else {
        fprintf(stderr, "r300: Unknown texture filter %d\n", mag);
        assert(0);
    }

    /* Mipmap filter. */
    if (mip == PIPE_TEX_MIPFILTER_NONE) {
        mip_bits = R300_TX_MIN_FILTER_MIP_NONE;
    } else if (mip == PIPE_TEX_MIPFILTER_NEAREST) {
        mip_bits = R300_TX_MIN_FILTER_MIP_NEAREST;
    } else if (mip == PIPE_TEX_MIPFILTER_LINEAR) {
        mip_bits = R300_TX_MIN_FILTER_MIP_LINEAR;
    } else {
        fprintf(stderr, "r300: Unknown texture filter %d\n", mip);
        assert(0);
    }

    return min_bits | mag_bits | mip_bits;
}
 
/* Pick the R300_TX_MAX_ANISO_* step for max_aniso: the largest of
 * 16, 8, 4 and 2 that does not exceed it, or 1:1 when max_aniso is
 * below 2. */
static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
{
    if (max_aniso >= 16)
        return R300_TX_MAX_ANISO_16_TO_1;
    if (max_aniso >= 8)
        return R300_TX_MAX_ANISO_8_TO_1;
    if (max_aniso >= 4)
        return R300_TX_MAX_ANISO_4_TO_1;
    if (max_aniso >= 2)
        return R300_TX_MAX_ANISO_2_TO_1;

    return R300_TX_MAX_ANISO_1_TO_1;
}
 
/* Compute the R500 anisotropy bits for max_aniso.
 *
 * Returns 0 when max_aniso is 0. Otherwise (max_aniso - 1) is scaled by
 * 4.2001, capped at 63, and combined with R500_TX_ANISO_HIGH_QUALITY. */
static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
{
    unsigned level;

    if (max_aniso == 0) {
        return 0;
    }

    /* Map the range [0, 15] to [0, 63]. */
    level = (unsigned)((max_aniso - 1)*4.2001);

    return R500_TX_MAX_ANISO(MIN2(level, 63)) |
           R500_TX_ANISO_HIGH_QUALITY;
}
 
/* Translate pipe_formats into PSC vertex types.
 *
 * Only formats with a plain layout are accepted. The type and size of the
 * first non-VOID channel select the R300_DATA_TYPE_* value; R300_SIGNED and
 * R300_NORMALIZE are OR'd in when that channel is signed / normalized.
 *
 * Returns R300_INVALID_FORMAT when the format cannot be expressed,
 * including when none of the four channels is non-VOID. */
static INLINE uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
    uint32_t result = 0;
    const struct util_format_description *desc;
    unsigned i;

    desc = util_format_description(format);

    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
        return R300_INVALID_FORMAT;
    }

    /* Find the first non-VOID channel. */
    for (i = 0; i < 4; i++) {
        if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
            break;
        }
    }

    /* Only channels 0..3 are scanned; if all of them are VOID there is
     * nothing to translate and channel[i] must not be read. */
    if (i == 4) {
        return R300_INVALID_FORMAT;
    }

    switch (desc->channel[i].type) {
        /* Half-floats, floats, doubles */
        case UTIL_FORMAT_TYPE_FLOAT:
            switch (desc->channel[i].size) {
                case 16:
                    /* Supported only on RV350 and later. */
                    if (desc->nr_channels > 2) {
                        result = R300_DATA_TYPE_FLT16_4;
                    } else {
                        result = R300_DATA_TYPE_FLT16_2;
                    }
                    break;
                case 32:
                    /* The FLOAT_n types are selected by adding the channel
                     * count minus one to FLOAT_1. */
                    result = R300_DATA_TYPE_FLOAT_1 + (desc->nr_channels - 1);
                    break;
                default:
                    return R300_INVALID_FORMAT;
            }
            break;
        /* Unsigned ints */
        case UTIL_FORMAT_TYPE_UNSIGNED:
        /* Signed ints */
        case UTIL_FORMAT_TYPE_SIGNED:
            switch (desc->channel[i].size) {
                case 8:
                    result = R300_DATA_TYPE_BYTE;
                    break;
                case 16:
                    if (desc->nr_channels > 2) {
                        result = R300_DATA_TYPE_SHORT_4;
                    } else {
                        result = R300_DATA_TYPE_SHORT_2;
                    }
                    break;
                default:
                    return R300_INVALID_FORMAT;
            }
            break;
        default:
            return R300_INVALID_FORMAT;
    }

    if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
        result |= R300_SIGNED;
    }
    if (desc->channel[i].normalized) {
        result |= R300_NORMALIZE;
    }

    return result;
}
 
/* Build the vertex swizzle word for a plain-layout format.
 *
 * Each present channel contributes its format swizzle (capped at
 * R300_SWIZZLE_SELECT_FP_ONE) in a 3-bit field. Missing components are
 * filled with (0, 0, 0, 1), and 0xf is OR'd in at R300_WRITE_ENA_SHIFT.
 * A non-plain format is reported on stderr and yields 0. */
static INLINE uint16_t
r300_translate_vertex_data_swizzle(enum pipe_format format) {
    const struct util_format_description *desc = util_format_description(format);
    unsigned chan, packed = 0;

    assert(format);

    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
        fprintf(stderr, "r300: Bad format %s in %s:%d\n",
            util_format_short_name(format), __FUNCTION__, __LINE__);
        return 0;
    }

    for (chan = 0; chan < desc->nr_channels; chan++) {
        packed |=
            MIN2(desc->swizzle[chan], R300_SWIZZLE_SELECT_FP_ONE) << (3*chan);
    }

    /* Set (0,0,0,1) in unused components: zero for the first three slots,
     * one for the fourth. */
    for (; chan < 4; chan++) {
        const unsigned fill = (chan < 3) ? R300_SWIZZLE_SELECT_FP_ZERO
                                         : R300_SWIZZLE_SELECT_FP_ONE;

        packed |= fill << (3*chan);
    }

    return packed | (0xf << R300_WRITE_ENA_SHIFT);
}
 
#endif /* R300_STATE_INLINES_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture.c
0,0 → 1,1232
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/* Always include headers in the reverse order!! ~ M. */
#include "r300_texture.h"
 
#include "r300_context.h"
#include "r300_reg.h"
#include "r300_texture_desc.h"
#include "r300_transfer.h"
#include "r300_screen.h"
 
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_mm.h"
 
#include "pipe/p_screen.h"
 
/* Compute the swizzle bits of a texture format word.
 *
 * swizzle_format: swizzle of the format itself (4 entries).
 * swizzle_view:   optional swizzle of the sampler view; when non-NULL it is
 *                 composed with swizzle_format, otherwise swizzle_format is
 *                 used as is.
 * dxtc_swizzle:   when set, the X and Z selectors are exchanged.
 *
 * Returns the four selectors shifted into their R/G/B/A positions. */
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
                                   const unsigned char *swizzle_view,
                                   boolean dxtc_swizzle)
{
    static const uint32_t chan_shift[4] = {
        R300_TX_FORMAT_R_SHIFT,
        R300_TX_FORMAT_G_SHIFT,
        R300_TX_FORMAT_B_SHIFT,
        R300_TX_FORMAT_A_SHIFT
    };
    /* Selectors for the X, Y, Z and W sources, in that order. */
    const uint32_t sel_x = dxtc_swizzle ? R300_TX_FORMAT_Z : R300_TX_FORMAT_X;
    const uint32_t sel_y = R300_TX_FORMAT_Y;
    const uint32_t sel_z = dxtc_swizzle ? R300_TX_FORMAT_X : R300_TX_FORMAT_Z;
    const uint32_t sel_w = R300_TX_FORMAT_W;
    unsigned char combined[4];
    unsigned packed = 0;
    unsigned chan;

    if (swizzle_view) {
        /* Combine two sets of swizzles. */
        util_format_compose_swizzles(swizzle_format, swizzle_view, combined);
    } else {
        memcpy(combined, swizzle_format, 4);
    }

    /* Pick the selector for each output channel, then shift it in place. */
    for (chan = 0; chan < 4; chan++) {
        uint32_t sel;

        switch (combined[chan]) {
            case UTIL_FORMAT_SWIZZLE_Y:
                sel = sel_y;
                break;
            case UTIL_FORMAT_SWIZZLE_Z:
                sel = sel_z;
                break;
            case UTIL_FORMAT_SWIZZLE_W:
                sel = sel_w;
                break;
            case UTIL_FORMAT_SWIZZLE_0:
                sel = R300_TX_FORMAT_ZERO;
                break;
            case UTIL_FORMAT_SWIZZLE_1:
                sel = R300_TX_FORMAT_ONE;
                break;
            default: /* UTIL_FORMAT_SWIZZLE_X */
                sel = sel_x;
                break;
        }

        packed |= sel << chan_shift[chan];
    }
    return packed;
}
 
/* Translate a pipe_format into a useful texture format for sampling.
*
* Some special formats are translated directly using R300_EASY_TX_FORMAT,
* but the majority of them are translated in a generic way, automatically
* supporting all the formats hw can support.
*
* R300_EASY_TX_FORMAT swizzles the texture.
* Note the signature of R300_EASY_TX_FORMAT:
* R300_EASY_TX_FORMAT(B, G, R, A, FORMAT);
*
* The FORMAT specifies how the texture sampler will treat the texture, and
* makes available X, Y, Z, W, ZERO, and ONE for swizzling.
*
* format:       the format to translate.
* swizzle_view: optional sampler view swizzle, combined with the format's
*               own swizzle by r300_get_swizzle_combined.
* is_r500:      selects the encoding of the 24-bit depth formats.
* dxtc_swizzle: requests the DXTC swizzle for compressed formats other than
*               RGTC2/LATC2.
*
* Returns the format word, or ~0 when the format is unsupported. */
uint32_t r300_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
boolean is_r500,
boolean dxtc_swizzle)
{
uint32_t result = 0;
const struct util_format_description *desc;
unsigned i;
boolean uniform = TRUE;
/* Sign bits indexed by channel number; note the W..X order. */
const uint32_t sign_bit[4] = {
R300_TX_FORMAT_SIGNED_W,
R300_TX_FORMAT_SIGNED_Z,
R300_TX_FORMAT_SIGNED_Y,
R300_TX_FORMAT_SIGNED_X,
};

desc = util_format_description(format);

/* Colorspace (return non-RGB formats directly). */
switch (desc->colorspace) {
/* Depth stencil formats.
* Swizzles are added in r300_merge_textures_and_samplers. */
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return R300_TX_FORMAT_X16;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
if (is_r500)
return R500_TX_FORMAT_Y8X24;
else
return R300_TX_FORMAT_Y16X16;
default:
return ~0; /* Unsupported. */
}

/* YUV formats. */
case UTIL_FORMAT_COLORSPACE_YUV:
result |= R300_TX_FORMAT_YUV_TO_RGB;

switch (format) {
case PIPE_FORMAT_UYVY:
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result;
case PIPE_FORMAT_YUYV:
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result;
default:
return ~0; /* Unsupported/unknown. */
}

/* Add gamma correction. */
case UTIL_FORMAT_COLORSPACE_SRGB:
result |= R300_TX_FORMAT_GAMMA;
break;

default:
switch (format) {
/* Same as YUV but without the YUV->RGB conversion. */
case PIPE_FORMAT_R8G8_B8G8_UNORM:
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result;
case PIPE_FORMAT_G8R8_G8B8_UNORM:
return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result;
default:;
}
}

/* Add swizzling. */
/* The RGTC1_SNORM and LATC1_SNORM swizzle is done in the shader. */
if (format != PIPE_FORMAT_RGTC1_SNORM &&
format != PIPE_FORMAT_LATC1_SNORM) {
/* The DXTC swizzle is only used for compressed formats other than
* RGTC2/LATC2, and only when the caller asked for it. */
if (util_format_is_compressed(format) &&
dxtc_swizzle &&
format != PIPE_FORMAT_RGTC2_UNORM &&
format != PIPE_FORMAT_RGTC2_SNORM &&
format != PIPE_FORMAT_LATC2_UNORM &&
format != PIPE_FORMAT_LATC2_SNORM) {
result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
TRUE);
} else {
result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
FALSE);
}
}

/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
if (!util_format_s3tc_enabled) {
return ~0; /* Unsupported. */
}

switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT1_SRGB:
case PIPE_FORMAT_DXT1_SRGBA:
return R300_TX_FORMAT_DXT1 | result;
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT3_SRGBA:
return R300_TX_FORMAT_DXT3 | result;
case PIPE_FORMAT_DXT5_RGBA:
case PIPE_FORMAT_DXT5_SRGBA:
return R300_TX_FORMAT_DXT5 | result;
default:
return ~0; /* Unsupported/unknown. */
}
}

/* RGTC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
switch (format) {
case PIPE_FORMAT_RGTC1_SNORM:
case PIPE_FORMAT_LATC1_SNORM:
case PIPE_FORMAT_LATC1_UNORM:
case PIPE_FORMAT_RGTC1_UNORM:
return R500_TX_FORMAT_ATI1N | result;

case PIPE_FORMAT_RGTC2_SNORM:
case PIPE_FORMAT_LATC2_SNORM:
result |= sign_bit[1] | sign_bit[0];
/* Fall through: the signed variants share the ATI2N format. */
case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_LATC2_UNORM:
return R400_TX_FORMAT_ATI2N | result;

default:
return ~0; /* Unsupported/unknown. */
}
}

/* This is truly a special format.
* It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2)
* in the sampler unit. Also known as D3DFMT_CxV8U8. */
if (format == PIPE_FORMAT_R8G8Bx_SNORM) {
return R300_TX_FORMAT_CxV8U8 | result;
}

/* Integer and fixed-point 16.16 textures are not supported. */
for (i = 0; i < 4; i++) {
if (desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED ||
((desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
(!desc->channel[i].normalized ||
desc->channel[i].pure_integer))) {
return ~0; /* Unsupported/unknown. */
}
}

/* Add sign. */
for (i = 0; i < desc->nr_channels; i++) {
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
result |= sign_bit[i];
}
}

/* See whether the components are of the same size. */
for (i = 1; i < desc->nr_channels; i++) {
uniform = uniform && desc->channel[0].size == desc->channel[i].size;
}

/* Non-uniform formats. */
if (!uniform) {
switch (desc->nr_channels) {
case 3:
if (desc->channel[0].size == 5 &&
desc->channel[1].size == 6 &&
desc->channel[2].size == 5) {
return R300_TX_FORMAT_Z5Y6X5 | result;
}
if (desc->channel[0].size == 5 &&
desc->channel[1].size == 5 &&
desc->channel[2].size == 6) {
return R300_TX_FORMAT_Z6Y5X5 | result;
}
if (desc->channel[0].size == 2 &&
desc->channel[1].size == 3 &&
desc->channel[2].size == 3) {
return R300_TX_FORMAT_Z3Y3X2 | result;
}
return ~0; /* Unsupported/unknown. */

case 4:
if (desc->channel[0].size == 5 &&
desc->channel[1].size == 5 &&
desc->channel[2].size == 5 &&
desc->channel[3].size == 1) {
return R300_TX_FORMAT_W1Z5Y5X5 | result;
}
if (desc->channel[0].size == 10 &&
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
desc->channel[3].size == 2) {
return R300_TX_FORMAT_W2Z10Y10X10 | result;
}
/* No match: leave the switch and report "unsupported" below. */
}
return ~0; /* Unsupported/unknown. */
}

/* Find the first non-VOID channel. */
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
break;
}
}

/* All four channels are VOID. */
if (i == 4)
return ~0; /* Unsupported/unknown. */

/* And finally, uniform formats. */
switch (desc->channel[i].type) {
case UTIL_FORMAT_TYPE_UNSIGNED:
case UTIL_FORMAT_TYPE_SIGNED:
if (!desc->channel[i].normalized &&
desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
return ~0;
}

switch (desc->channel[i].size) {
case 4:
switch (desc->nr_channels) {
case 2:
return R300_TX_FORMAT_Y4X4 | result;
case 4:
return R300_TX_FORMAT_W4Z4Y4X4 | result;
}
return ~0;

case 8:
switch (desc->nr_channels) {
case 1:
return R300_TX_FORMAT_X8 | result;
case 2:
return R300_TX_FORMAT_Y8X8 | result;
case 4:
return R300_TX_FORMAT_W8Z8Y8X8 | result;
}
return ~0;

case 16:
switch (desc->nr_channels) {
case 1:
return R300_TX_FORMAT_X16 | result;
case 2:
return R300_TX_FORMAT_Y16X16 | result;
case 4:
return R300_TX_FORMAT_W16Z16Y16X16 | result;
}
}
return ~0;

case UTIL_FORMAT_TYPE_FLOAT:
switch (desc->channel[i].size) {
case 16:
switch (desc->nr_channels) {
case 1:
return R300_TX_FORMAT_16F | result;
case 2:
return R300_TX_FORMAT_16F_16F | result;
case 4:
return R300_TX_FORMAT_16F_16F_16F_16F | result;
}
return ~0;

case 32:
switch (desc->nr_channels) {
case 1:
return R300_TX_FORMAT_32F | result;
case 2:
return R300_TX_FORMAT_32F_32F | result;
case 4:
return R300_TX_FORMAT_32F_32F_32F_32F | result;
}
}
}

return ~0; /* Unsupported/unknown. */
}
 
/* Return R500_TXFORMAT_MSB for the formats that need it (the RGTC1/LATC1
 * formats and the 24-bit depth formats), 0 for everything else. */
uint32_t r500_tx_format_msb_bit(enum pipe_format format)
{
    uint32_t msb = 0;

    switch (format) {
        case PIPE_FORMAT_RGTC1_UNORM:
        case PIPE_FORMAT_RGTC1_SNORM:
        case PIPE_FORMAT_LATC1_UNORM:
        case PIPE_FORMAT_LATC1_SNORM:
        case PIPE_FORMAT_X8Z24_UNORM:
        case PIPE_FORMAT_S8_UINT_Z24_UNORM:
            msb = R500_TXFORMAT_MSB;
            break;
        default:
            break;
    }

    return msb;
}
 
/* Buffer formats. */
 
/* Colorbuffer formats. This is the unswizzled format of the RB3D block's
 * output. For the swizzling of the targets, check the shader's format.
 *
 * Maps a pipe format to one of the R300_COLOR_FORMAT_* / R500_COLOR_FORMAT_*
 * values, grouped below by the size of one pixel. Formats whose channel
 * layout differs from the hardware format's name are listed under the
 * hardware format with the same pixel size.
 *
 * Returns ~0 if the format is not in the table. */
static uint32_t r300_translate_colorformat(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_A8_SNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_I8_SNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_L8_SNORM:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return R300_COLOR_FORMAT_I8;

/* 16-bit buffers. */
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_L8A8_SNORM:
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
case PIPE_FORMAT_R8A8_UNORM:
case PIPE_FORMAT_R8A8_SNORM:
/* These formats work fine with UV88 if US_OUT_FMT is set correctly. */
case PIPE_FORMAT_A16_UNORM:
case PIPE_FORMAT_A16_SNORM:
case PIPE_FORMAT_A16_FLOAT:
case PIPE_FORMAT_L16_UNORM:
case PIPE_FORMAT_L16_SNORM:
case PIPE_FORMAT_L16_FLOAT:
case PIPE_FORMAT_I16_UNORM:
case PIPE_FORMAT_I16_SNORM:
case PIPE_FORMAT_I16_FLOAT:
case PIPE_FORMAT_R16_UNORM:
case PIPE_FORMAT_R16_SNORM:
case PIPE_FORMAT_R16_FLOAT:
return R300_COLOR_FORMAT_UV88;

case PIPE_FORMAT_B5G6R5_UNORM:
return R300_COLOR_FORMAT_RGB565;

case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM:
return R300_COLOR_FORMAT_ARGB1555;

case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
return R300_COLOR_FORMAT_ARGB4444;

/* 32-bit buffers. */
case PIPE_FORMAT_B8G8R8A8_UNORM:
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_R8G8B8X8_SNORM:
/* These formats work fine with ARGB8888 if US_OUT_FMT is set
 * correctly. */
case PIPE_FORMAT_R16G16_UNORM:
case PIPE_FORMAT_R16G16_SNORM:
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_L16A16_UNORM:
case PIPE_FORMAT_L16A16_SNORM:
case PIPE_FORMAT_L16A16_FLOAT:
case PIPE_FORMAT_R16A16_UNORM:
case PIPE_FORMAT_R16A16_SNORM:
case PIPE_FORMAT_R16A16_FLOAT:
case PIPE_FORMAT_A32_FLOAT:
case PIPE_FORMAT_L32_FLOAT:
case PIPE_FORMAT_I32_FLOAT:
case PIPE_FORMAT_R32_FLOAT:
return R300_COLOR_FORMAT_ARGB8888;

/* NOTE(review): R10G10B10X2_SNORM is the only SNORM entry in this
 * 10-bit group; confirm that this is intentional. */
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return R500_COLOR_FORMAT_ARGB2101010; /* R5xx-only? */

/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R16G16B16X16_UNORM:
case PIPE_FORMAT_R16G16B16X16_SNORM:
case PIPE_FORMAT_R16G16B16X16_FLOAT:
/* These formats work fine with ARGB16161616 if US_OUT_FMT is set
 * correctly. */
case PIPE_FORMAT_R32G32_FLOAT:
case PIPE_FORMAT_L32A32_FLOAT:
case PIPE_FORMAT_R32A32_FLOAT:
return R300_COLOR_FORMAT_ARGB16161616;

/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32X32_FLOAT:
return R300_COLOR_FORMAT_ARGB32323232;

/* YUV buffers. */
case PIPE_FORMAT_UYVY:
return R300_COLOR_FORMAT_YVYU;
case PIPE_FORMAT_YUYV:
return R300_COLOR_FORMAT_VYUY;
default:
return ~0; /* Unsupported. */
}
}
 
/* Depth/stencil buffer formats. Only two hardware layouts are produced:
 * 16-bit integer Z, and 24-bit integer Z with 8 bits of stencil (also used
 * when the stencil bits are ignored, i.e. X8Z24).
 *
 * Returns ~0 for any other format. */
static uint32_t r300_translate_zsformat(enum pipe_format format)
{
    /* 16-bit depth, no stencil. */
    if (format == PIPE_FORMAT_Z16_UNORM)
        return R300_DEPTHFORMAT_16BIT_INT_Z;

    /* 24-bit depth with either ignored or real 8-bit stencil. */
    if (format == PIPE_FORMAT_X8Z24_UNORM ||
        format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
        return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;

    return ~0; /* Unsupported. */
}
 
/* Shader output formats. This is essentially the swizzle from the shader
 * to the RB3D block.
 *
 * Note that formats are stored from C3 to C0.
 *
 * The result is built in three steps:
 *  1. an R300_US_OUT_FMT_* value chosen from the type and size of the first
 *     non-VOID channel and from the number of channels,
 *  2. R300_OUT_SIGN(0xf) if every channel is of the SIGNED type,
 *  3. the R300_Cn_SEL_* channel selects, chosen per format.
 *
 * Returns ~0 if all channels are VOID or if the format is not listed in the
 * final switch. */
static uint32_t r300_translate_out_fmt(enum pipe_format format)
{
uint32_t modifier = 0;
unsigned i;
const struct util_format_description *desc;
boolean uniform_sign;

desc = util_format_description(format);

/* Find the first non-VOID channel. */
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
break;
}
}

if (i == 4)
return ~0; /* Unsupported/unknown. */

/* Specifies how the shader output is written to the fog unit. */
/* The nr_channels switches below have no default case: for a channel count
 * other than 1, 2 or 4 no output-format bits are added here. */
switch (desc->channel[i].type) {
case UTIL_FORMAT_TYPE_FLOAT:
switch (desc->channel[i].size) {
case 32:
switch (desc->nr_channels) {
case 1:
modifier |= R300_US_OUT_FMT_C_32_FP;
break;
case 2:
modifier |= R300_US_OUT_FMT_C2_32_FP;
break;
case 4:
modifier |= R300_US_OUT_FMT_C4_32_FP;
break;
}
break;

case 16:
switch (desc->nr_channels) {
case 1:
modifier |= R300_US_OUT_FMT_C_16_FP;
break;
case 2:
modifier |= R300_US_OUT_FMT_C2_16_FP;
break;
case 4:
modifier |= R300_US_OUT_FMT_C4_16_FP;
break;
}
break;
}
break;

default:
/* Every non-float channel type is handled here. */
switch (desc->channel[i].size) {
case 16:
switch (desc->nr_channels) {
case 1:
modifier |= R300_US_OUT_FMT_C_16;
break;
case 2:
modifier |= R300_US_OUT_FMT_C2_16;
break;
case 4:
modifier |= R300_US_OUT_FMT_C4_16;
break;
}
break;

case 10:
modifier |= R300_US_OUT_FMT_C4_10;
break;

default:
/* C4_8 seems to be used for the formats whose pixel size
 * is <= 32 bits. */
modifier |= R300_US_OUT_FMT_C4_8;
break;
}
}

/* Add sign. The sign bits are set only if all nr_channels channels are
 * SIGNED; a single channel of another type clears uniform_sign. */
uniform_sign = TRUE;
for (i = 0; i < desc->nr_channels; i++)
if (desc->channel[i].type != UTIL_FORMAT_TYPE_SIGNED)
uniform_sign = FALSE;

if (uniform_sign)
modifier |= R300_OUT_SIGN(0xf);

/* Add swizzles and return. */
switch (format) {
/*** Special cases (non-standard channel mapping) ***/

/* X8
 * COLORFORMAT_I8 stores the Z component (C2). */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_A8_SNORM:
return modifier | R300_C2_SEL_A;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_I8_SNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_L8_SNORM:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return modifier | R300_C2_SEL_R;

/* X8Y8
 * COLORFORMAT_UV88 stores ZX (C2 and C0). */
case PIPE_FORMAT_L8A8_SNORM:
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_R8A8_SNORM:
case PIPE_FORMAT_R8A8_UNORM:
return modifier | R300_C0_SEL_A | R300_C2_SEL_R;
case PIPE_FORMAT_R8G8_SNORM:
case PIPE_FORMAT_R8G8_UNORM:
return modifier | R300_C0_SEL_G | R300_C2_SEL_R;

/* X32Y32
 * ARGB16161616 stores XZ for RG32F */
case PIPE_FORMAT_R32G32_FLOAT:
return modifier | R300_C0_SEL_R | R300_C2_SEL_G;

/*** Generic cases (standard channel mapping) ***/

/* BGRA outputs. */
case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return modifier |
R300_C0_SEL_B | R300_C1_SEL_G |
R300_C2_SEL_R | R300_C3_SEL_A;

/* ARGB outputs. */
case PIPE_FORMAT_A16_UNORM:
case PIPE_FORMAT_A16_SNORM:
case PIPE_FORMAT_A16_FLOAT:
case PIPE_FORMAT_A32_FLOAT:
return modifier |
R300_C0_SEL_A | R300_C1_SEL_R |
R300_C2_SEL_G | R300_C3_SEL_B;

/* RGBA outputs. */
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_R8G8B8X8_SNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_R16_UNORM:
case PIPE_FORMAT_R16G16_UNORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16_SNORM:
case PIPE_FORMAT_R16G16_SNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R32_FLOAT:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32X32_FLOAT:
case PIPE_FORMAT_L16_UNORM:
case PIPE_FORMAT_L16_SNORM:
case PIPE_FORMAT_L16_FLOAT:
case PIPE_FORMAT_L32_FLOAT:
case PIPE_FORMAT_I16_UNORM:
case PIPE_FORMAT_I16_SNORM:
case PIPE_FORMAT_I16_FLOAT:
case PIPE_FORMAT_I32_FLOAT:
case PIPE_FORMAT_R16G16B16X16_UNORM:
case PIPE_FORMAT_R16G16B16X16_SNORM:
case PIPE_FORMAT_R16G16B16X16_FLOAT:
return modifier |
R300_C0_SEL_R | R300_C1_SEL_G |
R300_C2_SEL_B | R300_C3_SEL_A;

/* LA outputs. */
case PIPE_FORMAT_L16A16_UNORM:
case PIPE_FORMAT_L16A16_SNORM:
case PIPE_FORMAT_L16A16_FLOAT:
case PIPE_FORMAT_R16A16_UNORM:
case PIPE_FORMAT_R16A16_SNORM:
case PIPE_FORMAT_R16A16_FLOAT:
case PIPE_FORMAT_L32A32_FLOAT:
case PIPE_FORMAT_R32A32_FLOAT:
return modifier |
R300_C0_SEL_R | R300_C1_SEL_A;

default:
return ~0; /* Unsupported. */
}
}
 
/* Map a colorbuffer format to the COLORMASK_* swizzle stored in the surface
 * state (see r300_texture_setup_fb_state).
 *
 * Returns ~0 if the format is not listed. Because
 * r300_is_colorbuffer_format_supported requires this function to succeed,
 * the cases commented out below make those formats unsupported as
 * colorbuffers even though the other translate functions accept them. */
static uint32_t r300_translate_colormask_swizzle(enum pipe_format format)
{
switch (format) {
/* Alpha-only formats. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_A8_SNORM:
case PIPE_FORMAT_A16_UNORM:
case PIPE_FORMAT_A16_SNORM:
case PIPE_FORMAT_A16_FLOAT:
case PIPE_FORMAT_A32_FLOAT:
return COLORMASK_AAAA;

/* Single-channel 8-bit and 32-bit float formats. */
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_I8_SNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_L8_SNORM:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
case PIPE_FORMAT_R32_FLOAT:
case PIPE_FORMAT_L32_FLOAT:
case PIPE_FORMAT_I32_FLOAT:
return COLORMASK_RRRR;

/* Two-channel formats whose second channel is alpha. */
case PIPE_FORMAT_L8A8_SNORM:
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_R8A8_UNORM:
case PIPE_FORMAT_R8A8_SNORM:
case PIPE_FORMAT_L16A16_UNORM:
case PIPE_FORMAT_L16A16_SNORM:
case PIPE_FORMAT_L16A16_FLOAT:
case PIPE_FORMAT_R16A16_UNORM:
case PIPE_FORMAT_R16A16_SNORM:
case PIPE_FORMAT_R16A16_FLOAT:
case PIPE_FORMAT_L32A32_FLOAT:
case PIPE_FORMAT_R32A32_FLOAT:
return COLORMASK_ARRA;

/* Red-green formats. */
case PIPE_FORMAT_R8G8_SNORM:
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R16G16_UNORM:
case PIPE_FORMAT_R16G16_SNORM:
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R32G32_FLOAT:
return COLORMASK_GRRG;

case PIPE_FORMAT_B5G5R5X1_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
/*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_B10G10R10X2_UNORM:
return COLORMASK_BGRX;

case PIPE_FORMAT_B5G6R5_UNORM:
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
/*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B10G10R10A2_UNORM:
return COLORMASK_BGRA;

case PIPE_FORMAT_R8G8B8X8_UNORM:
/* RGBX_SNORM formats are broken for an unknown reason */
/*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
/*case PIPE_FORMAT_R10G10B10X2_SNORM:*/
case PIPE_FORMAT_R16G16B16X16_UNORM:
/*case PIPE_FORMAT_R16G16B16X16_SNORM:*/
case PIPE_FORMAT_R16G16B16X16_FLOAT:
case PIPE_FORMAT_R32G32B32X32_FLOAT:
return COLORMASK_RGBX;

/* RGBA formats, plus the 16-bit single-channel R/L/I formats. */
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R16_UNORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16_SNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_L16_UNORM:
case PIPE_FORMAT_L16_SNORM:
case PIPE_FORMAT_L16_FLOAT:
case PIPE_FORMAT_I16_UNORM:
case PIPE_FORMAT_I16_SNORM:
case PIPE_FORMAT_I16_FLOAT:
return COLORMASK_RGBA;

default:
return ~0; /* Unsupported. */
}
}
 
boolean r300_is_colorbuffer_format_supported(enum pipe_format format)
{
return r300_translate_colorformat(format) != ~0 &&
r300_translate_out_fmt(format) != ~0 &&
r300_translate_colormask_swizzle(format) != ~0;
}
 
/* A format can be used as a depth/stencil buffer if it translates to a
 * hardware depth format. */
boolean r300_is_zs_format_supported(enum pipe_format format)
{
    const uint32_t hw_format = r300_translate_zsformat(format);

    return hw_format != ~0;
}
 
/* A format can be sampled if it translates to a texture format. The query
 * is made with no view swizzle, is_r500 = TRUE and dxtc_swizzle = FALSE. */
boolean r300_is_sampler_format_supported(enum pipe_format format)
{
    const uint32_t hw_format =
        r300_translate_texformat(format, 0, TRUE, FALSE);

    return hw_format != ~0;
}
 
/* Fill the size, pitch, target and tiling fields of a texture format state.
 *
 * screen           - provides caps.is_r500
 * tex              - texture whose description (depth, strides, tiling) is used
 * format           - format used to convert the byte stride into pixels
 * level            - mipmap level the state is set up for
 * width0_override,
 * height0_override - level-0 dimensions to use
 * out              - state to update; format1 and format2 are only partially
 *                    rewritten, so the caller's other bits are kept
 */
void r300_texture_setup_format_state(struct r300_screen *screen,
                                     struct r300_resource *tex,
                                     enum pipe_format format,
                                     unsigned level,
                                     unsigned width0_override,
                                     unsigned height0_override,
                                     struct r300_texture_format_state *out)
{
    struct r300_texture_desc *desc = &tex->tex;
    const unsigned width = u_minify(width0_override, level);
    const unsigned height = u_minify(height0_override, level);
    const unsigned depth = u_minify(desc->depth0, level);
    /* Register encodings: size minus one for width/height, log2 for depth. */
    const unsigned txwidth = (width - 1) & 0x7ff;
    const unsigned txheight = (height - 1) & 0x7ff;
    const unsigned txdepth = util_logbase2(depth) & 0xf;

    /* Clear the bits owned by this function; format0 and tile_config are
     * fully rewritten below. */
    out->format1 &= ~R300_TX_FORMAT_TEX_COORD_TYPE_MASK;
    out->format2 &= R500_TXFORMAT_MSB;

    /* Set sampler state. */
    out->format0 = R300_TX_WIDTH(txwidth) |
                   R300_TX_HEIGHT(txheight) |
                   R300_TX_DEPTH(txdepth);

    if (desc->uses_stride_addressing) {
        const unsigned pitch_in_pixels =
            r300_stride_to_width(format, desc->stride_in_bytes[level]);

        /* rectangles love this */
        out->format0 |= R300_TX_PITCH_EN;
        /* NOTE(review): this plain assignment also drops the
         * R500_TXFORMAT_MSB bit kept by the mask above - confirm that
         * callers re-apply it afterwards. */
        out->format2 = (pitch_in_pixels - 1) & 0x1fff;
    }

    switch (tex->b.b.target) {
    case PIPE_TEXTURE_CUBE:
        out->format1 |= R300_TX_FORMAT_CUBIC_MAP;
        break;
    case PIPE_TEXTURE_3D:
        out->format1 |= R300_TX_FORMAT_3D;
        break;
    default:
        break;
    }

    /* large textures on r500 */
    if (screen->caps.is_r500) {
        unsigned us_width = txwidth;
        unsigned us_height = txheight;
        unsigned us_depth = txdepth;

        /* The US_FORMAT register fixes an R500 TX addressing bug.
         * Don't ask why it must be set like this. I don't know it either. */
        if (width > 2048) {
            out->format2 |= R500_TXWIDTH_BIT11;
            us_width = (0x000007FF + us_width) >> 1;
            us_depth |= 0x0000000D;
        }
        if (height > 2048) {
            out->format2 |= R500_TXHEIGHT_BIT11;
            us_height = (0x000007FF + us_height) >> 1;
            us_depth |= 0x0000000E;
        }

        out->us_format0 = R300_TX_WIDTH(us_width) |
                          R300_TX_HEIGHT(us_height) |
                          R300_TX_DEPTH(us_depth);
    }

    out->tile_config = R300_TXO_MACRO_TILE(desc->macrotile[level]) |
                       R300_TXO_MICRO_TILE(desc->microtile);
}
 
static void r300_texture_setup_fb_state(struct r300_surface *surf)
{
struct r300_resource *tex = r300_resource(surf->base.texture);
unsigned level = surf->base.u.tex.level;
unsigned stride =
r300_stride_to_width(surf->base.format, tex->tex.stride_in_bytes[level]);
 
/* Set framebuffer state. */
if (util_format_is_depth_or_stencil(surf->base.format)) {
surf->pitch =
stride |
R300_DEPTHMACROTILE(tex->tex.macrotile[level]) |
R300_DEPTHMICROTILE(tex->tex.microtile);
surf->format = r300_translate_zsformat(surf->base.format);
surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level];
surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level];
} else {
surf->pitch =
stride |
r300_translate_colorformat(surf->base.format) |
R300_COLOR_TILE(tex->tex.macrotile[level]) |
R300_COLOR_MICROTILE(tex->tex.microtile);
surf->format = r300_translate_out_fmt(surf->base.format);
surf->colormask_swizzle =
r300_translate_colormask_swizzle(surf->base.format);
surf->pitch_cmask = tex->tex.cmask_stride_in_pixels;
}
}
 
/* Destroy a texture: if it has CMASK storage and is the screen's current
 * CMASK resource, clear that pointer under the CMASK mutex; then drop the
 * buffer reference and free the texture object. */
static void r300_texture_destroy(struct pipe_screen *screen,
                                 struct pipe_resource* texture)
{
    struct r300_resource *res = (struct r300_resource*)texture;

    if (res->tex.cmask_dwords) {
        struct r300_screen *rscreen = r300_screen(screen);

        pipe_mutex_lock(rscreen->cmask_mutex);
        if (rscreen->cmask_resource == texture)
            rscreen->cmask_resource = NULL;
        pipe_mutex_unlock(rscreen->cmask_mutex);
    }

    pb_reference(&res->buf, NULL);
    FREE(res);
}
 
/* Export a winsys handle for a texture's buffer, passing the level-0 stride
 * to the winsys. Returns FALSE if texture is NULL, otherwise the winsys
 * result. */
boolean r300_resource_get_handle(struct pipe_screen* screen,
                                 struct pipe_resource *texture,
                                 struct winsys_handle *whandle)
{
    struct r300_resource *res = (struct r300_resource*)texture;
    struct radeon_winsys *winsys = r300_screen(screen)->rws;

    if (res == NULL)
        return FALSE;

    return winsys->buffer_get_handle(res->buf,
                                     res->tex.stride_in_bytes[0],
                                     whandle);
}
 
/* Resource vtable installed on every texture by r300_texture_create_object.
 * Entries are positional. Only destroy, transfer_map and transfer_unmap are
 * provided; the get_handle, transfer_flush_region and transfer_inline_write
 * slots are left NULL. */
static const struct u_resource_vtbl r300_texture_vtbl =
{
NULL, /* get_handle */
r300_texture_destroy, /* resource_destroy */
r300_texture_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r300_texture_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
 
/* The common texture constructor.
 *
 * rscreen                  - screen the texture belongs to
 * base                     - template describing the texture
 * microtile, macrotile     - requested tiling layouts
 * stride_in_bytes_override - stride to store in the description (0 is what
 *                            r300_texture_create passes)
 * buffer                   - existing backing buffer, or NULL to allocate one
 *
 * Returns the new texture, or NULL on failure. On failure the reference to
 * a caller-supplied buffer is dropped as well. */
static struct r300_resource*
r300_texture_create_object(struct r300_screen *rscreen,
                           const struct pipe_resource *base,
                           enum radeon_bo_layout microtile,
                           enum radeon_bo_layout macrotile,
                           unsigned stride_in_bytes_override,
                           struct pb_buffer *buffer)
{
    struct radeon_winsys *rws = rscreen->rws;
    struct r300_resource *tex = CALLOC_STRUCT(r300_resource);

    if (tex == NULL)
        goto error;

    pipe_reference_init(&tex->b.b.reference, 1);
    tex->b.b.screen = &rscreen->screen;
    tex->b.b.usage = base->usage;
    tex->b.b.bind = base->bind;
    tex->b.b.flags = base->flags;
    tex->b.vtbl = &r300_texture_vtbl;
    tex->tex.microtile = microtile;
    tex->tex.macrotile[0] = macrotile;
    tex->tex.stride_in_bytes_override = stride_in_bytes_override;
    tex->buf = buffer;

    /* Initial placement: transfer/staging resources go to GTT, multisampled
     * ones to VRAM only, everything else may live in either domain. */
    if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) ||
        base->usage == PIPE_USAGE_STAGING) {
        tex->domain = RADEON_DOMAIN_GTT;
    } else if (base->nr_samples > 1) {
        tex->domain = RADEON_DOMAIN_VRAM;
    } else {
        tex->domain = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
    }

    r300_texture_desc_init(rscreen, tex, base);

    /* Figure out the ideal placement for the texture: a texture that does
     * not fit in VRAM falls back to GTT, and one that does not fit in GTT
     * loses that domain too. */
    if ((tex->domain & RADEON_DOMAIN_VRAM) &&
        tex->tex.size_in_bytes >= rscreen->info.vram_size) {
        tex->domain &= ~RADEON_DOMAIN_VRAM;
        tex->domain |= RADEON_DOMAIN_GTT;
    }
    if ((tex->domain & RADEON_DOMAIN_GTT) &&
        tex->tex.size_in_bytes >= rscreen->info.gart_size) {
        tex->domain &= ~RADEON_DOMAIN_GTT;
    }

    /* No domain left: the texture is too large, just fail. */
    if (!tex->domain)
        goto error;

    /* Create the backing buffer if the caller did not supply one. */
    if (tex->buf == NULL) {
        tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE,
                                      tex->domain);
        if (tex->buf == NULL)
            goto error;
    }

    if (SCREEN_DBG_ON(rscreen, DBG_MSAA) && base->nr_samples > 1) {
        fprintf(stderr, "r300: %ix MSAA %s buffer created\n",
                base->nr_samples,
                util_format_is_depth_or_stencil(base->format) ? "depth" : "color");
    }

    tex->cs_buf = rws->buffer_get_cs_handle(tex->buf);

    rws->buffer_set_tiling(tex->buf, NULL,
                           tex->tex.microtile, tex->tex.macrotile[0],
                           0, 0, 0, 0, 0,
                           tex->tex.stride_in_bytes[0]);

    return tex;

error:
    FREE(tex);
    if (buffer)
        pb_reference(&buffer, NULL);
    return NULL;
}
 
/* Create a new texture. */
struct pipe_resource *r300_texture_create(struct pipe_screen *screen,
const struct pipe_resource *base)
{
struct r300_screen *rscreen = r300_screen(screen);
enum radeon_bo_layout microtile, macrotile;
 
if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) ||
(base->bind & PIPE_BIND_SCANOUT)) {
microtile = RADEON_LAYOUT_LINEAR;
macrotile = RADEON_LAYOUT_LINEAR;
} else {
/* This will make the texture_create_function select the layout. */
microtile = RADEON_LAYOUT_UNKNOWN;
macrotile = RADEON_LAYOUT_UNKNOWN;
}
 
return (struct pipe_resource*)
r300_texture_create_object(rscreen, base, microtile, macrotile,
0, NULL);
}
 
struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *base,
struct winsys_handle *whandle)
{
struct r300_screen *rscreen = r300_screen(screen);
struct radeon_winsys *rws = rscreen->rws;
struct pb_buffer *buffer;
enum radeon_bo_layout microtile, macrotile;
unsigned stride;
 
/* Support only 2D textures without mipmaps */
if ((base->target != PIPE_TEXTURE_2D &&
base->target != PIPE_TEXTURE_RECT) ||
base->depth0 != 1 ||
base->last_level != 0) {
return NULL;
}
 
buffer = rws->buffer_from_handle(rws, whandle, &stride);
if (!buffer)
return NULL;
 
rws->buffer_get_tiling(buffer, &microtile, &macrotile, NULL, NULL, NULL, NULL, NULL);
 
/* Enforce a microtiled zbuffer. */
if (util_format_is_depth_or_stencil(base->format) &&
microtile == RADEON_LAYOUT_LINEAR) {
switch (util_format_get_blocksize(base->format)) {
case 4:
microtile = RADEON_LAYOUT_TILED;
break;
 
case 2:
microtile = RADEON_LAYOUT_SQUARETILED;
break;
}
}
 
return (struct pipe_resource*)
r300_texture_create_object(rscreen, base, microtile, macrotile,
stride, buffer);
}
 
/* Not required to implement u_resource_vtbl, consider moving to another file.
 *
 * Create a surface for one layer of one mipmap level of a texture.
 *
 * ctx              - context that will own the surface
 * texture          - texture the surface refers to (a reference is taken)
 * surf_tmpl        - template providing the format, level and layer;
 *                    first_layer must equal last_layer
 * width0_override,
 * height0_override - level-0 dimensions used to derive the surface size
 *
 * Besides the framebuffer state, the parameters of the CBZB clear are
 * precomputed here.
 *
 * Returns the new surface, or NULL if the allocation fails. */
struct pipe_surface* r300_create_surface_custom(struct pipe_context * ctx,
                                         struct pipe_resource* texture,
                                         const struct pipe_surface *surf_tmpl,
                                         unsigned width0_override,
                                         unsigned height0_override)
{
    struct r300_resource* tex = r300_resource(texture);
    struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
    unsigned level = surf_tmpl->u.tex.level;
    uint32_t offset, tile_height;

    assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);

    /* Report allocation failure explicitly instead of computing
     * &surface->base through a NULL pointer. */
    if (!surface)
        return NULL;

    pipe_reference_init(&surface->base.reference, 1);
    pipe_resource_reference(&surface->base.texture, texture);
    surface->base.context = ctx;
    surface->base.format = surf_tmpl->format;
    surface->base.width = u_minify(width0_override, level);
    surface->base.height = u_minify(height0_override, level);
    surface->base.u.tex.level = level;
    surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
    surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;

    surface->buf = tex->buf;
    surface->cs_buf = tex->cs_buf;

    /* Prefer VRAM if there are multiple domains to choose from. */
    surface->domain = tex->domain;
    if (surface->domain & RADEON_DOMAIN_VRAM)
        surface->domain &= ~RADEON_DOMAIN_GTT;

    surface->offset = r300_texture_get_offset(tex, level,
                                              surf_tmpl->u.tex.first_layer);
    r300_texture_setup_fb_state(surface);

    /* Parameters for the CBZB clear. */
    surface->cbzb_allowed = tex->tex.cbzb_allowed[level];
    surface->cbzb_width = align(surface->base.width, 64);

    /* Height must be aligned to the size of a tile. */
    tile_height = r300_get_pixel_alignment(surface->base.format,
                                           tex->b.b.nr_samples,
                                           tex->tex.microtile,
                                           tex->tex.macrotile[level],
                                           DIM_HEIGHT, 0);

    /* Half of the surface height, rounded up to whole tiles. */
    surface->cbzb_height = align((surface->base.height + 1) / 2,
                                 tile_height);

    /* Offset must be aligned to 2K and must point at the beginning
     * of a scanline. */
    offset = surface->offset +
        tex->tex.stride_in_bytes[level] * surface->cbzb_height;
    surface->cbzb_midpoint_offset = offset & ~2047;

    surface->cbzb_pitch = surface->pitch & 0x1ffffc;

    /* The depth format used for the clear follows the pixel size. */
    if (util_format_get_blocksizebits(surface->base.format) == 32)
        surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
    else
        surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;

    DBG(r300_context(ctx), DBG_CBZB,
        "CBZB Allowed: %s, Dim: %ix%i, Misalignment: %i, Micro: %s, Macro: %s\n",
        surface->cbzb_allowed ? "YES" : " NO",
        surface->cbzb_width, surface->cbzb_height,
        offset & 2047,
        tex->tex.microtile ? "YES" : " NO",
        tex->tex.macrotile[level] ? "YES" : " NO");

    return &surface->base;
}
 
/* Create a surface using the texture's own level-0 width and height. */
struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
                                         struct pipe_resource* texture,
                                         const struct pipe_surface *surf_tmpl)
{
    const unsigned width0 = texture->width0;
    const unsigned height0 = texture->height0;

    return r300_create_surface_custom(ctx, texture, surf_tmpl,
                                      width0, height0);
}
 
/* Not required to implement u_resource_vtbl, consider moving to another file.
 *
 * Destroy a surface: release its reference to the texture, then free the
 * surface allocation itself. The context argument is not used. */
void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s)
{
    struct pipe_surface *surf = s;

    /* Setting the pointer to NULL drops the texture reference. */
    pipe_resource_reference(&surf->texture, NULL);

    FREE(surf);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture.h
0,0 → 1,88
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
/* Public interface of r300_texture.c: format translation and support
 * queries, texture creation/import, and surface creation/destruction. */
#ifndef R300_TEXTURE_H
#define R300_TEXTURE_H

#include "pipe/p_compiler.h"
#include "pipe/p_format.h"

/* Forward declarations; only pointers to these types are used here. */
struct pipe_screen;
struct pipe_context;
struct pipe_resource;
struct winsys_handle;
struct r300_texture_format_state;
struct r300_texture_desc;
struct r300_resource;
struct r300_screen;

unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
const unsigned char *swizzle_view,
boolean dxtc_swizzle);

/* Returns ~0 if the format is unsupported. */
uint32_t r300_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
boolean is_r500,
boolean dxtc_swizzle);

/* Returns R500_TXFORMAT_MSB for the formats that need it, 0 otherwise. */
uint32_t r500_tx_format_msb_bit(enum pipe_format format);

/* Format support queries. */
boolean r300_is_colorbuffer_format_supported(enum pipe_format format);

boolean r300_is_zs_format_supported(enum pipe_format format);

boolean r300_is_sampler_format_supported(enum pipe_format format);

/* Fills the size, pitch, target and tiling fields of *out for the given
 * mipmap level. */
void r300_texture_setup_format_state(struct r300_screen *screen,
struct r300_resource *tex,
enum pipe_format format,
unsigned level,
unsigned width0_override,
unsigned height0_override,
struct r300_texture_format_state *out);

/* Returns FALSE if texture is NULL. */
boolean r300_resource_get_handle(struct pipe_screen* screen,
struct pipe_resource *texture,
struct winsys_handle *whandle);

/* Imports a buffer as a texture; returns NULL on failure. */
struct pipe_resource*
r300_texture_from_handle(struct pipe_screen* screen,
const struct pipe_resource* base,
struct winsys_handle *whandle);

/* Creates a texture with a new buffer; returns NULL on failure. */
struct pipe_resource*
r300_texture_create(struct pipe_screen* screen,
const struct pipe_resource* templ);

/* Creates a surface whose size is derived from the given level-0
 * dimensions instead of the texture's own. */
struct pipe_surface* r300_create_surface_custom(struct pipe_context * ctx,
struct pipe_resource* texture,
const struct pipe_surface *surf_tmpl,
unsigned width0_override,
unsigned height0_override);

/* Creates a surface using the texture's own level-0 dimensions. */
struct pipe_surface* r300_create_surface(struct pipe_context *ctx,
struct pipe_resource* texture,
const struct pipe_surface *surf_tmpl);

/* Releases the surface's texture reference and frees the surface. */
void r300_surface_destroy(struct pipe_context *ctx, struct pipe_surface* s);

#endif /* R300_TEXTURE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture_desc.c
0,0 → 1,642
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_texture_desc.h"
#include "r300_context.h"
 
#include "util/u_format.h"
 
/* Returns the number of pixels that the texture should be aligned to
 * in the given dimension.
 *
 * format      - pixel format; only its block size in bytes is used
 * num_samples - not used by this function
 * microtile   - microtiling layout (linear, tiled or square-tiled)
 * macrotile   - macrotiling layout (linear or tiled)
 * dim         - dimension to query (DIM_WIDTH or DIM_HEIGHT)
 * is_rs690    - enables a larger minimum width alignment for
 *               non-macrotiled textures
 *
 * The alignment comes from a table indexed by
 * [macrotile][log2(bytes per pixel)][microtile][dim]. A zero entry marks a
 * combination that is not supported; it trips the final assertion in debug
 * builds and is returned as 0 otherwise. */
unsigned r300_get_pixel_alignment(enum pipe_format format,
                                  unsigned num_samples,
                                  enum radeon_bo_layout microtile,
                                  enum radeon_bo_layout macrotile,
                                  enum r300_dim dim, boolean is_rs690)
{
    static const unsigned table[2][5][3][2] =
    {
        {
    /* Macro: linear    linear    linear
       Micro: linear    tiled  square-tiled */
            {{ 32, 1}, { 8,  4}, { 0,  0}}, /*   8 bits per pixel */
            {{ 16, 1}, { 8,  2}, { 4,  4}}, /*  16 bits per pixel */
            {{  8, 1}, { 4,  2}, { 0,  0}}, /*  32 bits per pixel */
            {{  4, 1}, { 2,  2}, { 0,  0}}, /*  64 bits per pixel */
            {{  2, 1}, { 0,  0}, { 0,  0}}  /* 128 bits per pixel */
        },
        {
    /* Macro: tiled     tiled     tiled
       Micro: linear    tiled  square-tiled */
            {{256, 8}, {64, 32}, { 0,  0}}, /*   8 bits per pixel */
            {{128, 8}, {64, 16}, {32, 32}}, /*  16 bits per pixel */
            {{ 64, 8}, {32, 16}, { 0,  0}}, /*  32 bits per pixel */
            {{ 32, 8}, {16, 16}, { 0,  0}}, /*  64 bits per pixel */
            {{ 16, 8}, { 0,  0}, { 0,  0}}  /* 128 bits per pixel */
        }
    };

    unsigned tile = 0;
    unsigned pixsize = util_format_get_blocksize(format);

    assert(macrotile <= RADEON_LAYOUT_TILED);
    assert(microtile <= RADEON_LAYOUT_SQUARETILED);
    assert(pixsize <= 16);
    assert(dim <= DIM_HEIGHT);

    tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
    if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) {
        unsigned h_tile =
            table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT];
        unsigned bytes_per_tile_column = pixsize * h_tile;

        /* Unsupported combinations have a zero height entry. Skip the
         * adjustment for them so that the division cannot fault and the
         * assertion below reports the problem instead. */
        if (bytes_per_tile_column) {
            /* presumably a 64-byte requirement of these chips - TODO confirm */
            unsigned min_width = 64 / bytes_per_tile_column;

            if (tile < min_width)
                tile = min_width;
        }
    }

    assert(tile);
    return tile;
}
 
/* Return true if macrotiling should be enabled on the miplevel.
 *
 * Multisampled textures are always macrotiled. Otherwise the size of the
 * level in the given dimension is compared with the macrotile size in that
 * dimension: it must be at least one tile in rv350 mode, and strictly
 * larger than one tile otherwise. */
static boolean r300_texture_macro_switch(struct r300_resource *tex,
                                         unsigned level,
                                         boolean rv350_mode,
                                         enum r300_dim dim)
{
    unsigned tile_dim, level_dim;

    if (tex->b.b.nr_samples > 1)
        return TRUE;

    tile_dim = r300_get_pixel_alignment(tex->b.b.format, tex->b.b.nr_samples,
                                        tex->tex.microtile,
                                        RADEON_LAYOUT_TILED, dim, 0);

    level_dim = u_minify(dim == DIM_WIDTH ? tex->tex.width0
                                          : tex->tex.height0,
                         level);

    /* See TX_FILTER1_n.MACRO_SWITCH. */
    return rv350_mode ? level_dim >= tile_dim : level_dim > tile_dim;
}
 
/**
* Return the stride, in bytes, of the texture image of the given texture
* at the given level.
*/
static unsigned r300_texture_get_stride(struct r300_screen *screen,
struct r300_resource *tex,
unsigned level)
{
unsigned tile_width, width, stride;
boolean is_rs690 = (screen->caps.family == CHIP_RS600 ||
screen->caps.family == CHIP_RS690 ||
screen->caps.family == CHIP_RS740);
 
if (tex->tex.stride_in_bytes_override)
return tex->tex.stride_in_bytes_override;
 
/* Check the level. */
if (level > tex->b.b.last_level) {
SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
__FUNCTION__, level, tex->b.b.last_level);
return 0;
}
 
width = u_minify(tex->tex.width0, level);
 
if (util_format_is_plain(tex->b.b.format)) {
tile_width = r300_get_pixel_alignment(tex->b.b.format,
tex->b.b.nr_samples,
tex->tex.microtile,
tex->tex.macrotile[level],
DIM_WIDTH, is_rs690);
width = align(width, tile_width);
 
stride = util_format_get_stride(tex->b.b.format, width);
/* The alignment to 32 bytes is sort of implied by the layout... */
return stride;
} else {
return align(util_format_get_stride(tex->b.b.format, width), is_rs690 ? 64 : 32);
}
}
 
/* Return the number of block rows of one layer of the given mipmap level,
 * after all height alignment has been applied.
 *
 * out_aligned_for_cbzb - may be NULL. If non-NULL and the format is plain,
 *     it receives whether the CBZB clear can be used on the level, and the
 *     height of a non-mipmapped 1D/2D/RECT texture may be padded to make
 *     that possible. For non-plain formats it is left untouched. */
static unsigned r300_texture_get_nblocksy(struct r300_resource *tex,
                                          unsigned level,
                                          boolean *out_aligned_for_cbzb)
{
    const enum pipe_format format = tex->b.b.format;
    const boolean is_1d_2d_or_rect =
        tex->b.b.target == PIPE_TEXTURE_1D ||
        tex->b.b.target == PIPE_TEXTURE_2D ||
        tex->b.b.target == PIPE_TEXTURE_RECT;
    unsigned height = u_minify(tex->tex.height0, level);
    unsigned tile_height;

    /* Mipmapped and 3D textures must have their height aligned to POT. */
    if (!is_1d_2d_or_rect || tex->b.b.last_level != 0)
        height = util_next_power_of_two(height);

    if (util_format_is_plain(format)) {
        tile_height = r300_get_pixel_alignment(format,
                                               tex->b.b.nr_samples,
                                               tex->tex.microtile,
                                               tex->tex.macrotile[level],
                                               DIM_HEIGHT, 0);
        height = align(height, tile_height);

        /* See if the CBZB clear can be used on the buffer,
         * taking the texture size into account. */
        if (out_aligned_for_cbzb) {
            if (!tex->tex.macrotile[level]) {
                *out_aligned_for_cbzb = FALSE;
            } else {
                /* When clearing, the layer (width*height) is horizontally
                 * split into two, and the upper and lower halves are cleared
                 * by the CB and ZB units, respectively. Therefore, the number
                 * of macrotiles in the Y direction must be even. */

                /* Align the height so that there is an even number of
                 * macrotiles. Do so for 3 or more macrotiles in the
                 * Y direction. */
                if (level == 0 && tex->b.b.last_level == 0 &&
                    is_1d_2d_or_rect &&
                    height >= tile_height * 3) {
                    height = align(height, tile_height * 2);
                }

                *out_aligned_for_cbzb = height % (tile_height * 2) == 0;
            }
        }
    }

    return util_format_get_nblocksy(format, height);
}
 
/* Get a width in pixels from a stride in bytes: the number of whole blocks
 * that fit in the stride, multiplied by the width of one block. */
unsigned r300_stride_to_width(enum pipe_format format,
                              unsigned stride_in_bytes)
{
    const unsigned blocks_per_row =
        stride_in_bytes / util_format_get_blocksize(format);

    return blocks_per_row * util_format_get_blockwidth(format);
}
 
/* Lay out all mip levels of a texture one after another.
 *
 * For every level this decides the macrotiling, computes the stride and the
 * number of block rows, and records the level offset, layer size and stride.
 * tex->tex.size_in_bytes ends up as the total size of the miptree.
 *
 * align_for_cbzb requests the extra height alignment needed by the CBZB
 * clear on levels where cbzb_allowed is set.  cbzb_allowed[] is narrowed to
 * the levels that actually ended up aligned, so calling this with FALSE
 * clears it for every level. */
static void r300_setup_miptree(struct r300_screen *screen,
                               struct r300_resource *tex,
                               boolean align_for_cbzb)
{
    struct pipe_resource *res = &tex->b.b;
    const boolean rv350_mode = screen->caps.family >= CHIP_R350;
    unsigned level;

    tex->tex.size_in_bytes = 0;

    SCREEN_DBG(screen, DBG_TEXALLOC,
               "r300: Making miptree for texture, format %s\n",
               util_format_short_name(res->format));

    for (level = 0; level <= res->last_level; level++) {
        unsigned pitch, rows, slice_bytes, level_bytes;
        boolean cbzb_ok = FALSE;
        boolean can_macrotile;

        /* A level is macrotiled only if level 0 is and the macro switch
         * passes in both dimensions. */
        can_macrotile =
            tex->tex.macrotile[0] == RADEON_LAYOUT_TILED &&
            r300_texture_macro_switch(tex, level, rv350_mode, DIM_WIDTH) &&
            r300_texture_macro_switch(tex, level, rv350_mode, DIM_HEIGHT);
        tex->tex.macrotile[level] =
            can_macrotile ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;

        pitch = r300_texture_get_stride(screen, tex, level);

        /* Only ask for the CBZB evaluation where it is wanted and still
         * allowed; otherwise cbzb_ok stays FALSE. */
        rows = r300_texture_get_nblocksy(
            tex, level,
            (align_for_cbzb && tex->tex.cbzb_allowed[level]) ? &cbzb_ok
                                                             : NULL);

        slice_bytes = pitch * rows;
        if (res->nr_samples > 1)
            slice_bytes *= res->nr_samples;

        /* Cube maps always have 6 layers, everything else has the minified
         * depth. */
        if (res->target == PIPE_TEXTURE_CUBE)
            level_bytes = slice_bytes * 6;
        else
            level_bytes = slice_bytes * u_minify(tex->tex.depth0, level);

        tex->tex.offset_in_bytes[level] = tex->tex.size_in_bytes;
        tex->tex.size_in_bytes = tex->tex.offset_in_bytes[level] + level_bytes;
        tex->tex.layer_size_in_bytes[level] = slice_bytes;
        tex->tex.stride_in_bytes[level] = pitch;
        tex->tex.cbzb_allowed[level] = tex->tex.cbzb_allowed[level] && cbzb_ok;

        SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
                   "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
                   level, u_minify(tex->tex.width0, level),
                   u_minify(tex->tex.height0, level),
                   u_minify(tex->tex.depth0, level), pitch,
                   tex->tex.size_in_bytes,
                   tex->tex.macrotile[level] ? "TRUE" : "FALSE");
    }
}
 
static void r300_setup_flags(struct r300_resource *tex)
{
tex->tex.uses_stride_addressing =
!util_is_power_of_two(tex->b.b.width0) ||
(tex->tex.stride_in_bytes_override &&
r300_stride_to_width(tex->b.b.format,
tex->tex.stride_in_bytes_override) != tex->b.b.width0);
 
tex->tex.is_npot =
tex->tex.uses_stride_addressing ||
!util_is_power_of_two(tex->b.b.height0) ||
!util_is_power_of_two(tex->b.b.depth0);
}
 
/* Initialise cbzb_allowed[] for every mip level.
 *
 * The CBZB clear is considered at all only if
 *   1) the texture is point-sampled (at most one sample),
 *   2) the block size is 16 or 32 bits,
 *   3) level 0 is macrotiled (an unaligned midpoint ZB offset returns
 *      garbage with certain sizes; macrotiling ensures the alignment),
 * and the DBG_NO_CBZB debug flag is not set.  A level is then allowed when
 * it is macrotiled itself. */
static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
                                  struct r300_resource *tex)
{
    const unsigned bits = util_format_get_blocksizebits(tex->b.b.format);
    const boolean point_sampled = tex->b.b.nr_samples <= 1;
    const boolean size_ok = bits == 16 || bits == 32;
    boolean level0_ok = point_sampled && size_ok && tex->tex.macrotile[0];
    unsigned level;

    if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB))
        level0_ok = FALSE;

    for (level = 0; level <= tex->b.b.last_level; level++) {
        if (level0_ok)
            tex->tex.cbzb_allowed[level] = tex->tex.macrotile[level] != 0;
        else
            tex->tex.cbzb_allowed[level] = FALSE;
    }
}
 
/* Number of dwords needed to cover a stride x height pixel area when one
 * dword covers an xblock x yblock pixel block.  The stride is padded to a
 * multiple of xblock and the height to a multiple of yblock first. */
static unsigned r300_pixels_to_dwords(unsigned stride,
                                      unsigned height,
                                      unsigned xblock, unsigned yblock)
{
    const unsigned pixels_per_dword = xblock * yblock;

    /* The padded-area product is deliberately left as a single expression so
     * that it is evaluated in whatever type the alignment helpers return. */
    return (util_align_npot(stride, xblock) * align(height, yblock)) /
           pixels_per_dword;
}
 
/* Compute the per-level ZMASK and HIZ RAM requirements of a texture.
 *
 * Only 32-bit, microtiled depth/stencil formats are handled; for anything
 * else the function returns without touching the texture.  For each mip
 * level it fills zmask_dwords, zcomp8x8 and zmask_stride_in_pixels, then
 * hiz_dwords and hiz_stride_in_pixels.  A level whose requirement exceeds
 * the RAM reported in screen->caps gets 0/FALSE instead. */
static void r300_setup_hyperz_properties(struct r300_screen *screen,
struct r300_resource *tex)
{
/* The tile size of 1 DWORD in ZMASK RAM is:
*
* GPU Pipes 4x4 mode 8x8 mode
* ------------------------------------------
* R580 4P/1Z 32x32 64x64
* RV570 3P/1Z 48x16 96x32
* RV530 1P/2Z 32x16 64x32
* 1P/1Z 16x16 32x32
*/
/* All four tables in this function are indexed by (pipes - 1). */
static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8};
static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8};

/* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels),
* but the blocks have very weird ordering.
*
* With 2 pipes and an image of size 8xY, where Y >= 1,
* clearing 4 dwords clears blocks like this:
*
* 01012323
*
* where numbers correspond to dword indices. The blocks are interleaved
* in the X direction, so the alignment must be 4x1 blocks (32x8 pixels).
*
* With 4 pipes and an image of size 8xY, where Y >= 4,
* clearing 8 dwords clears blocks like this:
* 01012323
* 45456767
* 01012323
* 45456767
* where numbers correspond to dword indices. The blocks are interleaved
* in both directions, so the alignment must be 4x4 blocks (32x32 pixels)
*/
static unsigned hiz_align_x[4] = {8, 32, 48, 32};
static unsigned hiz_align_y[4] = {8, 8, 8, 32};

if (util_format_is_depth_or_stencil(tex->b.b.format) &&
util_format_get_blocksizebits(tex->b.b.format) == 32 &&
tex->tex.microtile) {
unsigned i, pipes;

/* RV530 takes the pipe count from the Z pipes, every other family from
 * the GB pipes.
 * NOTE(review): the tables have 4 entries and hiz_numdw divides by
 * pipes, so this assumes 1 <= pipes <= 4 -- confirm the winsys
 * guarantees that. */
if (screen->caps.family == CHIP_RV530) {
pipes = screen->info.r300_num_z_pipes;
} else {
pipes = screen->info.r300_num_gb_pipes;
}

for (i = 0; i <= tex->b.b.last_level; i++) {
unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height;

/* Level pitch in pixels, padded to a multiple of 16, and the minified
 * height taken from the resource dimensions (b.b.height0). */
stride = r300_stride_to_width(tex->b.b.format,
tex->tex.stride_in_bytes[i]);
stride = align(stride, 16);
height = u_minify(tex->b.b.height0, i);

/* The 8x8 compression mode needs macrotiling. */
/* It is also only selected for single-sampled textures; everything
 * else uses 4x4. */
zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
tex->tex.macrotile[i] &&
tex->b.b.nr_samples <= 1 ? 8 : 4;

/* Get the ZMASK buffer size in dwords. */
zcomp_numdw = r300_pixels_to_dwords(stride, height,
zmask_blocks_x_per_dw[pipes-1] * zcompsize,
zmask_blocks_y_per_dw[pipes-1] * zcompsize);

/* Check whether we have enough ZMASK memory. */
/* NOTE(review): the 32-bit test repeats the enclosing condition and is
 * always true at this point. */
if (util_format_get_blocksizebits(tex->b.b.format) == 32 &&
zcomp_numdw <= screen->caps.zmask_ram * pipes) {
tex->tex.zmask_dwords[i] = zcomp_numdw;
tex->tex.zcomp8x8[i] = zcompsize == 8;

tex->tex.zmask_stride_in_pixels[i] =
util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize);
} else {
tex->tex.zmask_dwords[i] = 0;
tex->tex.zcomp8x8[i] = FALSE;
tex->tex.zmask_stride_in_pixels[i] = 0;
}

/* Now setup HIZ. */
/* stride and height are re-aligned in place from here on; the ZMASK
 * values above were computed from the values before this alignment. */
stride = util_align_npot(stride, hiz_align_x[pipes-1]);
height = align(height, hiz_align_y[pipes-1]);

/* Get the HIZ buffer size in dwords. */
hiz_numdw = (stride * height) / (8*8 * pipes);

/* Check whether we have enough HIZ memory. */
if (hiz_numdw <= screen->caps.hiz_ram * pipes) {
tex->tex.hiz_dwords[i] = hiz_numdw;
tex->tex.hiz_stride_in_pixels[i] = stride;
} else {
tex->tex.hiz_dwords[i] = 0;
tex->tex.hiz_stride_in_pixels[i] = 0;
}
}
}
}
 
static void r300_setup_cmask_properties(struct r300_screen *screen,
struct r300_resource *tex)
{
static unsigned cmask_align_x[4] = {16, 32, 48, 32};
static unsigned cmask_align_y[4] = {16, 16, 16, 32};
unsigned pipes, stride, cmask_num_dw, cmask_max_size;
 
/* We need an AA colorbuffer, no mipmaps. */
if (tex->b.b.nr_samples <= 1 ||
tex->b.b.last_level > 0 ||
util_format_is_depth_or_stencil(tex->b.b.format)) {
return;
}
 
/* FP16 AA needs R500 and a fairly new DRM. */
if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) &&
(!screen->caps.is_r500 || screen->info.drm_minor < 29)) {
return;
}
 
if (SCREEN_DBG_ON(screen, DBG_NO_CMASK)) {
return;
}
 
/* CMASK is part of raster pipes. The number of Z pipes doesn't matter. */
pipes = screen->info.r300_num_gb_pipes;
 
/* The single-pipe cards have 5120 dwords of CMASK RAM,
* the other cards have 4096 dwords of CMASK RAM per pipe. */
cmask_max_size = pipes == 1 ? 5120 : pipes * 4096;
 
stride = r300_stride_to_width(tex->b.b.format,
tex->tex.stride_in_bytes[0]);
stride = align(stride, 16);
 
/* Get the CMASK size in dwords. */
cmask_num_dw = r300_pixels_to_dwords(stride, tex->b.b.height0,
cmask_align_x[pipes-1],
cmask_align_y[pipes-1]);
 
/* Check the CMASK size against the CMASK memory limit. */
if (cmask_num_dw <= cmask_max_size) {
tex->tex.cmask_dwords = cmask_num_dw;
tex->tex.cmask_stride_in_pixels =
util_align_npot(stride, cmask_align_x[pipes-1]);
}
}
 
static void r300_setup_tiling(struct r300_screen *screen,
struct r300_resource *tex)
{
enum pipe_format format = tex->b.b.format;
boolean rv350_mode = screen->caps.family >= CHIP_R350;
boolean is_zb = util_format_is_depth_or_stencil(format);
boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
boolean force_microtiling =
(tex->b.b.flags & R300_RESOURCE_FORCE_MICROTILING) != 0;
 
if (tex->b.b.nr_samples > 1) {
tex->tex.microtile = RADEON_LAYOUT_TILED;
tex->tex.macrotile[0] = RADEON_LAYOUT_TILED;
return;
}
 
tex->tex.microtile = RADEON_LAYOUT_LINEAR;
tex->tex.macrotile[0] = RADEON_LAYOUT_LINEAR;
 
if (tex->b.b.usage == PIPE_USAGE_STAGING) {
return;
}
 
if (!util_format_is_plain(format)) {
return;
}
 
/* If height == 1, disable microtiling except for zbuffer. */
if (!force_microtiling && !is_zb &&
(tex->b.b.height0 == 1 || dbg_no_tiling)) {
return;
}
 
/* Set microtiling. */
switch (util_format_get_blocksize(format)) {
case 1:
case 4:
case 8:
tex->tex.microtile = RADEON_LAYOUT_TILED;
break;
 
case 2:
tex->tex.microtile = RADEON_LAYOUT_SQUARETILED;
break;
}
 
if (dbg_no_tiling) {
return;
}
 
/* Set macrotiling. */
if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) &&
r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) {
tex->tex.macrotile[0] = RADEON_LAYOUT_TILED;
}
}
 
static void r300_tex_print_info(struct r300_resource *tex,
const char *func)
{
fprintf(stderr,
"r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, "
"LastLevel: %i, Size: %i, Format: %s, Samples: %i\n",
func,
tex->tex.macrotile[0] ? "YES" : " NO",
tex->tex.microtile ? "YES" : " NO",
r300_stride_to_width(tex->b.b.format, tex->tex.stride_in_bytes[0]),
tex->b.b.width0, tex->b.b.height0, tex->b.b.depth0,
tex->b.b.last_level, tex->tex.size_in_bytes,
util_format_short_name(tex->b.b.format),
tex->b.b.nr_samples);
}
 
/* Fill in the layout description of a texture from a resource template.
 *
 * rscreen: screen whose capabilities and debug flags steer the layout.
 * tex:     texture to describe; tex->b.b and tex->tex are written.  If
 *          tex->buf is already set, the computed size is checked against
 *          the size of that buffer.
 * base:    template providing target, format, dimensions, level and sample
 *          counts.
 *
 * The steps run in a fixed order: copy the template, lower the sample count
 * where a width limit applies, derive the NPOT flags, pad 3D NPOT
 * dimensions, pick the tiling, compute the miptree, and finally set up the
 * HyperZ and CMASK properties. */
void r300_texture_desc_init(struct r300_screen *rscreen,
struct r300_resource *tex,
const struct pipe_resource *base)
{
tex->b.b.target = base->target;
tex->b.b.format = base->format;
tex->b.b.width0 = base->width0;
tex->b.b.height0 = base->height0;
tex->b.b.depth0 = base->depth0;
tex->b.b.array_size = base->array_size;
tex->b.b.last_level = base->last_level;
tex->b.b.nr_samples = base->nr_samples;
/* tex->tex.{width0,height0,depth0} are the dimensions used for the layout;
 * they may be enlarged below while tex->b.b keeps the template values. */
tex->tex.width0 = base->width0;
tex->tex.height0 = base->height0;
tex->tex.depth0 = base->depth0;

/* There is a CB memory addressing hardware bug that limits the width
* of the MSAA buffer in some cases in R520. In order to get around it,
* the following code lowers the sample count depending on the format and
* the width.
*
* The only catch is that all MSAA colorbuffers and a zbuffer which are
* supposed to be used together should always be bound together. Only
* then the correct minimum sample count of all bound buffers is used
* for rendering. */
if (rscreen->caps.is_r500) {
/* FP16 6x MSAA buffers are limited to a width of 1360 pixels. */
if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) &&
tex->b.b.nr_samples == 6 && tex->b.b.width0 > 1360) {
tex->b.b.nr_samples = 4;
}

/* FP16 4x MSAA buffers are limited to a width of 2048 pixels. */
/* This also catches buffers just lowered from 6x to 4x above. */
if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) &&
tex->b.b.nr_samples == 4 && tex->b.b.width0 > 2048) {
tex->b.b.nr_samples = 2;
}
}

/* 32-bit 6x MSAA buffers are limited to a width of 2720 pixels.
* This applies to all R300-R500 cards. */
if (util_format_get_blocksizebits(tex->b.b.format) == 32 &&
!util_format_is_depth_or_stencil(tex->b.b.format) &&
tex->b.b.nr_samples == 6 && tex->b.b.width0 > 2720) {
tex->b.b.nr_samples = 4;
}

/* The flags are derived from the unpadded tex->b.b dimensions. */
r300_setup_flags(tex);

/* Align a 3D NPOT texture to POT. */
if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) {
tex->tex.width0 = util_next_power_of_two(tex->tex.width0);
tex->tex.height0 = util_next_power_of_two(tex->tex.height0);
tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0);
}

/* Setup tiling. */
/* Tiling is only chosen here while microtile is still
 * RADEON_LAYOUT_UNKNOWN; any other value is kept as it is. */
if (tex->tex.microtile == RADEON_LAYOUT_UNKNOWN) {
r300_setup_tiling(rscreen, tex);
}

r300_setup_cbzb_flags(rscreen, tex);

/* Setup the miptree description. */
r300_setup_miptree(rscreen, tex, TRUE);
/* If the required buffer size is larger than the given max size,
* try again without the alignment for the CBZB clear. */
/* The check only applies when a buffer is already attached. */
if (tex->buf && tex->tex.size_in_bytes > tex->buf->size) {
r300_setup_miptree(rscreen, tex, FALSE);

/* Make sure the buffer we got is large enough. */
if (tex->tex.size_in_bytes > tex->buf->size) {
fprintf(stderr,
"r300: I got a pre-allocated buffer to use it as a texture "
"storage, but the buffer is too small. I'll use the buffer "
"anyway, because I can't crash here, but it's dangerous. "
"This can be a DDX bug. Got: %iB, Need: %iB, Info:\n",
tex->buf->size, tex->tex.size_in_bytes);
r300_tex_print_info(tex, "texture_desc_init");
/* Ooops, what now. Apps will break if we fail this,
* so just pretend everything's okay. */
}
}

/* Both helpers read the per-level strides computed by the miptree setup. */
r300_setup_hyperz_properties(rscreen, tex);
r300_setup_cmask_properties(rscreen, tex);

if (SCREEN_DBG_ON(rscreen, DBG_TEX))
r300_tex_print_info(tex, "texture_desc_init");
}
 
unsigned r300_texture_get_offset(struct r300_resource *tex,
unsigned level, unsigned layer)
{
unsigned offset = tex->tex.offset_in_bytes[level];
 
switch (tex->b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
return offset + layer * tex->tex.layer_size_in_bytes[level];
 
default:
assert(layer == 0);
return offset;
}
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_texture_desc.h
0,0 → 1,56
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_TEXTURE_DESC_H
#define R300_TEXTURE_DESC_H
 
#include "pipe/p_format.h"
#include "r300_context.h"
 
struct pipe_resource;
struct r300_screen;
struct r300_texture_desc;
struct r300_resource;
 
/* Selects the texture dimension an alignment or macro-switch query refers
 * to. */
enum r300_dim {
DIM_WIDTH = 0, /* query concerns the width */
DIM_HEIGHT = 1 /* query concerns the height */
};
 
/* Return the pixel alignment for one dimension of a texture with the given
 * format, sample count and micro/macro tiling layout.
 * NOTE(review): the definition is not part of this header; the description
 * is inferred from the parameter names and from callers that align a height
 * to the result -- confirm against the implementation. */
unsigned r300_get_pixel_alignment(enum pipe_format format,
unsigned num_samples,
enum radeon_bo_layout microtile,
enum radeon_bo_layout macrotile,
enum r300_dim dim, boolean is_rs690);

/* Fill in the layout description of "tex" (tiling, miptree offsets and
 * strides, HyperZ and CMASK properties) from the resource template "base". */
void r300_texture_desc_init(struct r300_screen *rscreen,
struct r300_resource *tex,
const struct pipe_resource *base);

/* Return the byte offset of the given layer of the given mip level. */
unsigned r300_texture_get_offset(struct r300_resource *tex,
unsigned level, unsigned layer);

/* Convert a stride in bytes into a width in pixels for the given format. */
unsigned r300_stride_to_width(enum pipe_format format,
unsigned stride_in_bytes);
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_tgsi_to_rc.c
0,0 → 1,385
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_tgsi_to_rc.h"
 
#include "compiler/radeon_compiler.h"
 
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
 
/* Map a TGSI opcode to the corresponding RC opcode.
 *
 * Opcodes without an active case fall out of the switch, are reported on
 * stderr and yield RC_OPCODE_ILLEGAL_OPCODE.  The commented-out cases list
 * TGSI opcodes that are not translated here. */
static unsigned translate_opcode(unsigned opcode)
{
switch(opcode) {
case TGSI_OPCODE_ARL: return RC_OPCODE_ARL;
case TGSI_OPCODE_MOV: return RC_OPCODE_MOV;
case TGSI_OPCODE_LIT: return RC_OPCODE_LIT;
case TGSI_OPCODE_RCP: return RC_OPCODE_RCP;
case TGSI_OPCODE_RSQ: return RC_OPCODE_RSQ;
case TGSI_OPCODE_EXP: return RC_OPCODE_EXP;
case TGSI_OPCODE_LOG: return RC_OPCODE_LOG;
case TGSI_OPCODE_MUL: return RC_OPCODE_MUL;
case TGSI_OPCODE_ADD: return RC_OPCODE_ADD;
case TGSI_OPCODE_DP3: return RC_OPCODE_DP3;
case TGSI_OPCODE_DP4: return RC_OPCODE_DP4;
case TGSI_OPCODE_DST: return RC_OPCODE_DST;
case TGSI_OPCODE_MIN: return RC_OPCODE_MIN;
case TGSI_OPCODE_MAX: return RC_OPCODE_MAX;
case TGSI_OPCODE_SLT: return RC_OPCODE_SLT;
case TGSI_OPCODE_SGE: return RC_OPCODE_SGE;
case TGSI_OPCODE_MAD: return RC_OPCODE_MAD;
case TGSI_OPCODE_SUB: return RC_OPCODE_SUB;
case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
case TGSI_OPCODE_CND: return RC_OPCODE_CND;
/* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
/* gap */
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
/* gap */
case TGSI_OPCODE_ABS: return RC_OPCODE_ABS;
/* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */
case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
/* Note the name crossover: TGSI KILL maps to RC KILP here, while TGSI
 * KILL_IF (last case of the switch) maps to RC KIL. */
case TGSI_OPCODE_KILL: return RC_OPCODE_KILP;
/* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */
/* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */
/* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */
/* case TGSI_OPCODE_PK4UB: return RC_OPCODE_PK4UB; */
/* case TGSI_OPCODE_RFL: return RC_OPCODE_RFL; */
case TGSI_OPCODE_SEQ: return RC_OPCODE_SEQ;
case TGSI_OPCODE_SFL: return RC_OPCODE_SFL;
case TGSI_OPCODE_SGT: return RC_OPCODE_SGT;
case TGSI_OPCODE_SIN: return RC_OPCODE_SIN;
case TGSI_OPCODE_SLE: return RC_OPCODE_SLE;
case TGSI_OPCODE_SNE: return RC_OPCODE_SNE;
/* case TGSI_OPCODE_STR: return RC_OPCODE_STR; */
case TGSI_OPCODE_TEX: return RC_OPCODE_TEX;
case TGSI_OPCODE_TXD: return RC_OPCODE_TXD;
case TGSI_OPCODE_TXP: return RC_OPCODE_TXP;
/* case TGSI_OPCODE_UP2H: return RC_OPCODE_UP2H; */
/* case TGSI_OPCODE_UP2US: return RC_OPCODE_UP2US; */
/* case TGSI_OPCODE_UP4B: return RC_OPCODE_UP4B; */
/* case TGSI_OPCODE_UP4UB: return RC_OPCODE_UP4UB; */
/* case TGSI_OPCODE_X2D: return RC_OPCODE_X2D; */
/* case TGSI_OPCODE_ARA: return RC_OPCODE_ARA; */
/* case TGSI_OPCODE_ARR: return RC_OPCODE_ARR; */
/* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */
/* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP;
case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
/* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */
/* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */
case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
/* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */
/* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC;
/* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
/* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
/* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
/* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
/* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
/* case TGSI_OPCODE_XOR: return RC_OPCODE_XOR; */
/* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */
/* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */
/* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */
case TGSI_OPCODE_CONT: return RC_OPCODE_CONT;
/* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */
/* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */
/* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */
/* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */
/* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */
/* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */
case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
/* gap */
/* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */
/* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */
/* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */
case TGSI_OPCODE_KILL_IF: return RC_OPCODE_KIL;
}

/* No case matched: report the opcode by name and signal the failure through
 * the return value. */
fprintf(stderr, "r300: Unknown TGSI/RC opcode: %s\n", tgsi_get_opcode_name(opcode));
return RC_OPCODE_ILLEGAL_OPCODE;
}
 
/* Map a TGSI saturate mode to the RC saturate mode.  Modes other than
 * TGSI_SAT_NONE and TGSI_SAT_ZERO_ONE are reported on stderr and treated as
 * "no saturation". */
static unsigned translate_saturate(unsigned saturate)
{
    if (saturate == TGSI_SAT_ZERO_ONE)
        return RC_SATURATE_ZERO_ONE;

    if (saturate != TGSI_SAT_NONE)
        fprintf(stderr, "Unknown saturate mode: %i\n", saturate);

    return RC_SATURATE_NONE;
}
 
/* Map a TGSI register file to the RC register file.  Constants and
 * immediates both live in the RC constant file.  Files that are not handled
 * are reported on stderr and treated as temporaries. */
static unsigned translate_register_file(unsigned file)
{
    if (file == TGSI_FILE_CONSTANT || file == TGSI_FILE_IMMEDIATE)
        return RC_FILE_CONSTANT;
    if (file == TGSI_FILE_INPUT)
        return RC_FILE_INPUT;
    if (file == TGSI_FILE_OUTPUT)
        return RC_FILE_OUTPUT;
    if (file == TGSI_FILE_ADDRESS)
        return RC_FILE_ADDRESS;

    if (file != TGSI_FILE_TEMPORARY)
        fprintf(stderr, "Unhandled register file: %i\n", file);

    return RC_FILE_TEMPORARY;
}
 
/* Translate a TGSI register index into an RC register index.  Immediates
 * are shifted by ttr->immediate_offset; every other file keeps its index. */
static int translate_register_index(
    struct tgsi_to_rc * ttr,
    unsigned file,
    int index)
{
    return (file == TGSI_FILE_IMMEDIATE) ? ttr->immediate_offset + index
                                         : index;
}
 
/* Translate a TGSI destination register into an RC destination register.
 * Indirectly addressed destinations are not supported: they are reported on
 * stderr and flagged through ttr->error. */
static void transform_dstreg(
    struct tgsi_to_rc * ttr,
    struct rc_dst_register * dst,
    struct tgsi_full_dst_register * src)
{
    const unsigned tgsi_file = src->Register.File;

    dst->File = translate_register_file(tgsi_file);
    dst->Index = translate_register_index(ttr, tgsi_file, src->Register.Index);
    dst->WriteMask = src->Register.WriteMask;

    if (!src->Register.Indirect)
        return;

    ttr->error = TRUE;
    fprintf(stderr, "r300: Relative addressing of destination operands "
            "is unsupported.\n");
}
 
/* Translate a TGSI source register into an RC source register.
 *
 * The four TGSI swizzle selectors are packed 3 bits each.  If the operand is
 * an immediate recorded in ttr->imms_to_swizzle, it is rewritten to
 * temporary 0 with a swizzle built from the recorded constant selectors, so
 * that no constant slot is referenced. */
static void transform_srcreg(
    struct tgsi_to_rc * ttr,
    struct rc_src_register * dst,
    struct tgsi_full_src_register * src)
{
    unsigned chan, n;
    unsigned packed = 0;

    dst->File = translate_register_file(src->Register.File);
    dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
    dst->RelAddr = src->Register.Indirect;

    for (chan = 0; chan < 4; chan++)
        packed |= tgsi_util_get_full_src_register_swizzle(src, chan) << (chan * 3);
    dst->Swizzle = packed;

    dst->Abs = src->Register.Absolute;
    dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0;

    if (src->Register.File != TGSI_FILE_IMMEDIATE)
        return;

    /* Look for a swizzle-only replacement of this immediate. */
    for (n = 0; n < ttr->imms_to_swizzle_count; n++) {
        if (ttr->imms_to_swizzle[n].index != src->Register.Index)
            continue;

        /* Route every channel through the selector recorded for the
         * immediate component it reads. */
        packed = 0;
        for (chan = 0; chan < 4; chan++) {
            packed |= GET_SWZ(ttr->imms_to_swizzle[n].swizzle,
                              tgsi_util_get_full_src_register_swizzle(src, chan)) << (chan * 3);
        }

        dst->File = RC_FILE_TEMPORARY;
        dst->Index = 0;
        dst->Swizzle = packed;
        break;
    }
}
 
/* Fill in the texture fields of an RC instruction from the TGSI texture
 * token.
 *
 * dst:            instruction being built; TexSrcUnit must already be set,
 *                 because shadow targets use it to update the sampler mask.
 * src:            TGSI texture token of the instruction.
 * shadowSamplers: bit mask with one bit per texture unit; the bit of the
 *                 unit is set when the target is a shadow target.
 *
 * Targets without a case leave TexSrcTarget and TexShadow untouched.
 * TexSwizzle is always reset to XYZW. */
static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src,
                              uint32_t *shadowSamplers)
{
    switch(src.Texture) {
    case TGSI_TEXTURE_1D:
        dst->U.I.TexSrcTarget = RC_TEXTURE_1D;
        break;
    case TGSI_TEXTURE_2D:
        dst->U.I.TexSrcTarget = RC_TEXTURE_2D;
        break;
    case TGSI_TEXTURE_3D:
        dst->U.I.TexSrcTarget = RC_TEXTURE_3D;
        break;
    case TGSI_TEXTURE_CUBE:
        dst->U.I.TexSrcTarget = RC_TEXTURE_CUBE;
        break;
    case TGSI_TEXTURE_RECT:
        dst->U.I.TexSrcTarget = RC_TEXTURE_RECT;
        break;
    /* The mask is built with an unsigned one: shifting a signed 1 into the
     * top bit (unit 31) would be undefined behaviour. */
    case TGSI_TEXTURE_SHADOW1D:
        dst->U.I.TexSrcTarget = RC_TEXTURE_1D;
        dst->U.I.TexShadow = 1;
        *shadowSamplers |= 1u << dst->U.I.TexSrcUnit;
        break;
    case TGSI_TEXTURE_SHADOW2D:
        dst->U.I.TexSrcTarget = RC_TEXTURE_2D;
        dst->U.I.TexShadow = 1;
        *shadowSamplers |= 1u << dst->U.I.TexSrcUnit;
        break;
    case TGSI_TEXTURE_SHADOWRECT:
        dst->U.I.TexSrcTarget = RC_TEXTURE_RECT;
        dst->U.I.TexShadow = 1;
        *shadowSamplers |= 1u << dst->U.I.TexSrcUnit;
        break;
    }
    dst->U.I.TexSwizzle = RC_SWIZZLE_XYZW;
}
 
static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
{
struct rc_instruction * dst;
int i;
 
dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev);
dst->U.I.Opcode = translate_opcode(src->Instruction.Opcode);
dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate);
 
if (src->Instruction.NumDstRegs)
transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]);
 
for(i = 0; i < src->Instruction.NumSrcRegs; ++i) {
if (src->Src[i].Register.File == TGSI_FILE_SAMPLER)
dst->U.I.TexSrcUnit = src->Src[i].Register.Index;
else
transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]);
}
 
/* Texturing. */
if (src->Instruction.Texture)
transform_texture(dst, src->Texture,
&ttr->compiler->Program.ShadowSamplers);
}
 
/* Process one TGSI immediate.
 *
 * ttr:   translation state; either imms_to_swizzle[] or the compiler's
 *        constant list is extended.
 * imm:   the immediate; its four components are read as floats.
 * index: position of the immediate among the immediates of the shader.
 *
 * If every component is 0.0, 1.0 or (when ttr->use_half_swizzles is set)
 * 0.5, the immediate is recorded in ttr->imms_to_swizzle as a set of constant
 * swizzles.  Otherwise it is added to the program constants as a four
 * component RC_CONSTANT_IMMEDIATE.
 *
 * NOTE(review): a swizzled immediate adds no entry to Program.Constants,
 * whereas translate_register_index() maps immediate N to
 * immediate_offset + N.  If rc_constants_add() appends sequentially, a
 * non-swizzled immediate that follows a swizzled one would be referenced one
 * slot too high -- confirm. */
static void handle_immediate(struct tgsi_to_rc * ttr,
                             struct tgsi_full_immediate * imm,
                             unsigned index)
{
    struct rc_constant constant;
    unsigned swizzle = 0;
    boolean can_swizzle = TRUE;
    unsigned i;

    /* Try to express every component as a constant swizzle, 3 bits each. */
    for (i = 0; i < 4; i++) {
        if (imm->u[i].Float == 0.0f) {
            swizzle |= RC_SWIZZLE_ZERO << (i * 3);
        } else if (imm->u[i].Float == 0.5f && ttr->use_half_swizzles) {
            swizzle |= RC_SWIZZLE_HALF << (i * 3);
        } else if (imm->u[i].Float == 1.0f) {
            swizzle |= RC_SWIZZLE_ONE << (i * 3);
        } else {
            can_swizzle = FALSE;
            break;
        }
    }

    if (can_swizzle) {
        ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index;
        ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle;
        ttr->imms_to_swizzle_count++;
    } else {
        /* Zero the whole structure first, as done for the external
         * constants in r300_tgsi_to_rc(), so that no indeterminate bits are
         * handed to rc_constants_add(). */
        memset(&constant, 0, sizeof(constant));
        constant.Type = RC_CONSTANT_IMMEDIATE;
        constant.Size = 4;
        for(i = 0; i < 4; ++i)
            constant.u.Immediate[i] = imm->u[i].Float;
        rc_constants_add(&ttr->compiler->Program.Constants, &constant);
    }
}
 
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
const struct tgsi_token * tokens)
{
struct tgsi_full_instruction *inst;
struct tgsi_parse_context parser;
unsigned imm_index = 0;
int i;
 
ttr->error = FALSE;
 
/* Allocate constants placeholders.
*
* Note: What if declared constants are not contiguous? */
for(i = 0; i <= ttr->info->file_max[TGSI_FILE_CONSTANT]; ++i) {
struct rc_constant constant;
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_EXTERNAL;
constant.Size = 4;
constant.u.External = i;
rc_constants_add(&ttr->compiler->Program.Constants, &constant);
}
 
ttr->immediate_offset = ttr->compiler->Program.Constants.Count;
 
ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms));
ttr->imms_to_swizzle_count = 0;
 
tgsi_parse_init(&parser, tokens);
 
while (!tgsi_parse_end_of_tokens(&parser)) {
tgsi_parse_token(&parser);
 
switch (parser.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_DECLARATION:
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index);
imm_index++;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
inst = &parser.FullToken.FullInstruction;
if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
break;
}
 
transform_instruction(ttr, inst);
break;
}
}
 
tgsi_parse_free(&parser);
 
free(ttr->imms_to_swizzle);
 
rc_calculate_inputs_outputs(ttr->compiler);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_tgsi_to_rc.h
0,0 → 1,57
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_TGSI_TO_RC_H
#define R300_TGSI_TO_RC_H
 
#include "pipe/p_compiler.h"
 
struct radeon_compiler;
 
struct tgsi_full_declaration;
struct tgsi_shader_info;
struct tgsi_token;
 
/* A TGSI immediate whose components can all be expressed as constant
 * swizzles, so reads of it need no constant slot. */
struct swizzled_imms {
unsigned index; /* position of the immediate among the shader's immediates */
unsigned swizzle; /* four RC_SWIZZLE_* selectors, 3 bits per component */
};
 
/* State of one TGSI to RC translation, see r300_tgsi_to_rc(). */
struct tgsi_to_rc {
/* Compiler whose program receives the instructions and constants. */
struct radeon_compiler * compiler;
/* Scan results for the shader being translated. */
const struct tgsi_shader_info * info;

/* Offset added to TGSI immediate indices to form RC constant indices. */
int immediate_offset;
/* Immediates that are replaced by constant swizzles, and their count. */
struct swizzled_imms * imms_to_swizzle;
unsigned imms_to_swizzle_count;

/* Vertex shaders have no half swizzles, and no way to handle them, so
* until rc grows proper support, indicate if they're safe to use. */
boolean use_half_swizzles;

/* If an error occurred. */
boolean error;
};
 
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens);
 
#endif /* R300_TGSI_TO_RC_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_transfer.c
0,0 → 1,266
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_transfer.h"
#include "r300_texture_desc.h"
#include "r300_screen_buffer.h"
 
#include "util/u_memory.h"
#include "util/u_format.h"
#include "util/u_box.h"
 
/* Driver-private transfer object.  The generic pipe_transfer is the first
 * member, so a pointer to one can be cast to a pointer to the other. */
struct r300_transfer {
/* Parent class */
struct pipe_transfer transfer;

/* Offset from start of buffer. */
unsigned offset;

/* Linear texture. */
/* Source or destination of the copies that convert between the tiled
 * resource and a linear layout. */
struct r300_resource *linear_texture;
};
 
/* Downcast a generic pipe_transfer to the driver-private r300_transfer that
 * embeds it as its first member. */
static INLINE struct r300_transfer*
r300_transfer(struct pipe_transfer* transfer)
{
    struct r300_transfer *derived = (struct r300_transfer*)transfer;

    return derived;
}
 
/* Copy from a tiled texture to a detiled one. */
static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
struct r300_transfer *r300transfer)
{
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *src = transfer->resource;
struct pipe_resource *dst = &r300transfer->linear_texture->b.b;
 
if (src->nr_samples <= 1) {
ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
src, transfer->level, &transfer->box);
} else {
/* Resolve the resource. */
struct pipe_blit_info blit;
 
memset(&blit, 0, sizeof(blit));
blit.src.resource = src;
blit.src.format = src->format;
blit.src.level = transfer->level;
blit.src.box = transfer->box;
blit.dst.resource = dst;
blit.dst.format = dst->format;
blit.dst.box.width = transfer->box.width;
blit.dst.box.height = transfer->box.height;
blit.dst.box.depth = transfer->box.depth;
blit.mask = PIPE_MASK_RGBA;
blit.filter = PIPE_TEX_FILTER_NEAREST;
 
ctx->blit(ctx, &blit);
}
}
 
/* Copy a detiled texture to a tiled one. */
static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
struct r300_transfer *r300transfer)
{
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
struct pipe_box src_box;
 
u_box_3d(0, 0, 0,
transfer->box.width, transfer->box.height, transfer->box.depth,
&src_box);
 
ctx->resource_copy_region(ctx, tex, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
&r300transfer->linear_texture->b.b, 0, &src_box);
 
/* XXX remove this. */
r300_flush(ctx, 0, NULL);
}
 
void *
r300_texture_transfer_map(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **transfer)
{
struct r300_context *r300 = r300_context(ctx);
struct r300_resource *tex = r300_resource(texture);
struct r300_transfer *trans;
boolean referenced_cs, referenced_hw;
enum pipe_format format = tex->b.b.format;
char *map;
 
referenced_cs =
r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf, RADEON_USAGE_READWRITE);
if (referenced_cs) {
referenced_hw = TRUE;
} else {
referenced_hw =
r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
}
 
trans = CALLOC_STRUCT(r300_transfer);
if (trans) {
/* Initialize the transfer object. */
trans->transfer.resource = texture;
trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
 
/* If the texture is tiled, we must create a temporary detiled texture
* for this transfer.
* Also make write transfers pipelined. */
if (tex->tex.microtile || tex->tex.macrotile[level] ||
(referenced_hw && !(usage & PIPE_TRANSFER_READ) &&
r300_is_blit_supported(texture->format))) {
struct pipe_resource base;
 
if (r300->blitter->running) {
fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n");
os_break();
}
 
memset(&base, 0, sizeof(base));
base.target = PIPE_TEXTURE_2D;
base.format = texture->format;
base.width0 = box->width;
base.height0 = box->height;
base.depth0 = 1;
base.array_size = 1;
base.usage = PIPE_USAGE_STAGING;
base.flags = R300_RESOURCE_FLAG_TRANSFER;
 
/* We must set the correct texture target and dimensions if needed for a 3D transfer. */
if (box->depth > 1 && util_max_layer(texture, level) > 0) {
base.target = texture->target;
 
if (base.target == PIPE_TEXTURE_3D) {
base.depth0 = util_next_power_of_two(box->depth);
}
}
 
/* Create the temporary texture. */
trans->linear_texture = r300_resource(
ctx->screen->resource_create(ctx->screen,
&base));
 
if (!trans->linear_texture) {
/* Oh crap, the thing can't create the texture.
* Let's flush and try again. */
r300_flush(ctx, 0, NULL);
 
trans->linear_texture = r300_resource(
ctx->screen->resource_create(ctx->screen,
&base));
 
if (!trans->linear_texture) {
fprintf(stderr,
"r300: Failed to create a transfer object.\n");
FREE(trans);
return NULL;
}
}
 
assert(!trans->linear_texture->tex.microtile &&
!trans->linear_texture->tex.macrotile[0]);
 
/* Set the stride. */
trans->transfer.stride =
trans->linear_texture->tex.stride_in_bytes[0];
trans->transfer.layer_stride =
trans->linear_texture->tex.layer_size_in_bytes[0];
 
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
* in a different order, therefore we do detiling using a blit. */
r300_copy_from_tiled_texture(ctx, trans);
 
/* Always referenced in the blit. */
r300_flush(ctx, 0, NULL);
}
} else {
/* Unpipelined transfer. */
trans->transfer.stride = tex->tex.stride_in_bytes[level];
trans->transfer.layer_stride = tex->tex.layer_size_in_bytes[level];
trans->offset = r300_texture_get_offset(tex, level, box->z);
 
if (referenced_cs &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
r300_flush(ctx, 0, NULL);
}
}
}
 
if (trans->linear_texture) {
/* The detiled texture is of the same size as the region being mapped
* (no offset needed). */
map = r300->rws->buffer_map(trans->linear_texture->cs_buf,
r300->cs, usage);
if (!map) {
pipe_resource_reference(
(struct pipe_resource**)&trans->linear_texture, NULL);
FREE(trans);
return NULL;
}
*transfer = &trans->transfer;
return map;
} else {
/* Tiling is disabled. */
map = r300->rws->buffer_map(tex->cs_buf, r300->cs, usage);
if (!map) {
FREE(trans);
return NULL;
}
 
*transfer = &trans->transfer;
return map + trans->offset +
box->y / util_format_get_blockheight(format) * trans->transfer.stride +
box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
}
 
/* Finish a transfer started by r300_texture_transfer_map(): unmap the
 * buffer that was mapped, write staged data back for write transfers,
 * and free the transfer object. */
void r300_texture_transfer_unmap(struct pipe_context *ctx,
                                 struct pipe_transfer *transfer)
{
    struct radeon_winsys *winsys = r300_context(ctx)->rws;
    struct r300_transfer *r300transfer = r300_transfer(transfer);
    struct r300_resource *mapped_tex = r300_resource(transfer->resource);

    if (!r300transfer->linear_texture) {
        /* Direct mapping: only the texture's own buffer was mapped. */
        winsys->buffer_unmap(mapped_tex->cs_buf);
    } else {
        /* Staged mapping: the temporary linear texture was mapped. */
        winsys->buffer_unmap(r300transfer->linear_texture->cs_buf);

        if (transfer->usage & PIPE_TRANSFER_WRITE) {
            r300_copy_into_tiled_texture(ctx, r300transfer);
        }

        /* Release the reference held on the staging texture. */
        pipe_resource_reference(
            (struct pipe_resource**)&r300transfer->linear_texture, NULL);
    }

    FREE(transfer);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_transfer.h
0,0 → 1,44
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_TRANSFER
#define R300_TRANSFER
 
#include "pipe/p_context.h"
 
struct r300_context;
 
void *
r300_texture_transfer_map(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **transfer);
 
void
r300_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer *transfer);
 
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_vs.c
0,0 → 1,289
/*
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#include "r300_vs.h"
 
#include "r300_context.h"
#include "r300_screen.h"
#include "r300_tgsi_to_rc.h"
#include "r300_reg.h"
 
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
 
#include "compiler/radeon_compiler.h"
 
/* Build the r300_shader_semantics table of a vertex shader: every output
 * reported by the scanned shader info is recorded, by output index, under
 * the slot matching its semantic name and semantic index. */
static void r300_shader_read_vs_outputs(
    struct r300_context *r300,
    struct tgsi_shader_info* info,
    struct r300_shader_semantics* vs_outputs)
{
    int out;

    r300_shader_semantics_reset(vs_outputs);

    for (out = 0; out < info->num_outputs; out++) {
        const unsigned sem_index = info->output_semantic_index[out];

        switch (info->output_semantic_name[out]) {
            /* Semantics that only exist with index 0. */
            case TGSI_SEMANTIC_POSITION:
                assert(sem_index == 0);
                vs_outputs->pos = out;
                break;

            case TGSI_SEMANTIC_PSIZE:
                assert(sem_index == 0);
                vs_outputs->psize = out;
                break;

            case TGSI_SEMANTIC_FOG:
                assert(sem_index == 0);
                vs_outputs->fog = out;
                break;

            /* Indexed semantics. */
            case TGSI_SEMANTIC_COLOR:
                assert(sem_index < ATTR_COLOR_COUNT);
                vs_outputs->color[sem_index] = out;
                break;

            case TGSI_SEMANTIC_BCOLOR:
                assert(sem_index < ATTR_COLOR_COUNT);
                vs_outputs->bcolor[sem_index] = out;
                break;

            case TGSI_SEMANTIC_GENERIC:
                assert(sem_index < ATTR_GENERIC_COUNT);
                vs_outputs->generic[sem_index] = out;
                vs_outputs->num_generic++;
                break;

            /* Semantics that are reported but not recorded. */
            case TGSI_SEMANTIC_EDGEFLAG:
                assert(sem_index == 0);
                fprintf(stderr, "r300 VP: cannot handle edgeflag output.\n");
                break;

            case TGSI_SEMANTIC_CLIPVERTEX:
                assert(sem_index == 0);
                /* Draw does clip vertex for us. */
                if (r300->screen->caps.has_tcl) {
                    fprintf(stderr, "r300 VP: cannot handle clip vertex output.\n");
                }
                break;

            default:
                fprintf(stderr, "r300 VP: unknown vertex output semantic: %i.\n",
                        info->output_semantic_name[out]);
                break;
        }
    }

    /* WPOS is a straight copy of POSITION and it's always emitted.
     * It takes the index right after the last shader output. */
    vs_outputs->wpos = out;
}
 
static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
{
struct r300_vertex_shader * vs = c->UserData;
struct r300_shader_semantics* outputs = &vs->outputs;
struct tgsi_shader_info* info = &vs->info;
int i, reg = 0;
boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED ||
outputs->bcolor[1] != ATTR_UNUSED;
 
/* Fill in the input mapping */
for (i = 0; i < info->num_inputs; i++)
c->code->inputs[i] = i;
 
/* Position. */
if (outputs->pos != ATTR_UNUSED) {
c->code->outputs[outputs->pos] = reg++;
} else {
assert(0);
}
 
/* Point size. */
if (outputs->psize != ATTR_UNUSED) {
c->code->outputs[outputs->psize] = reg++;
}
 
/* If we're writing back facing colors we need to send
* four colors to make front/back face colors selection work.
* If the vertex program doesn't write all 4 colors, lets
* pretend it does by skipping output index reg so the colors
* get written into appropriate output vectors.
*/
 
/* Colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (outputs->color[i] != ATTR_UNUSED) {
c->code->outputs[outputs->color[i]] = reg++;
} else if (any_bcolor_used ||
outputs->color[1] != ATTR_UNUSED) {
reg++;
}
}
 
/* Back-face colors. */
for (i = 0; i < ATTR_COLOR_COUNT; i++) {
if (outputs->bcolor[i] != ATTR_UNUSED) {
c->code->outputs[outputs->bcolor[i]] = reg++;
} else if (any_bcolor_used) {
reg++;
}
}
 
/* Texture coordinates. */
for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
if (outputs->generic[i] != ATTR_UNUSED) {
c->code->outputs[outputs->generic[i]] = reg++;
}
}
 
/* Fog coordinates. */
if (outputs->fog != ATTR_UNUSED) {
c->code->outputs[outputs->fog] = reg++;
}
 
/* WPOS. */
c->code->outputs[outputs->wpos] = reg++;
}
 
/* (Re)scan the shader's current tokens into vs->info and rebuild the
 * vs->outputs semantic table from the result. */
void r300_init_vs_outputs(struct r300_context *r300,
                          struct r300_vertex_shader *vs)
{
    struct tgsi_shader_info *scanned = &vs->info;

    tgsi_scan_shader(vs->state.tokens, scanned);
    r300_shader_read_vs_outputs(r300, scanned, &vs->outputs);
}
 
static void r300_dummy_vertex_shader(
struct r300_context* r300,
struct r300_vertex_shader* shader)
{
struct ureg_program *ureg;
struct ureg_dst dst;
struct ureg_src imm;
 
/* Make a simple vertex shader which outputs (0, 0, 0, 1),
* effectively rendering nothing. */
ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
imm = ureg_imm4f(ureg, 0, 0, 0, 1);
 
ureg_MOV(ureg, dst, imm);
ureg_END(ureg);
 
shader->state.tokens = tgsi_dup_tokens(ureg_finalize(ureg));
ureg_destroy(ureg);
 
shader->dummy = TRUE;
r300_init_vs_outputs(r300, shader);
r300_translate_vertex_shader(r300, shader);
}
 
/**
 * Translate the TGSI tokens of a vertex shader into r300 machine code.
 *
 * The result is written to vs->code, and vs->externals_count /
 * vs->immediates_count are derived from the resulting constant table.
 * If the TGSI translation or the compilation fails, the shader is replaced
 * by a dummy shader (see r300_dummy_vertex_shader), which calls back into
 * this function. If the dummy shader itself fails to compile, the process
 * is aborted.
 *
 * \param r300  context; supplies the debug flags and chipset caps
 * \param vs    shader to translate; vs->info and vs->outputs are read here
 *              (r300_init_vs_outputs fills them in)
 */
void r300_translate_vertex_shader(struct r300_context *r300,
                                  struct r300_vertex_shader *vs)
{
    struct r300_vertex_program_compiler compiler;
    struct tgsi_to_rc ttr;
    unsigned i;

    /* Setup the compiler */
    memset(&compiler, 0, sizeof(compiler));
    rc_init(&compiler.Base, NULL);

    if (DBG_ON(r300, DBG_VP)) {
        compiler.Base.Debug |= RC_DBG_LOG;
    }
    if (DBG_ON(r300, DBG_P_STAT)) {
        compiler.Base.Debug |= RC_DBG_STATS;
    }
    compiler.code = &vs->code;
    compiler.UserData = vs;
    compiler.Base.is_r500 = r300->screen->caps.is_r500;
    compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
    compiler.Base.has_half_swizzles = FALSE;
    compiler.Base.has_presub = FALSE;
    compiler.Base.has_omod = FALSE;
    compiler.Base.max_temp_regs = 32;
    compiler.Base.max_constants = 256;
    compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;

    if (compiler.Base.Debug & RC_DBG_LOG) {
        DBG(r300, DBG_VP, "r300: Initial vertex program\n");
        tgsi_dump(vs->state.tokens, 0);
    }

    /* Translate TGSI to our internal representation */
    ttr.compiler = &compiler.Base;
    ttr.info = &vs->info;
    ttr.use_half_swizzles = FALSE;

    r300_tgsi_to_rc(&ttr, vs->state.tokens);

    if (ttr.error) {
        fprintf(stderr, "r300 VP: Cannot translate a shader. "
                "Using a dummy shader instead.\n");
        /* Release the compiler state before falling back, exactly like the
         * compile-error path below; otherwise what rc_init() and the
         * translation allocated is never freed. */
        rc_destroy(&compiler.Base);
        r300_dummy_vertex_shader(r300, vs);
        return;
    }

    if (compiler.Base.Program.Constants.Count > 200) {
        compiler.Base.remove_unused_constants = TRUE;
    }

    /* One bit per shader output plus one for WPOS. The literal is unsigned
     * because left-shifting the negative value ~0 is undefined in C. */
    compiler.RequiredOutputs = ~(~0u << (vs->info.num_outputs + 1));
    compiler.SetHwInputOutput = &set_vertex_inputs_outputs;

    /* Insert the WPOS output. */
    rc_copy_output(&compiler.Base, 0, vs->outputs.wpos);

    /* Invoke the compiler */
    r3xx_compile_vertex_program(&compiler);
    if (compiler.Base.Error) {
        fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader"
                " instead.\n", compiler.Base.ErrorMsg);

        if (vs->dummy) {
            /* Already the fallback shader: nothing simpler is left to try. */
            fprintf(stderr, "r300 VP: Cannot compile the dummy shader! "
                    "Giving up...\n");
            abort();
        }

        rc_destroy(&compiler.Base);
        r300_dummy_vertex_shader(r300, vs);
        return;
    }

    /* Initialize numbers of constants for each type.
     * Externals are counted as the leading run of RC_CONSTANT_EXTERNAL
     * entries; everything after it is asserted to be an immediate. */
    vs->externals_count = 0;
    for (i = 0;
         i < vs->code.constants.Count &&
         vs->code.constants.Constants[i].Type == RC_CONSTANT_EXTERNAL; i++) {
        vs->externals_count = i+1;
    }
    for (; i < vs->code.constants.Count; i++) {
        assert(vs->code.constants.Constants[i].Type == RC_CONSTANT_IMMEDIATE);
    }
    vs->immediates_count = vs->code.constants.Count - vs->externals_count;

    /* And, finally... */
    rc_destroy(&compiler.Base);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_vs.h
0,0 → 1,68
/*
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef R300_VS_H
#define R300_VS_H
 
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "compiler/radeon_code.h"
 
#include "r300_context.h"
#include "r300_shader_semantics.h"
 
struct r300_context;
 
/* Driver-side vertex shader object. */
struct r300_vertex_shader {
/* Parent class. state.tokens holds the TGSI tokens that are scanned and
 * translated; the driver replaces them when it substitutes the dummy
 * shader or the SW TCL-transformed shader. */
struct pipe_shader_state state;

/* Result of tgsi_scan_shader() on state.tokens. */
struct tgsi_shader_info info;
/* Output index recorded per output semantic, built from info by
 * r300_init_vs_outputs(). */
struct r300_shader_semantics outputs;

/* Whether the shader was replaced by a dummy one due to a shader
 * compilation failure. */
boolean dummy;

/* Numbers of constants for each type. Computed after compilation from
 * code.constants: externals are the leading RC_CONSTANT_EXTERNAL entries,
 * immediates are the remaining ones. */
unsigned externals_count;
unsigned immediates_count;

/* HWTCL-specific. */
/* Machine code (if translated) */
struct r300_vertex_program_code code;

/* SWTCL-specific. */
/* Shader object returned by draw_create_vertex_shader(). */
void *draw_vs;
};
 
void r300_init_vs_outputs(struct r300_context *r300,
struct r300_vertex_shader *vs);
 
void r300_translate_vertex_shader(struct r300_context *r300,
struct r300_vertex_shader *vs);
 
void r300_draw_init_vertex_shader(struct r300_context *r300,
struct r300_vertex_shader *vs);
 
#endif /* R300_VS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/r300/r300_vs_draw.c
0,0 → 1,377
/**************************************************************************
*
* Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/* This file contains the vertex shader transformations for SW TCL needed
* to overcome the limitations of the r300 rasterizer.
*
* Transformations:
* 1) If the secondary color output is present, the primary color must be
* present too.
* 2) If any back-face color output is present, there must be all 4 color
* outputs and missing ones must be inserted.
* 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
*
* I know this code is cumbersome, but I don't know of any nicer way
* of transforming TGSI shaders. ~ M.
*/
 
#include "r300_vs.h"
 
#include <stdio.h>
 
#include "tgsi/tgsi_transform.h"
#include "tgsi/tgsi_dump.h"
 
#include "draw/draw_context.h"
 
/* State carried through one run of tgsi_transform_shader() over a vertex
 * shader. */
struct vs_transform_context {
/* Parent class. Must stay the first member: the callbacks cast the
 * tgsi_transform_context pointer they receive to this struct. */
struct tgsi_transform_context base;

/* Which COLOR / BCOLOR semantic indices are declared as outputs. Seeded
 * from the scanned shader info before the transform starts; an entry is
 * also set when transform_decl inserts that output in front of an existing
 * declaration. */
boolean color_used[2];
boolean bcolor_used[2];

/* Index of the pos output, typically 0. */
unsigned pos_output;
/* Index of the pos temp where all writes of pos are redirected to. */
unsigned pos_temp;
/* The index of the last generic output, after which we insert a new
 * output for WPOS. */
int last_generic;

/* Number of output declarations emitted so far, inserted ones included. */
unsigned num_outputs;
/* Used to shift output decl. indices when inserting new ones. */
unsigned decl_shift;
/* Used to remap writes to output decls if their indices changed.
 * Indexed by the output index used in the original shader. */
unsigned out_remap[32];

/* First instruction processed? */
boolean first_instruction;
/* End instruction processed? */
boolean end_instruction;

/* temp_used[i] is set once the shader has declared TEMP[i]. */
boolean temp_used[1024];
};
 
static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
{
struct tgsi_full_declaration decl;
 
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
decl.Range.First = decl.Range.Last = reg;
ctx->emit_declaration(ctx, &decl);
}
 
static void emit_output(struct tgsi_transform_context *ctx,
unsigned name, unsigned index, unsigned interp,
unsigned reg)
{
struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
struct tgsi_full_declaration decl;
 
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_OUTPUT;
decl.Declaration.Interpolate = 1;
decl.Declaration.Semantic = TRUE;
decl.Semantic.Name = name;
decl.Semantic.Index = index;
decl.Range.First = decl.Range.Last = reg;
decl.Interp.Interpolate = interp;
ctx->emit_declaration(ctx, &decl);
++vsctx->num_outputs;
}
 
/* Insert a new output declaration in front of 'before'.
 *
 * 'before' must still carry its original (unshifted) register index: the
 * new output takes that index plus the current decl_shift, and the remap
 * entries of 'before' and every later output move up by one. */
static void insert_output_before(struct tgsi_transform_context *ctx,
                                 struct tgsi_full_declaration *before,
                                 unsigned name, unsigned index, unsigned interp)
{
    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
    const unsigned first_moved = before->Range.First;
    unsigned slot;

    /* Make a place for the new output. */
    for (slot = first_moved; slot < Elements(vsctx->out_remap); slot++) {
        vsctx->out_remap[slot]++;
    }

    /* Insert the new output. */
    emit_output(ctx, name, index, interp, first_moved + vsctx->decl_shift);

    /* Declarations seen from now on shift by one more register. */
    vsctx->decl_shift++;
}
 
/* Insert a new output declaration right behind 'after'.
 *
 * Precondition: 'after' has already been shifted by the current decl_shift
 * (transform_decl does this before emitting the declaration and calling
 * here). The new output therefore goes to after->Range.First + 1 as is.
 *
 * out_remap, however, is indexed by the output indices of the ORIGINAL
 * shader, so the first entry to bump is the original index of 'after' plus
 * one, not its shifted index. Starting at the shifted index would leave the
 * entries of the outputs that directly followed 'after' unchanged whenever
 * outputs had been inserted earlier, and writes to those outputs would land
 * in the register of the output inserted here. */
static void insert_output_after(struct tgsi_transform_context *ctx,
                                struct tgsi_full_declaration *after,
                                unsigned name, unsigned index, unsigned interp)
{
    struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
    const unsigned new_reg = after->Range.First + 1;
    /* Undo the shift to get back to the original index space. */
    const unsigned orig_first = after->Range.First - vsctx->decl_shift;
    unsigned i;

    /* Make a place for the new output. */
    for (i = orig_first + 1; i < Elements(vsctx->out_remap); i++) {
        ++vsctx->out_remap[i];
    }

    /* Insert the new output. */
    emit_output(ctx, name, index, interp, new_reg);

    ++vsctx->decl_shift;
}
 
static void transform_decl(struct tgsi_transform_context *ctx,
struct tgsi_full_declaration *decl)
{
struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
unsigned i;
 
if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
vsctx->pos_output = decl->Range.First;
break;
 
case TGSI_SEMANTIC_COLOR:
assert(decl->Semantic.Index < 2);
 
/* We must rasterize the first color if the second one is
* used, otherwise the rasterizer doesn't do the color
* selection correctly. Declare it, but don't write to it. */
if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
TGSI_INTERPOLATE_LINEAR);
vsctx->color_used[0] = TRUE;
}
break;
 
case TGSI_SEMANTIC_BCOLOR:
assert(decl->Semantic.Index < 2);
 
/* We must rasterize all 4 colors if back-face colors are
* used, otherwise the rasterizer doesn't do the color
* selection correctly. Declare it, but don't write to it. */
if (!vsctx->color_used[0]) {
insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
TGSI_INTERPOLATE_LINEAR);
vsctx->color_used[0] = TRUE;
}
if (!vsctx->color_used[1]) {
insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
TGSI_INTERPOLATE_LINEAR);
vsctx->color_used[1] = TRUE;
}
if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
TGSI_INTERPOLATE_LINEAR);
vsctx->bcolor_used[0] = TRUE;
}
break;
 
case TGSI_SEMANTIC_GENERIC:
vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index);
break;
}
 
/* Since we're inserting new outputs in between, the following outputs
* should be moved to the right so that they don't overlap with
* the newly added ones. */
decl->Range.First += vsctx->decl_shift;
decl->Range.Last += vsctx->decl_shift;
 
++vsctx->num_outputs;
} else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
vsctx->temp_used[i] = TRUE;
}
}
 
ctx->emit_declaration(ctx, decl);
 
/* Insert BCOLOR1 if needed. */
if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR &&
!vsctx->bcolor_used[1]) {
insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1,
TGSI_INTERPOLATE_LINEAR);
}
}
 
static void transform_inst(struct tgsi_transform_context *ctx,
struct tgsi_full_instruction *inst)
{
struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx;
struct tgsi_full_instruction new_inst;
unsigned i;
 
if (!vsctx->first_instruction) {
vsctx->first_instruction = TRUE;
 
/* Insert the generic output for WPOS. */
emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
 
/* Find a free temp for POSITION. */
for (i = 0; i < Elements(vsctx->temp_used); i++) {
if (!vsctx->temp_used[i]) {
emit_temp(ctx, i);
vsctx->pos_temp = i;
break;
}
}
}
 
if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
/* MOV OUT[pos_output], TEMP[pos_temp]; */
new_inst = tgsi_default_full_instruction();
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
new_inst.Instruction.NumDstRegs = 1;
new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
new_inst.Dst[0].Register.Index = vsctx->pos_output;
new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
new_inst.Instruction.NumSrcRegs = 1;
new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
new_inst.Src[0].Register.Index = vsctx->pos_temp;
ctx->emit_instruction(ctx, &new_inst);
 
/* MOV OUT[n-1], TEMP[pos_temp]; */
new_inst = tgsi_default_full_instruction();
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
new_inst.Instruction.NumDstRegs = 1;
new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1;
new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
new_inst.Instruction.NumSrcRegs = 1;
new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
new_inst.Src[0].Register.Index = vsctx->pos_temp;
ctx->emit_instruction(ctx, &new_inst);
 
vsctx->end_instruction = TRUE;
} else {
/* Not an END instruction. */
/* Fix writes to outputs. */
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
struct tgsi_full_dst_register *dst = &inst->Dst[i];
if (dst->Register.File == TGSI_FILE_OUTPUT) {
if (dst->Register.Index == vsctx->pos_output) {
/* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */
dst->Register.File = TGSI_FILE_TEMPORARY;
dst->Register.Index = vsctx->pos_temp;
} else {
/* Not a position, good...
* Since we were changing the indices of output decls,
* we must redirect writes into them too. */
dst->Register.Index = vsctx->out_remap[dst->Register.Index];
}
}
}
 
/* Inserting 2 instructions before the END opcode moves all following
* labels by 2. Subroutines are always after the END opcode so
* they're always moved. */
if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) {
inst->Label.Label += 2;
}
/* The labels of the following opcodes are moved only after
* the END opcode. */
if (vsctx->end_instruction &&
(inst->Instruction.Opcode == TGSI_OPCODE_IF ||
inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP ||
inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) {
inst->Label.Label += 2;
}
}
 
ctx->emit_instruction(ctx, inst);
}
 
/**
 * Prepare a vertex shader for the SW TCL (draw module) path.
 *
 * The shader tokens are run through the transform implemented in this file
 * (extra color outputs, a trailing generic output carrying a copy of the
 * position for WPOS), a draw-module shader is created from the result, and
 * vs->state.tokens is replaced by the transformed tokens. Finally the
 * output table is rebuilt and the inserted generic is recorded as WPOS.
 *
 * If the token buffer for the transformed shader cannot be allocated, the
 * function returns without modifying vs.
 *
 * \param r300  context owning the draw module
 * \param vs    shader to transform; its tokens are freed and replaced
 */
void r300_draw_init_vertex_shader(struct r300_context *r300,
                                  struct r300_vertex_shader *vs)
{
    struct draw_context *draw = r300->draw;
    struct pipe_shader_state new_vs;
    struct tgsi_shader_info info;
    struct vs_transform_context transform;
    const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
    unsigned i;

    tgsi_scan_shader(vs->state.tokens, &info);

    /* Only .tokens is filled in below, yet the whole struct is handed to
     * draw_create_vertex_shader(); zero it so that any other member has a
     * defined value instead of stack garbage. */
    memset(&new_vs, 0, sizeof(new_vs));

    new_vs.tokens = tgsi_alloc_tokens(newLen);
    if (new_vs.tokens == NULL)
        return;

    memset(&transform, 0, sizeof(transform));
    /* Start from the identity mapping; inserting outputs bumps entries. */
    for (i = 0; i < Elements(transform.out_remap); i++) {
        transform.out_remap[i] = i;
    }
    transform.last_generic = -1;
    transform.base.transform_instruction = transform_inst;
    transform.base.transform_declaration = transform_decl;

    /* Tell the transform up front which color outputs the shader declares,
     * so that it only inserts the missing ones. */
    for (i = 0; i < info.num_outputs; i++) {
        unsigned index = info.output_semantic_index[i];

        switch (info.output_semantic_name[i]) {
            case TGSI_SEMANTIC_COLOR:
                assert(index < 2);
                transform.color_used[index] = TRUE;
                break;

            case TGSI_SEMANTIC_BCOLOR:
                assert(index < 2);
                transform.bcolor_used[index] = TRUE;
                break;
        }
    }

    tgsi_transform_shader(vs->state.tokens,
                          (struct tgsi_token*)new_vs.tokens,
                          newLen, &transform.base);

#if 0
    printf("----------------------------------------------\norig shader:\n");
    tgsi_dump(vs->state.tokens, 0);
    printf("----------------------------------------------\nnew shader:\n");
    tgsi_dump(new_vs.tokens, 0);
    printf("----------------------------------------------\n");
#endif

    /* Free old tokens. */
    FREE((void*)vs->state.tokens);

    vs->draw_vs = draw_create_vertex_shader(draw, &new_vs);

    /* Instead of duplicating and freeing the tokens, copy the pointer directly. */
    vs->state.tokens = new_vs.tokens;

    /* Init the VS output table for the rasterizer. */
    r300_init_vs_outputs(r300, vs);

    /* Make the last generic be WPOS. The transform declared it with semantic
     * index last_generic + 1; move it from the generic table to wpos. */
    vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1];
    vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED;
}