Subversion Repositories: KolibriOS

Compare Revisions: Rev 5563 → Rev 5564

/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/LLVM_REVISION.txt
0,0 → 1,0
@181269
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.am
0,0 → 1,35
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
 
 
AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(RADEON_CFLAGS) \
-Wstrict-overflow=0
# ^^ disable warnings about overflows (os_time_timeout)
 
noinst_LTLIBRARIES = libradeon.la
 
libradeon_la_SOURCES = \
$(C_SOURCES)
 
if NEED_RADEON_LLVM
 
AM_CFLAGS += \
$(LLVM_CFLAGS)
 
libradeon_la_SOURCES += \
$(LLVM_C_FILES)
 
libradeon_la_LIBADD = \
$(CLOCK_LIB) \
$(LLVM_LIBS) \
$(ELF_LIB)
 
libradeon_la_LDFLAGS = \
$(LLVM_LDFLAGS)
 
endif
 
EXTRA_DIST = \
LLVM_REVISION.txt
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.in
0,0 → 1,917
# Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@HAVE_DRISW_TRUE@am__append_1 = \
@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 
@NEED_WINSYS_XLIB_TRUE@am__append_2 = \
@NEED_WINSYS_XLIB_TRUE@ $(top_builddir)/src/gallium/winsys/sw/xlib/libws_xlib.la \
@NEED_WINSYS_XLIB_TRUE@ -lX11 -lXext -lXfixes \
@NEED_WINSYS_XLIB_TRUE@ $(LIBDRM_LIBS)
 
@NEED_RADEON_LLVM_TRUE@am__append_3 = \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_CFLAGS)
 
@NEED_RADEON_LLVM_TRUE@am__append_4 = \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_C_FILES)
 
subdir = src/gallium/drivers/radeon
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
$(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/VERSION $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
am__DEPENDENCIES_1 =
@NEED_RADEON_LLVM_TRUE@libradeon_la_DEPENDENCIES = \
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) \
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) \
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1)
am__libradeon_la_SOURCES_DIST = cayman_msaa.c r600_buffer_common.c \
r600_cs.h r600d_common.h r600_gpu_load.c r600_pipe_common.c \
r600_pipe_common.h r600_query.c r600_streamout.c \
r600_texture.c radeon_uvd.c radeon_uvd.h radeon_vce_40_2_2.c \
radeon_vce.c radeon_vce.h radeon_video.c radeon_video.h \
radeon_winsys.h radeon_elf_util.c radeon_elf_util.h \
radeon_llvm_emit.c radeon_llvm_emit.h radeon_llvm.h \
radeon_llvm_util.c radeon_llvm_util.h radeon_setup_tgsi_llvm.c
am__objects_1 = cayman_msaa.lo r600_buffer_common.lo r600_gpu_load.lo \
r600_pipe_common.lo r600_query.lo r600_streamout.lo \
r600_texture.lo radeon_uvd.lo radeon_vce_40_2_2.lo \
radeon_vce.lo radeon_video.lo
am__objects_2 = radeon_elf_util.lo radeon_llvm_emit.lo \
radeon_llvm_util.lo radeon_setup_tgsi_llvm.lo
@NEED_RADEON_LLVM_TRUE@am__objects_3 = $(am__objects_2)
am_libradeon_la_OBJECTS = $(am__objects_1) $(am__objects_3)
libradeon_la_OBJECTS = $(am_libradeon_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
libradeon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libradeon_la_LDFLAGS) $(LDFLAGS) -o $@
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libradeon_la_SOURCES)
DIST_SOURCES = $(am__libradeon_la_SOURCES_DIST)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
$(top_srcdir)/bin/depcomp \
$(top_srcdir)/src/gallium/Automake.inc
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BSYMBOLIC = @BSYMBOLIC@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
DEFINES = @DEFINES@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_CFLAGS = @EXPAT_CFLAGS@
EXPAT_LIBS = @EXPAT_LIBS@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_PIPE_LOADER_CLIENT_DEFINES = @GALLIUM_PIPE_LOADER_CLIENT_DEFINES@
GALLIUM_PIPE_LOADER_CLIENT_LIBS = @GALLIUM_PIPE_LOADER_CLIENT_LIBS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GC_SECTIONS = @GC_SECTIONS@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@
LIBSHA1_LIBS = @LIBSHA1_LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIB_EXT = @LIB_EXT@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
MSVC2008_COMPAT_CFLAGS = @MSVC2008_COMPAT_CFLAGS@
MSVC2008_COMPAT_CXXFLAGS = @MSVC2008_COMPAT_CXXFLAGS@
MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
NINE_MAJOR = @NINE_MAJOR@
NINE_MINOR = @NINE_MINOR@
NINE_TINY = @NINE_TINY@
NINE_VERSION = @NINE_VERSION@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OMX_CFLAGS = @OMX_CFLAGS@
OMX_LIBS = @OMX_LIBS@
OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENSSL_CFLAGS = @OPENSSL_CFLAGS@
OPENSSL_LIBS = @OPENSSL_LIBS@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_CFLAGS = @SELINUX_CFLAGS@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHA1_CFLAGS = @SHA1_CFLAGS@
SHA1_LIBS = @SHA1_LIBS@
SHELL = @SHELL@
SSE41_CFLAGS = @SSE41_CFLAGS@
STRIP = @STRIP@
VA_CFLAGS = @VA_CFLAGS@
VA_LIBS = @VA_LIBS@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
acv_mako_found = @acv_mako_found@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
ifGNUmake = @ifGNUmake@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
C_SOURCES := \
cayman_msaa.c \
r600_buffer_common.c \
r600_cs.h \
r600d_common.h \
r600_gpu_load.c \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
radeon_video.h \
radeon_winsys.h
 
LLVM_C_FILES := \
radeon_elf_util.c \
radeon_elf_util.h \
radeon_llvm_emit.c \
radeon_llvm_emit.h \
radeon_llvm.h \
radeon_llvm_util.c \
radeon_llvm_util.h \
radeon_setup_tgsi_llvm.c
 
GALLIUM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES)
 
 
# src/gallium/auxiliary must appear before src/gallium/drivers
# because there are stupidly two rbug_context.h files in
# different directories, and which one is included by the
# preprocessor is determined by the ordering of the -I flags.
GALLIUM_DRIVER_CFLAGS = \
-I$(srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_DRIVER_CXXFLAGS = \
-I$(srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(VISIBILITY_CXXFLAGS)
 
GALLIUM_TARGET_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_COMMON_LIB_DEPS = \
-lm \
$(CLOCK_LIB) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS)
 
GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
$(am__append_1) $(am__append_2)
AM_CFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(RADEON_CFLAGS) \
-Wstrict-overflow=0 $(am__append_3)
# ^^ disable warnings about overflows (os_time_timeout)
noinst_LTLIBRARIES = libradeon.la
libradeon_la_SOURCES = $(C_SOURCES) $(am__append_4)
@NEED_RADEON_LLVM_TRUE@libradeon_la_LIBADD = \
@NEED_RADEON_LLVM_TRUE@ $(CLOCK_LIB) \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_LIBS) \
@NEED_RADEON_LLVM_TRUE@ $(ELF_LIB)
 
@NEED_RADEON_LLVM_TRUE@libradeon_la_LDFLAGS = \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_LDFLAGS)
 
EXTRA_DIST = \
LLVM_REVISION.txt
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
libradeon.la: $(libradeon_la_OBJECTS) $(libradeon_la_DEPENDENCIES) $(EXTRA_libradeon_la_DEPENDENCIES)
$(AM_V_CCLD)$(libradeon_la_LINK) $(libradeon_la_OBJECTS) $(libradeon_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cayman_msaa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_streamout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_elf_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_llvm_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_llvm_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_setup_tgsi_llvm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_40_2_2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_video.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am:
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
 
.PRECIOUS: Makefile
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.sources
0,0 → 1,29
C_SOURCES := \
cayman_msaa.c \
r600_buffer_common.c \
r600_cs.h \
r600d_common.h \
r600_gpu_load.c \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
radeon_video.h \
radeon_winsys.h
 
LLVM_C_FILES := \
radeon_elf_util.c \
radeon_elf_util.h \
radeon_llvm_emit.c \
radeon_llvm_emit.h \
radeon_llvm.h \
radeon_llvm_util.c \
radeon_llvm_util.h \
radeon_setup_tgsi_llvm.c
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/cayman_msaa.c
0,0 → 1,250
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
#include "r600_cs.h"
 
/* 2xMSAA
* There are two locations (-4, 4), (4, -4). */
const uint32_t eg_sample_locs_2x[4] = {
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
};
const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
* There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
const uint32_t eg_sample_locs_4x[4] = {
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
};
const unsigned eg_max_dist_4x = 6;
 
/* Cayman 8xMSAA */
static const uint32_t cm_sample_locs_8x[] = {
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
};
static const unsigned cm_max_dist_8x = 8;
/* Cayman 16xMSAA */
static const uint32_t cm_sample_locs_16x[] = {
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
};
static const unsigned cm_max_dist_16x = 8;
 
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value)
{
int offset, index;
struct {
int idx:4;
} val;
switch (sample_count) {
case 1:
default:
out_value[0] = out_value[1] = 0.5;
break;
case 2:
offset = 4 * (sample_index * 2);
val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 4:
offset = 4 * (sample_index * 2);
val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 8:
offset = 4 * (sample_index % 4 * 2);
index = (sample_index / 4) * 4;
val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 16:
offset = 4 * (sample_index % 4 * 2);
index = (sample_index / 4) * 4;
val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
}
}
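
/* Illustrative sketch, not part of the original file: FILL_SREG packs each
 * sample coordinate as a signed 4-bit nibble in 1/16-pixel units. The
 * "int idx:4" bitfield sign-extends the nibble, and (idx + 8) / 16.0f maps
 * the range [-8, 7] onto [0.0, 0.9375]; e.g. the first 2xMSAA location
 * (-4, 4) decodes to (0.25, 0.75). A hypothetical decoder for one nibble: */
static inline float example_decode_sample_coord(uint32_t packed, unsigned nibble)
{
struct { int idx:4; } val;

val.idx = (packed >> (nibble * 4)) & 0xf;
return (float)(val.idx + 8) / 16.0f;
}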
 
void cayman_init_msaa(struct pipe_context *ctx)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
int i;
 
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
 
for (i = 0; i < 2; i++)
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
for (i = 0; i < 4; i++)
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
for (i = 0; i < 8; i++)
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
for (i = 0; i < 16; i++)
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
}
 
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
{
switch (nr_samples) {
case 2:
r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
break;
case 4:
r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
break;
case 8:
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
radeon_emit(cs, cm_sample_locs_8x[0]);
radeon_emit(cs, cm_sample_locs_8x[4]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[1]);
radeon_emit(cs, cm_sample_locs_8x[5]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[2]);
radeon_emit(cs, cm_sample_locs_8x[6]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[3]);
radeon_emit(cs, cm_sample_locs_8x[7]);
break;
case 16:
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
radeon_emit(cs, cm_sample_locs_16x[0]);
radeon_emit(cs, cm_sample_locs_16x[4]);
radeon_emit(cs, cm_sample_locs_16x[8]);
radeon_emit(cs, cm_sample_locs_16x[12]);
radeon_emit(cs, cm_sample_locs_16x[1]);
radeon_emit(cs, cm_sample_locs_16x[5]);
radeon_emit(cs, cm_sample_locs_16x[9]);
radeon_emit(cs, cm_sample_locs_16x[13]);
radeon_emit(cs, cm_sample_locs_16x[2]);
radeon_emit(cs, cm_sample_locs_16x[6]);
radeon_emit(cs, cm_sample_locs_16x[10]);
radeon_emit(cs, cm_sample_locs_16x[14]);
radeon_emit(cs, cm_sample_locs_16x[3]);
radeon_emit(cs, cm_sample_locs_16x[7]);
radeon_emit(cs, cm_sample_locs_16x[11]);
radeon_emit(cs, cm_sample_locs_16x[15]);
break;
}
}
 
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples)
{
int setup_samples = nr_samples > 1 ? nr_samples :
overrast_samples > 1 ? overrast_samples : 0;
 
if (setup_samples > 1) {
/* indexed by log2(nr_samples) */
unsigned max_dist[] = {
0,
eg_max_dist_2x,
eg_max_dist_4x,
cm_max_dist_8x,
cm_max_dist_16x
};
unsigned log_samples = util_logbase2(setup_samples);
unsigned log_ps_iter_samples =
util_logbase2(util_next_power_of_two(ps_iter_samples));
 
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028BDC_LAST_PIXEL(1) |
S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
if (nr_samples > 1) {
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
} else if (overrast_samples > 1) {
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
} else {
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
}
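
/* Illustrative worked example, not part of the original file: for
 * nr_samples = 8 we get setup_samples = 8, log_samples = util_logbase2(8) = 3
 * and max_dist[3] = cm_max_dist_8x = 8, so PA_SC_AA_CONFIG is programmed with
 * MSAA_NUM_SAMPLES = 3 and MAX_SAMPLE_DIST = 8. */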
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_buffer_common.c
0,0 → 1,448
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Marek Olšák
*/
 
#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include <inttypes.h>
#include <stdio.h>
 
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage)
{
if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
return TRUE;
}
if (ctx->rings.dma.cs && ctx->rings.dma.cs->cdw &&
ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
return TRUE;
}
return FALSE;
}
 
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage)
{
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
bool busy = false;
 
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
}
 
if (!(usage & PIPE_TRANSFER_WRITE)) {
/* have to wait for the last write */
rusage = RADEON_USAGE_WRITE;
}
 
if (ctx->rings.gfx.cs->cdw != ctx->initial_gfx_cs_size &&
ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
resource->cs_buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
ctx->rings.gfx.flush(ctx, 0, NULL);
busy = true;
}
}
if (ctx->rings.dma.cs &&
ctx->rings.dma.cs->cdw &&
ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
resource->cs_buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
ctx->rings.dma.flush(ctx, 0, NULL);
busy = true;
}
}
 
if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
return NULL;
} else {
/* We will have to wait for the GPU. Wait for any offloaded
* CS flush to complete to avoid busy-waiting in the winsys. */
ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
if (ctx->rings.dma.cs)
ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
}
}
 
/* Setting the CS to NULL will prevent doing checks we have done already. */
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
}
 
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
unsigned size, unsigned alignment,
bool use_reusable_pool)
{
struct r600_texture *rtex = (struct r600_texture*)res;
struct pb_buffer *old_buf, *new_buf;
enum radeon_bo_flag flags = 0;
 
switch (res->b.b.usage) {
case PIPE_USAGE_STREAM:
flags = RADEON_FLAG_GTT_WC;
/* fall through */
case PIPE_USAGE_STAGING:
/* Transfers are likely to occur more often with these resources. */
res->domains = RADEON_DOMAIN_GTT;
break;
case PIPE_USAGE_DYNAMIC:
/* Older kernels didn't always flush the HDP cache before
* CS execution
*/
if (rscreen->info.drm_minor < 40) {
res->domains = RADEON_DOMAIN_GTT;
flags |= RADEON_FLAG_GTT_WC;
break;
}
flags |= RADEON_FLAG_CPU_ACCESS;
/* fall through */
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
default:
/* Not listing GTT here improves performance in some apps. */
res->domains = RADEON_DOMAIN_VRAM;
flags |= RADEON_FLAG_GTT_WC;
break;
}
 
if (res->b.b.target == PIPE_BUFFER &&
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
/* Use GTT for all persistent mappings with older kernels,
* because they didn't always flush the HDP cache before CS
* execution.
*
* Write-combined CPU mappings are fine, the kernel ensures all CPU
* writes finish before the GPU executes a command stream.
*/
if (rscreen->info.drm_minor < 40)
res->domains = RADEON_DOMAIN_GTT;
else if (res->domains & RADEON_DOMAIN_VRAM)
flags |= RADEON_FLAG_CPU_ACCESS;
}
 
/* Tiled textures are unmappable. Always put them in VRAM. */
if (res->b.b.target != PIPE_BUFFER &&
rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
res->domains = RADEON_DOMAIN_VRAM;
flags &= ~RADEON_FLAG_CPU_ACCESS;
flags |= RADEON_FLAG_NO_CPU_ACCESS;
}
 
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
use_reusable_pool,
res->domains, flags);
if (!new_buf) {
return false;
}
 
/* Replace the pointer such that if res->buf wasn't NULL, it won't be
* NULL. This should prevent crashes with multiple contexts using
* the same buffer where one of the contexts invalidates it while
* the others are using it. */
old_buf = res->buf;
res->cs_buf = rscreen->ws->buffer_get_cs_handle(new_buf); /* should be atomic */
res->buf = new_buf; /* should be atomic */
 
if (rscreen->info.r600_virtual_address)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->cs_buf);
else
res->gpu_address = 0;
 
pb_reference(&old_buf, NULL);
 
util_range_set_empty(&res->valid_buffer_range);
res->TC_L2_dirty = false;
 
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %u bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
return true;
}
 
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r600_resource *rbuffer = r600_resource(buf);
 
util_range_destroy(&rbuffer->valid_buffer_range);
pb_reference(&rbuffer->buf, NULL);
FREE(rbuffer);
}
 
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer,
void *data, struct r600_resource *staging,
unsigned offset)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
 
transfer->transfer.resource = resource;
transfer->transfer.level = level;
transfer->transfer.usage = usage;
transfer->transfer.box = *box;
transfer->transfer.stride = 0;
transfer->transfer.layer_stride = 0;
transfer->offset = offset;
transfer->staging = staging;
*ptransfer = &transfer->transfer;
return data;
}
 
static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
unsigned dstx, unsigned srcx, unsigned size)
{
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
 
return rctx->screen->has_cp_dma ||
(dword_aligned && (rctx->rings.dma.cs ||
rctx->screen->has_streamout));
 
}
 
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
struct r600_resource *rbuffer = r600_resource(resource);
uint8_t *data;
 
assert(box->x + box->width <= resource->width0);
 
/* See if the buffer range being mapped has never been initialized,
* in which case it can be mapped unsynchronized. */
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
usage & PIPE_TRANSFER_WRITE &&
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
 
/* If discarding the entire range, discard the whole resource instead. */
if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
box->x == 0 && box->width == resource->width0) {
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
}
 
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
assert(usage & PIPE_TRANSFER_WRITE);
 
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
}
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
assert(usage & PIPE_TRANSFER_WRITE);
 
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
 
u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
&offset, (struct pipe_resource**)&staging, (void**)&data);
 
if (staging) {
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, staging, offset);
} else {
return NULL; /* error, shouldn't occur though */
}
}
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
/* Using a staging buffer in GTT for larger reads is much faster. */
else if ((usage & PIPE_TRANSFER_READ) &&
!(usage & PIPE_TRANSFER_WRITE) &&
rbuffer->domains == RADEON_DOMAIN_VRAM &&
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
struct r600_resource *staging;
 
staging = (struct r600_resource*) pipe_buffer_create(
ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
if (staging) {
/* Copy the VRAM buffer to the staging buffer. */
rctx->dma_copy(ctx, &staging->b.b, 0,
box->x % R600_MAP_BUFFER_ALIGNMENT,
0, 0, resource, level, box);
 
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, staging, 0);
}
}
 
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
if (!data) {
return NULL;
}
data += box->x;
 
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, NULL, 0);
}
 
static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
if (rtransfer->staging) {
if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) {
struct pipe_resource *dst, *src;
unsigned soffset, doffset, size;
struct pipe_box box;
 
dst = transfer->resource;
src = &rtransfer->staging->b.b;
size = transfer->box.width;
doffset = transfer->box.x;
soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
 
u_box_1d(soffset, size, &box);
 
/* Copy the staging buffer into the original one. */
rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
}
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
}
 
if (transfer->usage & PIPE_TRANSFER_WRITE) {
util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
transfer->box.x + transfer->box.width);
}
util_slab_free(&rctx->pool_transfers, transfer);
}
 
static const struct u_resource_vtbl r600_buffer_vtbl =
{
NULL, /* get_handle */
r600_buffer_destroy, /* resource_destroy */
r600_buffer_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r600_buffer_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
 
static struct r600_resource *
r600_alloc_buffer_struct(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r600_resource *rbuffer;
 
rbuffer = MALLOC_STRUCT(r600_resource);
 
rbuffer->b.b = *templ;
pipe_reference_init(&rbuffer->b.b.reference, 1);
rbuffer->b.b.screen = screen;
rbuffer->b.vtbl = &r600_buffer_vtbl;
rbuffer->buf = NULL;
rbuffer->TC_L2_dirty = false;
util_range_init(&rbuffer->valid_buffer_range);
return rbuffer;
}
 
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE)) {
FREE(rbuffer);
return NULL;
}
return &rbuffer->b.b;
}
 
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_winsys *ws = rscreen->ws;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
rbuffer->domains = RADEON_DOMAIN_GTT;
util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
 
/* Convert a user pointer to a buffer. */
rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
if (!rbuffer->buf) {
FREE(rbuffer);
return NULL;
}
 
rbuffer->cs_buf = ws->buffer_get_cs_handle(rbuffer->buf);
 
if (rscreen->info.r600_virtual_address)
rbuffer->gpu_address =
ws->buffer_get_virtual_address(rbuffer->cs_buf);
else
rbuffer->gpu_address = 0;
 
return &rbuffer->b.b;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_cs.h
0,0 → 1,133
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*/
 
/**
* This file contains helpers for writing commands to command streams.
*/
 
#ifndef R600_CS_H
#define R600_CS_H
 
#include "r600_pipe_common.h"
#include "r600d_common.h"
 
static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
assert(usage);
 
/* Make sure that all previous rings are flushed so that everything
* looks serialized from the driver point of view.
*/
if (!ring->flushing) {
if (ring == &rctx->rings.gfx) {
if (rctx->rings.dma.cs) {
/* flush dma ring */
rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
} else {
/* flush gfx ring */
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
}
return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
rbo->domains, priority) * 4;
}
 
static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
struct r600_ring *ring, struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
struct radeon_winsys_cs *cs = ring->cs;
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
 
if (!has_vm) {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
}
}
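
/* Illustrative note, an assumption rather than something stated in this
 * header: without virtual addressing the kernel CS checker patches buffer
 * addresses itself, so the relocation index is carried in a PKT3_NOP packet
 * as emitted above; with virtual addressing the GPU address is used directly
 * and only the cs_add_reloc bookkeeping in r600_context_bo_reloc is needed. */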
 
static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
 
static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
 
static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
 
static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
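
/* Illustrative usage sketch, an assumption rather than part of the original
 * header: a *_seq call emits the PKT3 header and register offset and reserves
 * `num` consecutive dword slots; each following radeon_emit() supplies one
 * register value (see cayman_emit_msaa_sample_locs earlier in this diff for a
 * 14- and 16-value sequence). A hypothetical two-register write: */
static INLINE void example_write_two_context_regs(struct radeon_winsys_cs *cs,
unsigned reg, unsigned v0, unsigned v1)
{
r600_write_context_reg_seq(cs, reg, 2);
radeon_emit(cs, v0);
radeon_emit(cs, v1);
}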
 
static INLINE void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
 
static INLINE void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
si_write_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
 
static INLINE void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
 
static INLINE void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
cik_write_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
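
/* Usage sketch (illustrative, not part of the driver): each *_seq helper
 * emits the SET_*_REG packet header plus the register offset; the caller
 * then emits `num` consecutive dword values. Writing two adjacent context
 * registers (REG_A and REG_A + 4 are hypothetical names) looks like:
 *
 *   r600_write_context_reg_seq(cs, REG_A, 2);
 *   radeon_emit(cs, value_for_reg_a);
 *   radeon_emit(cs, value_for_reg_a_plus_4);
 *
 * which is equivalent to two r600_write_context_reg() calls but avoids
 * repeating the packet header and offset.
 */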
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_gpu_load.c
0,0 → 1,141
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
/* The GPU load is measured as follows.
 *
 * A background thread samples the GRBM_STATUS register at a fixed
 * frequency and increments either the "busy" or the "idle" counter,
 * depending on whether the GUI_ACTIVE bit is set.
 *
 * The user can then sample the counters twice and calculate the average
 * GPU load between the two samples.
 */
 
#include "r600_pipe_common.h"
#include "os/os_time.h"
 
/* This sampling frequency gives good accuracy at 1000 fps or lower; at
 * higher frame rates there are too few samples per frame to be accurate. */
#define SAMPLES_PER_SEC 10000
 
#define GRBM_STATUS 0x8010
#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
 
static bool r600_is_gpu_busy(struct r600_common_screen *rscreen)
{
uint32_t value = 0;
 
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
return GUI_ACTIVE(value);
}
 
static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
const int period_us = 1000000 / SAMPLES_PER_SEC;
int sleep_us = period_us;
int64_t cur_time, last_time = os_time_get();
 
while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
if (sleep_us)
os_time_sleep(sleep_us);
 
/* Make sure we sleep the ideal amount of time to match
* the expected frequency. */
cur_time = os_time_get();
 
if (os_time_timeout(last_time, last_time + period_us,
cur_time))
sleep_us = MAX2(sleep_us - 1, 1);
else
sleep_us += 1;
 
/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
last_time = cur_time;
 
/* Update the counters. */
if (r600_is_gpu_busy(rscreen))
p_atomic_inc(&rscreen->gpu_load_counter_busy);
else
p_atomic_inc(&rscreen->gpu_load_counter_idle);
}
p_atomic_dec(&rscreen->gpu_load_stop_thread);
return 0;
}
 
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
{
if (!rscreen->gpu_load_thread)
return;
 
p_atomic_inc(&rscreen->gpu_load_stop_thread);
pipe_thread_wait(rscreen->gpu_load_thread);
rscreen->gpu_load_thread = 0;
}
 
static uint64_t r600_gpu_load_read_counter(struct r600_common_screen *rscreen)
{
/* Start the thread if needed. */
if (!rscreen->gpu_load_thread) {
pipe_mutex_lock(rscreen->gpu_load_mutex);
/* Check again inside the mutex. */
if (!rscreen->gpu_load_thread)
rscreen->gpu_load_thread =
pipe_thread_create(r600_gpu_load_thread, rscreen);
pipe_mutex_unlock(rscreen->gpu_load_mutex);
}
 
/* The busy counter is in the lower 32 bits.
* The idle counter is in the upper 32 bits. */
return p_atomic_read(&rscreen->gpu_load_counter_busy) |
((uint64_t)p_atomic_read(&rscreen->gpu_load_counter_idle) << 32);
}
 
/**
* Just return the counters.
*/
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen)
{
return r600_gpu_load_read_counter(rscreen);
}
 
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin)
{
uint64_t end = r600_gpu_load_read_counter(rscreen);
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
unsigned idle = (end >> 32) - (begin >> 32);
 
/* Calculate the GPU load.
*
* If no counters have been incremented, return the current load.
* It's for the case when the load is queried faster than
* the counters are updated.
*/
if (idle || busy)
return busy*100 / (busy + idle);
else
return r600_is_gpu_busy(rscreen) ? 100 : 0;
}
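
/* Usage sketch (illustrative): a caller such as a HUD overlay samples the
 * packed busy/idle counter twice and converts the delta to a percentage:
 *
 *   uint64_t begin = r600_gpu_load_begin(rscreen);
 *   ... render a frame ...
 *   unsigned load = r600_gpu_load_end(rscreen, begin);  // 0..100
 */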
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_pipe_common.c
0,0 → 1,966
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_draw_quad.h"
#include "util/u_memory.h"
#include "util/u_format_s3tc.h"
#include "util/u_upload_mgr.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "radeon/radeon_video.h"
#include <inttypes.h>
 
#ifndef HAVE_LLVM
#define HAVE_LLVM 0
#endif
 
/*
* pipe_context
*/
 
void r600_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2, float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib)
{
struct r600_common_context *rctx =
(struct r600_common_context*)util_blitter_get_pipe(blitter);
struct pipe_viewport_state viewport;
struct pipe_resource *buf = NULL;
unsigned offset = 0;
float *vb;
 
if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
return;
}
 
/* Some operations (like color resolve on r6xx) don't work
* with the conventional primitive types.
* One that works is PT_RECTLIST, which we use here. */
 
/* setup viewport */
viewport.scale[0] = 1.0f;
viewport.scale[1] = 1.0f;
viewport.scale[2] = 1.0f;
viewport.translate[0] = 0.0f;
viewport.translate[1] = 0.0f;
viewport.translate[2] = 0.0f;
rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport);
 
	/* Upload the vertices. The hardware rectangle has only 3 vertices;
	 * the 4th one appears to be derived from the first 3.
	 * The vertex specification should match u_blitter's vertex element state. */
u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
vb[0] = x1;
vb[1] = y1;
vb[2] = depth;
vb[3] = 1;
 
vb[8] = x1;
vb[9] = y2;
vb[10] = depth;
vb[11] = 1;
 
vb[16] = x2;
vb[17] = y1;
vb[18] = depth;
vb[19] = 1;
 
if (attrib) {
memcpy(vb+4, attrib->f, sizeof(float)*4);
memcpy(vb+12, attrib->f, sizeof(float)*4);
memcpy(vb+20, attrib->f, sizeof(float)*4);
}
 
/* draw */
util_draw_vertex_buffer(&rctx->b, NULL, buf, blitter->vb_slot, offset,
R600_PRIM_RECTANGLE_LIST, 3, 2);
pipe_resource_reference(&buf, NULL);
}
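
/* For reference: the 24 floats uploaded above form three vertices with a
 * 32-byte stride, each vertex being a position (x, y, depth, 1) followed by
 * an optional 4-float attribute (color or texcoord), matching u_blitter's
 * vertex element state. */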
 
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
{
/* The number of dwords we already used in the DMA so far. */
num_dw += ctx->rings.dma.cs->cdw;
/* Flush if there's not enough space. */
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
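
/* Usage sketch (illustrative): callers reserve worst-case space before
 * emitting DMA packets so that a flush can never split a packet. The packet
 * size and contents below are hypothetical:
 *
 *   r600_need_dma_space(ctx, 5);
 *   cs = ctx->rings.dma.cs;
 *   radeon_emit(cs, header);  // followed by up to 4 more dwords
 */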
 
static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
}
 
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
/* Disable render condition. */
ctx->saved_render_cond = NULL;
ctx->saved_render_cond_cond = FALSE;
ctx->saved_render_cond_mode = 0;
if (ctx->current_render_cond) {
ctx->saved_render_cond = ctx->current_render_cond;
ctx->saved_render_cond_cond = ctx->current_render_cond_cond;
ctx->saved_render_cond_mode = ctx->current_render_cond_mode;
ctx->b.render_condition(&ctx->b, NULL, FALSE, 0);
}
 
/* suspend queries */
ctx->nontimer_queries_suspended = false;
if (ctx->num_cs_dw_nontimer_queries_suspend) {
r600_suspend_nontimer_queries(ctx);
ctx->nontimer_queries_suspended = true;
}
 
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
r600_emit_streamout_end(ctx);
ctx->streamout.suspended = true;
}
}
 
void r600_postflush_resume_features(struct r600_common_context *ctx)
{
if (ctx->streamout.suspended) {
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
r600_streamout_buffers_dirty(ctx);
}
 
/* resume queries */
if (ctx->nontimer_queries_suspended) {
r600_resume_nontimer_queries(ctx);
}
 
/* Re-enable render condition. */
if (ctx->saved_render_cond) {
ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
ctx->saved_render_cond_cond,
ctx->saved_render_cond_mode);
}
}
 
static void r600_flush_from_st(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned rflags = 0;
 
if (flags & PIPE_FLUSH_END_OF_FRAME)
rflags |= RADEON_FLUSH_END_OF_FRAME;
 
if (rctx->rings.dma.cs) {
rctx->rings.dma.flush(rctx, rflags, NULL);
}
rctx->rings.gfx.flush(rctx, rflags, fence);
}
 
static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
 
if (!cs->cdw) {
return;
}
 
rctx->rings.dma.flushing = true;
rctx->ws->cs_flush(cs, flags, fence, 0);
rctx->rings.dma.flushing = false;
}
 
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen)
{
util_slab_create(&rctx->pool_transfers,
sizeof(struct r600_transfer), 64,
UTIL_SLAB_SINGLETHREADED);
 
rctx->screen = rscreen;
rctx->ws = rscreen->ws;
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
 
if (rscreen->family == CHIP_HAWAII)
rctx->max_db = 16;
else if (rscreen->chip_class >= EVERGREEN)
rctx->max_db = 8;
else
rctx->max_db = 4;
 
rctx->b.transfer_map = u_transfer_map_vtbl;
rctx->b.transfer_flush_region = u_default_transfer_flush_region;
rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
rctx->b.transfer_inline_write = u_default_transfer_inline_write;
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
 
LIST_INITHEAD(&rctx->texture_buffers);
 
r600_init_context_texture_functions(rctx);
r600_streamout_init(rctx);
r600_query_init(rctx);
cayman_init_msaa(&rctx->b);
 
rctx->allocator_so_filled_size = u_suballocator_create(&rctx->b, 4096, 4,
0, PIPE_USAGE_DEFAULT, TRUE);
if (!rctx->allocator_so_filled_size)
return false;
 
rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 256,
PIPE_BIND_INDEX_BUFFER |
PIPE_BIND_CONSTANT_BUFFER);
if (!rctx->uploader)
return false;
 
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
rctx->rings.dma.flush = r600_flush_dma_ring;
}
 
return true;
}
 
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
if (rctx->rings.gfx.cs) {
rctx->ws->cs_destroy(rctx->rings.gfx.cs);
}
if (rctx->rings.dma.cs) {
rctx->ws->cs_destroy(rctx->rings.dma.cs);
}
 
if (rctx->uploader) {
u_upload_destroy(rctx->uploader);
}
 
util_slab_destroy(&rctx->pool_transfers);
 
if (rctx->allocator_so_filled_size) {
u_suballocator_destroy(rctx->allocator_so_filled_size);
}
}
 
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_resource *rr = (struct r600_resource *)r;
 
if (r == NULL) {
return;
}
 
	/*
	 * The idea is to compute a gross estimate of the memory requirement
	 * of each draw call. After each draw call, memory is accounted
	 * precisely, so the uncertainty only covers the current draw call.
	 * In practice this gives a very good estimate (within +/- 10% of the
	 * target memory limit).
	 */
if (rr->domains & RADEON_DOMAIN_GTT) {
rctx->gtt += rr->buf->size;
}
if (rr->domains & RADEON_DOMAIN_VRAM) {
rctx->vram += rr->buf->size;
}
}
 
/*
* pipe_screen
*/
 
static const struct debug_named_value common_debug_options[] = {
/* logging */
{ "tex", DBG_TEX, "Print texture info" },
{ "texmip", DBG_TEXMIP, "Print texture info (mipmapped only)" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
{ "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
{ "info", DBG_INFO, "Print driver information" },
 
/* shaders */
{ "fs", DBG_FS, "Print fetch shaders" },
{ "vs", DBG_VS, "Print vertex shaders" },
{ "gs", DBG_GS, "Print geometry shaders" },
{ "ps", DBG_PS, "Print pixel shaders" },
{ "cs", DBG_CS, "Print compute shaders" },
 
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
{ "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" },
/* GL uses the word INVALIDATE, gallium uses the word DISCARD */
{ "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
{ "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
{ "notiling", DBG_NO_TILING, "Disable tiling" },
{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
 
DEBUG_NAMED_VALUE_END /* must be last */
};
 
static const char* r600_get_vendor(struct pipe_screen* pscreen)
{
return "X.Org";
}
 
static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
{
return "AMD";
}
 
static const char* r600_get_name(struct pipe_screen* pscreen)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
 
switch (rscreen->family) {
case CHIP_R600: return "AMD R600";
case CHIP_RV610: return "AMD RV610";
case CHIP_RV630: return "AMD RV630";
case CHIP_RV670: return "AMD RV670";
case CHIP_RV620: return "AMD RV620";
case CHIP_RV635: return "AMD RV635";
case CHIP_RS780: return "AMD RS780";
case CHIP_RS880: return "AMD RS880";
case CHIP_RV770: return "AMD RV770";
case CHIP_RV730: return "AMD RV730";
case CHIP_RV710: return "AMD RV710";
case CHIP_RV740: return "AMD RV740";
case CHIP_CEDAR: return "AMD CEDAR";
case CHIP_REDWOOD: return "AMD REDWOOD";
case CHIP_JUNIPER: return "AMD JUNIPER";
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
case CHIP_SUMO: return "AMD SUMO";
case CHIP_SUMO2: return "AMD SUMO2";
case CHIP_BARTS: return "AMD BARTS";
case CHIP_TURKS: return "AMD TURKS";
case CHIP_CAICOS: return "AMD CAICOS";
case CHIP_CAYMAN: return "AMD CAYMAN";
case CHIP_ARUBA: return "AMD ARUBA";
case CHIP_TAHITI: return "AMD TAHITI";
case CHIP_PITCAIRN: return "AMD PITCAIRN";
case CHIP_VERDE: return "AMD CAPE VERDE";
case CHIP_OLAND: return "AMD OLAND";
case CHIP_HAINAN: return "AMD HAINAN";
case CHIP_BONAIRE: return "AMD BONAIRE";
case CHIP_KAVERI: return "AMD KAVERI";
case CHIP_KABINI: return "AMD KABINI";
case CHIP_HAWAII: return "AMD HAWAII";
case CHIP_MULLINS: return "AMD MULLINS";
default: return "AMD unknown";
}
}
 
static float r600_get_paramf(struct pipe_screen* pscreen,
enum pipe_capf param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
 
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
case PIPE_CAPF_MAX_POINT_WIDTH:
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
if (rscreen->family >= CHIP_CEDAR)
return 16384.0f;
else
return 8192.0f;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 16.0f;
case PIPE_CAPF_GUARD_BAND_LEFT:
case PIPE_CAPF_GUARD_BAND_TOP:
case PIPE_CAPF_GUARD_BAND_RIGHT:
case PIPE_CAPF_GUARD_BAND_BOTTOM:
return 0.0f;
}
return 0.0f;
}
 
static int r600_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return vl_profile_supported(screen, profile, entrypoint);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return vl_video_buffer_max_size(screen);
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
return vl_level_supported(screen, profile);
default:
return 0;
}
}
 
const char *r600_get_llvm_processor_name(enum radeon_family family)
{
switch (family) {
case CHIP_R600:
case CHIP_RV630:
case CHIP_RV635:
case CHIP_RV670:
return "r600";
case CHIP_RV610:
case CHIP_RV620:
case CHIP_RS780:
case CHIP_RS880:
return "rs880";
case CHIP_RV710:
return "rv710";
case CHIP_RV730:
return "rv730";
case CHIP_RV740:
case CHIP_RV770:
return "rv770";
case CHIP_PALM:
case CHIP_CEDAR:
return "cedar";
case CHIP_SUMO:
case CHIP_SUMO2:
return "sumo";
case CHIP_REDWOOD:
return "redwood";
case CHIP_JUNIPER:
return "juniper";
case CHIP_HEMLOCK:
case CHIP_CYPRESS:
return "cypress";
case CHIP_BARTS:
return "barts";
case CHIP_TURKS:
return "turks";
case CHIP_CAICOS:
return "caicos";
case CHIP_CAYMAN:
case CHIP_ARUBA:
return "cayman";
 
case CHIP_TAHITI: return "tahiti";
case CHIP_PITCAIRN: return "pitcairn";
case CHIP_VERDE: return "verde";
case CHIP_OLAND: return "oland";
case CHIP_HAINAN: return "hainan";
case CHIP_BONAIRE: return "bonaire";
case CHIP_KABINI: return "kabini";
case CHIP_KAVERI: return "kaveri";
case CHIP_HAWAII: return "hawaii";
case CHIP_MULLINS:
#if HAVE_LLVM >= 0x0305
return "mullins";
#else
return "kabini";
#endif
default: return "";
}
}
 
static int r600_get_compute_param(struct pipe_screen *screen,
enum pipe_compute_cap param,
void *ret)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
 
	/* TODO: select these parameters per ASIC */
switch (param) {
case PIPE_COMPUTE_CAP_IR_TARGET: {
const char *gpu;
const char *triple;
if (rscreen->family <= CHIP_ARUBA || HAVE_LLVM < 0x0306) {
triple = "r600--";
} else {
triple = "amdgcn--";
}
switch(rscreen->family) {
/* Clang < 3.6 is missing Hainan in its list of
* GPUs, so we need to use the name of a similar GPU.
*/
#if HAVE_LLVM < 0x0306
case CHIP_HAINAN:
gpu = "oland";
break;
#endif
default:
gpu = r600_get_llvm_processor_name(rscreen->family);
break;
}
if (ret) {
sprintf(ret, "%s-%s", gpu, triple);
}
		/* +2 for the dash and the terminating NUL byte */
return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
}
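	/* Example for the case above (illustrative): on Cape Verde with
	 * LLVM >= 3.6 this writes "verde-amdgcn--" into `ret` and returns 15,
	 * i.e. 14 characters plus the terminating NUL byte. */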
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
if (ret) {
uint64_t *grid_dimension = ret;
grid_dimension[0] = 3;
}
return 1 * sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
if (ret) {
uint64_t *grid_size = ret;
grid_size[0] = 65535;
grid_size[1] = 65535;
grid_size[2] = 1;
}
		return 3 * sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
if (ret) {
uint64_t *block_size = ret;
block_size[0] = 256;
block_size[1] = 256;
block_size[2] = 256;
}
return 3 * sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_threads_per_block = ret;
*max_threads_per_block = 256;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
if (ret) {
uint64_t *max_global_size = ret;
uint64_t max_mem_alloc_size;
 
r600_get_compute_param(screen,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_mem_alloc_size);
 
/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
* 1/4 of the MAX_GLOBAL_SIZE. Since the
* MAX_MEM_ALLOC_SIZE is fixed for older kernels,
* make sure we never report more than
* 4 * MAX_MEM_ALLOC_SIZE.
*/
*max_global_size = MIN2(4 * max_mem_alloc_size,
rscreen->info.gart_size +
rscreen->info.vram_size);
}
return sizeof(uint64_t);
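
	/* Worked example (illustrative): with the 256 MB MAX_MEM_ALLOC_SIZE
	 * reported below, MAX_GLOBAL_SIZE is capped at 4 * 256 MB = 1 GB even
	 * when gart_size + vram_size is larger, preserving the OpenCL rule
	 * that MAX_MEM_ALLOC_SIZE >= MAX_GLOBAL_SIZE / 4. */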
 
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
if (ret) {
uint64_t *max_local_size = ret;
/* Value reported by the closed source driver. */
*max_local_size = 32768;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
if (ret) {
uint64_t *max_input_size = ret;
/* Value reported by the closed source driver. */
*max_input_size = 1024;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
if (ret) {
uint64_t *max_mem_alloc_size = ret;
 
/* XXX: The limit in older kernels is 256 MB. We
* should add a query here for newer kernels.
*/
*max_mem_alloc_size = 256 * 1024 * 1024;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
*max_clock_frequency = rscreen->info.max_sclk;
}
return sizeof(uint32_t);
 
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
if (ret) {
uint32_t *max_compute_units = ret;
*max_compute_units = rscreen->info.max_compute_units;
}
return sizeof(uint32_t);
 
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
if (ret) {
uint32_t *images_supported = ret;
*images_supported = 0;
}
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
break; /* unused */
}
 
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
return 0;
}
 
static uint64_t r600_get_timestamp(struct pipe_screen *screen)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 
return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
rscreen->info.r600_clock_crystal_freq;
}
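
/* Note: assuming r600_clock_crystal_freq is reported in kHz, the expression
 * above converts crystal-clock ticks to nanoseconds:
 * ticks * 10^6 / f_kHz == ticks * 10^9 / f_Hz. */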
 
static int r600_get_driver_query_info(struct pipe_screen *screen,
unsigned index,
struct pipe_driver_query_info *info)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pipe_driver_query_info list[] = {
{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}},
{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}},
{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}},
{"GPU-load", R600_QUERY_GPU_LOAD, {100}}
};
unsigned num_queries;
 
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
num_queries = Elements(list);
else
num_queries = 8;
 
if (!info)
return num_queries;
 
if (index >= num_queries)
return 0;
 
*info = list[index];
return 1;
}
 
static void r600_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 
rws->fence_reference(ptr, fence);
}
 
static boolean r600_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 
return rws->fence_wait(rws, fence, 0);
}
 
static boolean r600_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 
return rws->fence_wait(rws, fence, timeout);
}
 
static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
uint32_t tiling_config)
{
switch ((tiling_config & 0xe) >> 1) {
case 0:
rscreen->tiling_info.num_channels = 1;
break;
case 1:
rscreen->tiling_info.num_channels = 2;
break;
case 2:
rscreen->tiling_info.num_channels = 4;
break;
case 3:
rscreen->tiling_info.num_channels = 8;
break;
default:
return false;
}
 
switch ((tiling_config & 0x30) >> 4) {
case 0:
rscreen->tiling_info.num_banks = 4;
break;
case 1:
rscreen->tiling_info.num_banks = 8;
break;
default:
		return false;
	}
switch ((tiling_config & 0xc0) >> 6) {
case 0:
rscreen->tiling_info.group_bytes = 256;
break;
case 1:
rscreen->tiling_info.group_bytes = 512;
break;
default:
return false;
}
return true;
}
 
static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
uint32_t tiling_config)
{
switch (tiling_config & 0xf) {
case 0:
rscreen->tiling_info.num_channels = 1;
break;
case 1:
rscreen->tiling_info.num_channels = 2;
break;
case 2:
rscreen->tiling_info.num_channels = 4;
break;
case 3:
rscreen->tiling_info.num_channels = 8;
break;
default:
return false;
}
 
switch ((tiling_config & 0xf0) >> 4) {
case 0:
rscreen->tiling_info.num_banks = 4;
break;
case 1:
rscreen->tiling_info.num_banks = 8;
break;
case 2:
rscreen->tiling_info.num_banks = 16;
break;
default:
return false;
}
 
switch ((tiling_config & 0xf00) >> 8) {
case 0:
rscreen->tiling_info.group_bytes = 256;
break;
case 1:
rscreen->tiling_info.group_bytes = 512;
break;
default:
return false;
}
return true;
}
 
static bool r600_init_tiling(struct r600_common_screen *rscreen)
{
uint32_t tiling_config = rscreen->info.r600_tiling_config;
 
/* set default group bytes, overridden by tiling info ioctl */
if (rscreen->chip_class <= R700) {
rscreen->tiling_info.group_bytes = 256;
} else {
rscreen->tiling_info.group_bytes = 512;
}
 
if (!tiling_config)
return true;
 
if (rscreen->chip_class <= R700) {
return r600_interpret_tiling(rscreen, tiling_config);
} else {
return evergreen_interpret_tiling(rscreen, tiling_config);
}
}
 
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER) {
return r600_buffer_create(screen, templ, 4096);
} else {
return r600_texture_create(screen, templ);
}
}
 
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
{
ws->query_info(ws, &rscreen->info);
 
rscreen->b.get_name = r600_get_name;
rscreen->b.get_vendor = r600_get_vendor;
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.fence_signalled = r600_fence_signalled;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
 
if (rscreen->info.has_uvd) {
rscreen->b.get_video_param = rvid_get_video_param;
rscreen->b.is_video_format_supported = rvid_is_format_supported;
} else {
rscreen->b.get_video_param = r600_get_video_param;
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
}
 
r600_init_screen_texture_functions(rscreen);
 
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
 
if (!r600_init_tiling(rscreen)) {
return false;
}
util_format_s3tc_init();
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
 
if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
4096);
if (rscreen->trace_bo) {
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
PIPE_TRANSFER_UNSYNCHRONIZED);
}
}
 
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
printf("family = %i\n", rscreen->info.family);
printf("chip_class = %i\n", rscreen->info.chip_class);
printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
printf("max_sclk = %i\n", rscreen->info.max_sclk);
printf("max_compute_units = %i\n", rscreen->info.max_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
printf("has_uvd = %i\n", rscreen->info.has_uvd);
printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
}
return true;
}
 
void r600_destroy_common_screen(struct r600_common_screen *rscreen)
{
r600_gpu_load_kill_thread(rscreen);
 
pipe_mutex_destroy(rscreen->gpu_load_mutex);
pipe_mutex_destroy(rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
 
if (rscreen->trace_bo) {
rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
}
 
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
}
 
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens)
{
	/* Compute shaders don't have tgsi_tokens. */
if (!tokens)
return (rscreen->debug_flags & DBG_CS) != 0;
 
switch (tgsi_get_processor_type(tokens)) {
case TGSI_PROCESSOR_VERTEX:
return (rscreen->debug_flags & DBG_VS) != 0;
case TGSI_PROCESSOR_GEOMETRY:
return (rscreen->debug_flags & DBG_GS) != 0;
case TGSI_PROCESSOR_FRAGMENT:
return (rscreen->debug_flags & DBG_PS) != 0;
case TGSI_PROCESSOR_COMPUTE:
return (rscreen->debug_flags & DBG_CS) != 0;
default:
return false;
}
}
 
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 
pipe_mutex_lock(rscreen->aux_context_lock);
rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
pipe_mutex_unlock(rscreen->aux_context_lock);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_pipe_common.h
0,0 → 1,588
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
/**
* This file contains common screen and context structures and functions
* for r600g and radeonsi.
*/
 
#ifndef R600_PIPE_COMMON_H
#define R600_PIPE_COMMON_H
 
#include <stdio.h>
 
#include "radeon/radeon_winsys.h"
 
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_range.h"
#include "util/u_slab.h"
#include "util/u_suballoc.h"
#include "util/u_transfer.h"
 
#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1)
#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2)
#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3)
#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4)
#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6)
#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
 
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
 
/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
 
/* Debug flags. */
/* logging */
#define DBG_TEX (1 << 0)
#define DBG_TEXMIP (1 << 1)
#define DBG_COMPUTE (1 << 2)
#define DBG_VM (1 << 3)
#define DBG_TRACE_CS (1 << 4)
/* shader logging */
#define DBG_FS (1 << 5)
#define DBG_VS (1 << 6)
#define DBG_GS (1 << 7)
#define DBG_PS (1 << 8)
#define DBG_CS (1 << 9)
/* features */
#define DBG_NO_ASYNC_DMA (1 << 10)
#define DBG_NO_HYPERZ (1 << 11)
#define DBG_NO_DISCARD_RANGE (1 << 12)
#define DBG_NO_2D_TILING (1 << 13)
#define DBG_NO_TILING (1 << 14)
#define DBG_SWITCH_ON_EOP (1 << 15)
#define DBG_FORCE_DMA (1 << 16)
#define DBG_PRECOMPILE (1 << 17)
#define DBG_INFO (1 << 18)
/* The maximum allowed bit is 20. */
 
#define R600_MAP_BUFFER_ALIGNMENT 64
 
struct r600_common_context;
 
struct radeon_shader_reloc {
char *name;
uint64_t offset;
};
 
struct radeon_shader_binary {
/** Shader code */
unsigned char *code;
unsigned code_size;
 
	/** Config/Context register state that accompanies this shader.
	 * This is a stream of dword pairs: the first dword contains the
	 * register address, the second dword contains the value. */
unsigned char *config;
unsigned config_size;
 
/** The number of bytes of config information for each global symbol.
*/
unsigned config_size_per_symbol;
 
/** Constant data accessed by the shader. This will be uploaded
* into a constant buffer. */
unsigned char *rodata;
unsigned rodata_size;
 
/** List of symbol offsets for the shader */
uint64_t *global_symbol_offsets;
unsigned global_symbol_count;
 
struct radeon_shader_reloc *relocs;
unsigned reloc_count;
 
/** Set to 1 if the disassembly for this binary has been dumped to
* stderr. */
int disassembled;
};
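
/* Layout sketch (illustrative): the config stream above is consumed as
 * address/value dword pairs, e.g.
 *
 *   uint32_t *dw = (uint32_t *)binary->config;
 *   for (i = 0; i + 1 < binary->config_size / 4; i += 2)
 *       set_register(dw[i], dw[i + 1]);  // set_register() is hypothetical
 */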
 
struct r600_resource {
struct u_resource b;
 
/* Winsys objects. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
uint64_t gpu_address;
 
/* Resource state. */
enum radeon_bo_domain domains;
 
/* The buffer range which is initialized (with a write transfer,
* streamout, DMA, or as a random access target). The rest of
* the buffer is considered invalid and can be mapped unsynchronized.
*
	 * This allows unsynchronized mapping of a buffer range which hasn't
* been used yet. It's for applications which forget to use
* the unsynchronized map flag and expect the driver to figure it out.
*/
struct util_range valid_buffer_range;
 
/* For buffers only. This indicates that a write operation has been
* performed by TC L2, but the cache hasn't been flushed.
* Any hw block which doesn't use or bypasses TC L2 should check this
* flag and flush the cache before using the buffer.
*
* For example, TC L2 must be flushed if a buffer which has been
* modified by a shader store instruction is about to be used as
* an index buffer. The reason is that VGT DMA index fetching doesn't
* use TC L2.
*/
bool TC_L2_dirty;
};
 
struct r600_transfer {
struct pipe_transfer transfer;
struct r600_resource *staging;
unsigned offset;
};
 
struct r600_fmask_info {
unsigned offset;
unsigned size;
unsigned alignment;
unsigned pitch;
unsigned bank_height;
unsigned slice_tile_max;
unsigned tile_mode_index;
};
 
struct r600_cmask_info {
unsigned offset;
unsigned size;
unsigned alignment;
unsigned slice_tile_max;
unsigned base_address_reg;
};
 
struct r600_texture {
struct r600_resource resource;
 
unsigned size;
unsigned pitch_override;
bool is_depth;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
struct r600_texture *flushed_depth_texture;
boolean is_flushing_texture;
struct radeon_surf surface;
 
/* Colorbuffer compression and fast clear. */
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
 
/* Depth buffer compression and fast clear. */
struct r600_resource *htile_buffer;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
 
bool non_disp_tiling; /* R600-Cayman only */
unsigned mipmap_shift;
};
 
struct r600_surface {
struct pipe_surface base;
 
bool color_initialized;
bool depth_initialized;
 
/* Misc. color flags. */
bool alphatest_bypass;
bool export_16bpc;
 
/* Color registers. */
unsigned cb_color_info;
unsigned cb_color_base;
unsigned cb_color_view;
unsigned cb_color_size; /* R600 only */
unsigned cb_color_dim; /* EG only */
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
 
/* DB registers. */
unsigned db_depth_info; /* R600 only, then SI and later */
unsigned db_z_info; /* EG and later */
unsigned db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
unsigned db_depth_view;
unsigned db_depth_size;
unsigned db_depth_slice; /* EG and later */
unsigned db_stencil_base; /* EG and later */
unsigned db_stencil_info; /* EG and later */
unsigned db_prefetch_limit; /* R600 only */
unsigned db_htile_surface;
unsigned db_htile_data_base;
unsigned db_preload_control; /* EG and later */
unsigned pa_su_poly_offset_db_fmt_cntl;
};
 
struct r600_tiling_info {
unsigned num_channels;
unsigned num_banks;
unsigned group_bytes;
};
 
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct radeon_info info;
struct r600_tiling_info tiling_info;
unsigned debug_flags;
bool has_cp_dma;
bool has_streamout;
 
/* Auxiliary context. Mainly used to initialize resources.
* It must be locked prior to using and flushed before unlocking. */
struct pipe_context *aux_context;
pipe_mutex aux_context_lock;
 
struct r600_resource *trace_bo;
uint32_t *trace_ptr;
unsigned cs_count;
 
/* GPU load thread. */
pipe_mutex gpu_load_mutex;
pipe_thread gpu_load_thread;
unsigned gpu_load_counter_busy;
unsigned gpu_load_counter_idle;
unsigned gpu_load_stop_thread; /* bool */
};
 
/* This encapsulates a state or an operation which can be emitted into the
 * GPU command stream. */
struct r600_atom {
void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
unsigned num_dw;
bool dirty;
};
 
struct r600_so_target {
struct pipe_stream_output_target b;
 
/* The buffer where BUFFER_FILLED_SIZE is stored. */
struct r600_resource *buf_filled_size;
unsigned buf_filled_size_offset;
bool buf_filled_size_valid;
 
unsigned stride_in_dw;
};
 
struct r600_streamout {
struct r600_atom begin_atom;
bool begin_emitted;
unsigned num_dw_for_end;
 
unsigned enabled_mask;
unsigned num_targets;
struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];
 
unsigned append_bitmask;
bool suspended;
 
/* External state which comes from the vertex shader,
* it must be set explicitly when binding a shader. */
unsigned *stride_in_dw;
 
/* The state of VGT_STRMOUT_(CONFIG|EN). */
struct r600_atom enable_atom;
bool streamout_enabled;
bool prims_gen_query_enabled;
int num_prims_gen_queries;
};
 
struct r600_ring {
struct radeon_winsys_cs *cs;
bool flushing;
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence);
};
 
struct r600_rings {
struct r600_ring gfx;
struct r600_ring dma;
};
 
struct r600_common_context {
struct pipe_context b; /* base class */
 
struct r600_common_screen *screen;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct r600_rings rings;
unsigned initial_gfx_cs_size;
 
struct u_upload_mgr *uploader;
struct u_suballocator *allocator_so_filled_size;
struct util_slab_mempool pool_transfers;
 
/* Current unaccounted memory usage. */
uint64_t vram;
uint64_t gtt;
 
/* States. */
struct r600_streamout streamout;
 
/* Additional context states. */
unsigned flags; /* flush flags */
 
/* Queries. */
/* The list of active queries. Only one query of each type can be active. */
int num_occlusion_queries;
/* Keep track of non-timer queries, because they should be suspended
* during context flushing.
* The timer queries (TIME_ELAPSED) shouldn't be suspended. */
struct list_head active_nontimer_queries;
unsigned num_cs_dw_nontimer_queries_suspend;
/* If queries have been suspended. */
bool nontimer_queries_suspended;
/* Additional hardware info. */
unsigned backend_mask;
	unsigned max_db; /* for occlusion queries */
/* Misc stats. */
unsigned num_draw_calls;
 
/* Render condition. */
struct pipe_query *current_render_cond;
unsigned current_render_cond_mode;
boolean current_render_cond_cond;
boolean predicate_drawing;
/* For context flushing. */
struct pipe_query *saved_render_cond;
boolean saved_render_cond_cond;
unsigned saved_render_cond_mode;
 
/* MSAA sample locations.
* The first index is the sample index.
* The second index is the coordinate: X, Y. */
float sample_locations_1x[1][2];
float sample_locations_2x[2][2];
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
 
/* The list of all texture buffer objects in this context.
* This list is walked when a buffer is invalidated/reallocated and
* the GPU addresses are updated. */
struct list_head texture_buffers;
 
/* Copy one resource to another using async DMA. */
void (*dma_copy)(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dst_x, unsigned dst_y, unsigned dst_z,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box);
 
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
 
void (*blit_decompress_depth)(struct pipe_context *ctx,
struct r600_texture *texture,
struct r600_texture *staging,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer,
unsigned first_sample, unsigned last_sample);
 
/* Reallocate the buffer and update all resource bindings where
* the buffer is bound, including all resource descriptors. */
void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
 
/* Enable or disable occlusion queries. */
void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);
 
/* This ensures there is enough space in the command stream. */
void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo);
};
 
/* r600_buffer.c */
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage);
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage);
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
unsigned size, unsigned alignment,
bool use_reusable_pool);
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory);
 
/* r600_common_pipe.c */
void r600_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2, float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib);
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws);
void r600_destroy_common_screen(struct r600_common_screen *rscreen);
void r600_preflush_suspend_features(struct r600_common_context *ctx);
void r600_postflush_resume_features(struct r600_common_context *ctx);
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen);
void r600_common_context_cleanup(struct r600_common_context *rctx);
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r);
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw);
 
/* r600_gpu_load.c */
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
 
/* r600_query.c */
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
void r600_resume_nontimer_queries(struct r600_common_context *ctx);
void r600_query_init_backend_mask(struct r600_common_context *ctx);
 
/* r600_streamout.c */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offset);
void r600_emit_streamout_end(struct r600_common_context *rctx);
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff);
void r600_streamout_init(struct r600_common_context *rctx);
 
/* r600_texture.c */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out);
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out);
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging);
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width, unsigned height);
unsigned r600_translate_colorswap(enum pipe_format format);
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers,
const union pipe_color_union *color);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
 
/* cayman_msaa.c */
extern const uint32_t eg_sample_locs_2x[4];
extern const unsigned eg_max_dist_2x;
extern const uint32_t eg_sample_locs_4x[4];
extern const unsigned eg_max_dist_4x;
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value);
void cayman_init_msaa(struct pipe_context *ctx);
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples);
 
 
/* Inline helpers. */
 
static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
{
return (struct r600_resource*)r;
}
 
static INLINE void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
pipe_resource_reference((struct pipe_resource **)ptr,
(struct pipe_resource *)res);
}
 
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
if (filter <= 1) return 0;
if (filter <= 2) return 1;
if (filter <= 4) return 2;
if (filter <= 8) return 3;
/* else */ return 4;
}
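
/* In effect this is clamp(ceil(log2(filter)), 0, 4): 1x -> 0, 2x -> 1,
 * 4x -> 2, 8x -> 3, 16x and above -> 4. */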
 
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
 
#define R600_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
 
/* For MSAA sample positions. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
(((s0x) & 0xf) | (((s0y) & 0xf) << 4) | \
(((s1x) & 0xf) << 8) | (((s1y) & 0xf) << 12) | \
(((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) | \
(((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28))
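
/* Example (illustrative): FILL_SREG(-4, -4, 4, 4, -4, 4, 4, -4) packs four
 * signed 4-bit (x, y) sample offsets into a single dword, sample 0 in the
 * low nibbles, ready to be written to an MSAA sample-location register. */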
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_query.c
0,0 → 1,969
/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
* Copyright 2014 Marek Olšák <marek.olsak@amd.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "r600_cs.h"
#include "util/u_memory.h"
 
 
struct r600_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource *buf;
/* Offset of the next free result after current query data */
unsigned results_end;
/* If a query buffer is full, a new buffer is created and the old one
* is put in here. When we calculate the result, we sum up the samples
* from all buffers. */
struct r600_query_buffer *previous;
};
 
struct r600_query {
/* The query buffer and how many results are in it. */
struct r600_query_buffer buffer;
/* The type of query */
unsigned type;
	/* The size of the result in memory for both begin_query and end_query;
	 * this can be one or two numbers, or even the size of a structure. */
unsigned result_size;
/* The number of dwords for begin_query or end_query. */
unsigned num_cs_dw;
/* linked list of queries */
struct list_head list;
/* for custom non-GPU queries */
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
};
 
 
static bool r600_is_timer_query(unsigned type)
{
return type == PIPE_QUERY_TIME_ELAPSED ||
type == PIPE_QUERY_TIMESTAMP;
}
 
static bool r600_query_needs_begin(unsigned type)
{
return type != PIPE_QUERY_GPU_FINISHED &&
type != PIPE_QUERY_TIMESTAMP;
}
 
static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
{
unsigned j, i, num_results, buf_size = 4096;
uint32_t *results;
 
/* Non-GPU queries. */
switch (type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
case R600_QUERY_DRAW_CALLS:
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
return NULL;
}
 
	/* Queries are normally read by the CPU after
	 * being written by the GPU, so staging is probably a good
	 * usage pattern.
	 */
struct r600_resource *buf = (struct r600_resource*)
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, buf_size);
 
switch (type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
 
/* Set top bits for unused backends. */
num_results = buf_size / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
if (!(ctx->backend_mask & (1<<i))) {
results[(i * 4)+1] = 0x80000000;
results[(i * 4)+3] = 0x80000000;
}
}
results += 4 * ctx->max_db;
}
ctx->ws->buffer_unmap(buf->cs_buf);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_PIPELINE_STATISTICS:
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
ctx->ws->buffer_unmap(buf->cs_buf);
break;
default:
assert(0);
}
return buf;
}
 
static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
type == PIPE_QUERY_OCCLUSION_PREDICATE) {
bool old_enable = rctx->num_occlusion_queries != 0;
bool enable;
 
rctx->num_occlusion_queries += diff;
assert(rctx->num_occlusion_queries >= 0);
 
enable = rctx->num_occlusion_queries != 0;
 
if (enable != old_enable) {
rctx->set_occlusion_query_state(&rctx->b, enable);
}
}
}
 
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
uint64_t va;
 
r600_update_occlusion_query_state(ctx, query->type, 1);
r600_update_prims_generated_query_state(ctx, query->type, 1);
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
 
/* Get a new query buffer if needed. */
if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
*qbuf = query->buffer;
query->buffer.buf = r600_new_query_buffer(ctx, query->type);
query->buffer.results_end = 0;
query->buffer.previous = qbuf;
}
 
/* emit begin query */
va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
radeon_emit(cs, va);
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
default:
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
 
if (!r600_is_timer_query(query->type)) {
ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
}
}
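 
/* Note on the address encoding above: the event packets take a 40-bit
* GPU address, so the low 32 bits are emitted as one dword and only
* bits 39:32 survive the "& 0xFF" mask in the next one. For the EOP
* packet, the (3 << 29) field in that same dword appears to select the
* 64-bit GPU clock value as the data to write, which is what the
* timestamp queries read back. */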
 
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
uint64_t va;
 
/* Queries that need a begin call have already reserved CS space in begin_query. */
if (!r600_query_needs_begin(query->type)) {
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
}
 
va = query->buffer.buf->gpu_address;
 
/* emit end query */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
va += query->buffer.results_end + 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += query->buffer.results_end + query->result_size/2;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
radeon_emit(cs, va);
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
default:
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
 
query->buffer.results_end += query->result_size;
 
if (r600_query_needs_begin(query->type)) {
if (!r600_is_timer_query(query->type)) {
ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
}
}
 
r600_update_occlusion_query_state(ctx, query->type, -1);
r600_update_prims_generated_query_state(ctx, query->type, -1);
}
 
static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
int operation, bool flag_wait)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
if (operation == PREDICATION_OP_CLEAR) {
ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
 
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, 0);
radeon_emit(cs, PRED_OP(PREDICATION_OP_CLEAR));
} else {
struct r600_query_buffer *qbuf;
unsigned count;
uint32_t op;
 
/* Find how many results there are. */
count = 0;
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
count += qbuf->results_end / query->result_size;
}
 
ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
 
op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
 
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
uint64_t va = qbuf->buf->gpu_address;
 
while (results_base < qbuf->results_end) {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF));
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
results_base += query->result_size;
 
/* set CONTINUE bit for all packets except the first */
op |= PREDICATION_CONTINUE;
}
}
}
}
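 
/* Note: only the first SET_PREDICATION packet carries the bare
* operation; every later packet ORs in PREDICATION_CONTINUE, telling
* the CP to combine each block's visibility result with the previous
* ones so that all query buffers feed a single predicate. */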
 
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *query;
bool skip_allocation = false;
 
query = CALLOC_STRUCT(r600_query);
if (query == NULL)
return NULL;
 
query->type = query_type;
 
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
query->result_size = 16 * rctx->max_db;
query->num_cs_dw = 6;
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
query->num_cs_dw = 8;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
query->num_cs_dw = 8;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
query->num_cs_dw = 6;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
query->num_cs_dw = 6;
break;
/* Non-GPU queries and queries not requiring a buffer. */
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
case R600_QUERY_DRAW_CALLS:
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
skip_allocation = true;
break;
default:
assert(0);
FREE(query);
return NULL;
}
 
if (!skip_allocation) {
query->buffer.buf = r600_new_query_buffer(rctx, query_type);
if (!query->buffer.buf) {
FREE(query);
return NULL;
}
}
return (struct pipe_query*)query;
}
 
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct r600_query *rquery = (struct r600_query*)query;
struct r600_query_buffer *prev = rquery->buffer.previous;
 
/* Release all query buffers. */
while (prev) {
struct r600_query_buffer *qbuf = prev;
prev = prev->previous;
pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
FREE(qbuf);
}
 
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
FREE(query);
}
 
static boolean r600_begin_query(struct pipe_context *ctx,
struct pipe_query *query)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
struct r600_query_buffer *prev = rquery->buffer.previous;
 
if (!r600_query_needs_begin(rquery->type)) {
assert(0);
return false;
}
 
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
return true;
case R600_QUERY_DRAW_CALLS:
rquery->begin_result = rctx->num_draw_calls;
return true;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
rquery->begin_result = 0;
return true;
case R600_QUERY_BUFFER_WAIT_TIME:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
return true;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
return true;
case R600_QUERY_NUM_BYTES_MOVED:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
return true;
case R600_QUERY_GPU_LOAD:
rquery->begin_result = r600_gpu_load_begin(rctx->screen);
return true;
}
 
/* Discard the old query buffers. */
while (prev) {
struct r600_query_buffer *qbuf = prev;
prev = prev->previous;
pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
FREE(qbuf);
}
 
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
}
 
rquery->buffer.results_end = 0;
rquery->buffer.previous = NULL;
 
r600_emit_query_begin(rctx, rquery);
 
if (!r600_is_timer_query(rquery->type)) {
LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
}
return true;
}
 
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
 
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
return;
case PIPE_QUERY_GPU_FINISHED:
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, &rquery->fence);
return;
case R600_QUERY_DRAW_CALLS:
rquery->end_result = rctx->num_draw_calls;
return;
case R600_QUERY_REQUESTED_VRAM:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
return;
case R600_QUERY_REQUESTED_GTT:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
return;
case R600_QUERY_BUFFER_WAIT_TIME:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
return;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
return;
case R600_QUERY_NUM_BYTES_MOVED:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
return;
case R600_QUERY_VRAM_USAGE:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
return;
case R600_QUERY_GTT_USAGE:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
return;
case R600_QUERY_GPU_TEMPERATURE:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
return;
case R600_QUERY_CURRENT_GPU_SCLK:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
return;
case R600_QUERY_CURRENT_GPU_MCLK:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
return;
case R600_QUERY_GPU_LOAD:
rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
return;
}
 
r600_emit_query_end(rctx, rquery);
 
if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
LIST_DELINIT(&rquery->list);
}
}
 
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
bool test_status_bit)
{
uint32_t *current_result = (uint32_t*)map;
uint64_t start, end;
 
start = (uint64_t)current_result[start_index] |
(uint64_t)current_result[start_index+1] << 32;
end = (uint64_t)current_result[end_index] |
(uint64_t)current_result[end_index+1] << 32;
 
if (!test_status_bit ||
((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
return end - start;
}
return 0;
}
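 
#if 0 /* usage sketch, for illustration only */
{
/* One begin/end pair for DB0 with both status bits set:
* start = 0x8000000000000010, end = 0x8000000000000025,
* so this returns 0x15 (21 samples). */
uint32_t block[4] = { 0x10, 0x80000000, 0x25, 0x80000000 };
unsigned samples = r600_query_read_result((char*)block, 0, 2, true);
}
#endif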
 
static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
struct r600_query *query,
struct r600_query_buffer *qbuf,
boolean wait,
union pipe_query_result *result)
{
struct pipe_screen *screen = ctx->b.screen;
unsigned results_base = 0;
char *map;
 
/* Non-GPU queries. */
switch (query->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* Convert from cycles per millisecond to cycles per second (Hz). */
result->timestamp_disjoint.frequency =
(uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
result->timestamp_disjoint.disjoint = FALSE;
return TRUE;
case PIPE_QUERY_GPU_FINISHED:
result->b = screen->fence_finish(screen, query->fence,
wait ? PIPE_TIMEOUT_INFINITE : 0);
return result->b;
case R600_QUERY_DRAW_CALLS:
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
result->u64 = query->end_result - query->begin_result;
return TRUE;
case R600_QUERY_GPU_LOAD:
result->u64 = query->end_result;
return TRUE;
}
 
map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
PIPE_TRANSFER_READ |
(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
if (!map)
return FALSE;
 
/* count all results across all data blocks */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 0, 2, true);
results_base += 16;
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
while (results_base != qbuf->results_end) {
result->b = result->b ||
r600_query_read_result(map + results_base, 0, 2, true) != 0;
results_base += 16;
}
break;
case PIPE_QUERY_TIME_ELAPSED:
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 0, 2, false);
results_base += query->result_size;
}
break;
case PIPE_QUERY_TIMESTAMP:
{
uint32_t *current_result = (uint32_t*)map;
result->u64 = (uint64_t)current_result[0] |
(uint64_t)current_result[1] << 32;
break;
}
case PIPE_QUERY_PRIMITIVES_EMITTED:
/* SAMPLE_STREAMOUTSTATS stores this structure:
* {
* u64 NumPrimitivesWritten;
* u64 PrimitiveStorageNeeded;
* }
* We only need NumPrimitivesWritten here. */
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 2, 6, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* Here we read PrimitiveStorageNeeded. */
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 0, 4, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_SO_STATISTICS:
while (results_base != qbuf->results_end) {
result->so_statistics.num_primitives_written +=
r600_query_read_result(map + results_base, 2, 6, true);
result->so_statistics.primitives_storage_needed +=
r600_query_read_result(map + results_base, 0, 4, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
while (results_base != qbuf->results_end) {
result->b = result->b ||
r600_query_read_result(map + results_base, 2, 6, true) !=
r600_query_read_result(map + results_base, 0, 4, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
if (ctx->chip_class >= EVERGREEN) {
while (results_base != qbuf->results_end) {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(map + results_base, 0, 22, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(map + results_base, 2, 24, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(map + results_base, 4, 26, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(map + results_base, 6, 28, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(map + results_base, 8, 30, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(map + results_base, 10, 32, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(map + results_base, 12, 34, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(map + results_base, 14, 36, false);
result->pipeline_statistics.hs_invocations +=
r600_query_read_result(map + results_base, 16, 38, false);
result->pipeline_statistics.ds_invocations +=
r600_query_read_result(map + results_base, 18, 40, false);
result->pipeline_statistics.cs_invocations +=
r600_query_read_result(map + results_base, 20, 42, false);
results_base += query->result_size;
}
} else {
while (results_base != qbuf->results_end) {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(map + results_base, 0, 16, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(map + results_base, 2, 18, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(map + results_base, 4, 20, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(map + results_base, 6, 22, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(map + results_base, 8, 24, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(map + results_base, 10, 26, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(map + results_base, 12, 28, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(map + results_base, 14, 30, false);
results_base += query->result_size;
}
}
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
"DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
"Clipper prims=%llu, PS=%llu, CS=%llu\n",
result->pipeline_statistics.ia_vertices,
result->pipeline_statistics.ia_primitives,
result->pipeline_statistics.vs_invocations,
result->pipeline_statistics.hs_invocations,
result->pipeline_statistics.ds_invocations,
result->pipeline_statistics.gs_invocations,
result->pipeline_statistics.gs_primitives,
result->pipeline_statistics.c_invocations,
result->pipeline_statistics.c_primitives,
result->pipeline_statistics.ps_invocations,
result->pipeline_statistics.cs_invocations);
#endif
break;
default:
assert(0);
}
 
ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
return TRUE;
}
 
static boolean r600_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
boolean wait, union pipe_query_result *result)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
struct r600_query_buffer *qbuf;
 
util_query_clear_result(result, rquery->type);
 
for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
return FALSE;
}
}
 
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
}
return TRUE;
}
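 
/* Unit note: r600_clock_crystal_freq is in kHz (cycles per
* millisecond), so ticks * 1000000 / freq yields nanoseconds, the
* unit expected for TIME_ELAPSED and TIMESTAMP. For example,
* 27000 ticks at 27000 kHz -> 1000000 ns = 1 ms. */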
 
static void r600_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
boolean condition,
uint mode)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
bool wait_flag = false;
 
rctx->current_render_cond = query;
rctx->current_render_cond_cond = condition;
rctx->current_render_cond_mode = mode;
 
if (query == NULL) {
if (rctx->predicate_drawing) {
rctx->predicate_drawing = false;
r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
}
return;
}
 
if (mode == PIPE_RENDER_COND_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
wait_flag = true;
}
 
rctx->predicate_drawing = true;
 
switch (rquery->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
break;
default:
assert(0);
}
}
 
void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
{
struct r600_query *query;
 
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
r600_emit_query_end(ctx, query);
}
assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}
 
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx)
{
struct r600_query *query;
unsigned num_dw = 0;
 
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
/* begin + end */
num_dw += query->num_cs_dw * 2;
 
/* Workaround for the fact that
* num_cs_dw_nontimer_queries_suspend is incremented for every
* resumed query, which raises the bar in need_cs_space for
* queries about to be resumed.
*/
num_dw += query->num_cs_dw;
}
/* primitives generated query */
num_dw += ctx->streamout.enable_atom.num_dw;
/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
num_dw += 13;
 
return num_dw;
}
 
void r600_resume_nontimer_queries(struct r600_common_context *ctx)
{
struct r600_query *query;
 
assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
 
/* Check CS space here. Resuming must not be interrupted by flushes. */
ctx->need_gfx_cs_space(&ctx->b,
r600_queries_num_cs_dw_for_resuming(ctx), TRUE);
 
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
r600_emit_query_begin(ctx, query);
}
}
 
/* Get the backend mask. */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
unsigned num_backends = ctx->screen->info.r600_num_backends;
unsigned i, mask = 0;
 
/* if backend_map query is supported by the kernel */
if (ctx->screen->info.r600_backend_map_valid) {
unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
unsigned backend_map = ctx->screen->info.r600_backend_map;
unsigned item_width, item_mask;
 
if (ctx->chip_class >= EVERGREEN) {
item_width = 4;
item_mask = 0x7;
} else {
item_width = 2;
item_mask = 0x3;
}
 
while(num_tile_pipes--) {
i = backend_map & item_mask;
mask |= (1<<i);
backend_map >>= item_width;
}
if (mask != 0) {
ctx->backend_mask = mask;
return;
}
}
 
/* Otherwise use the fallback path for older kernels. */
 
/* create buffer for event data */
buffer = (struct r600_resource*)
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, ctx->max_db*16);
if (!buffer)
goto err;
 
/* initialize buffer with zeroes */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
if (results) {
memset(results, 0, ctx->max_db * 4 * 4);
ctx->ws->buffer_unmap(buffer->cs_buf);
 
/* emit EVENT_WRITE for ZPASS_DONE */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, buffer->gpu_address);
radeon_emit(cs, buffer->gpu_address >> 32);
 
r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
if (results) {
for(i = 0; i < ctx->max_db; i++) {
/* At least the highest bit will be set if the backend is used. */
if (results[i*4 + 1])
mask |= (1<<i);
}
ctx->ws->buffer_unmap(buffer->cs_buf);
}
}
 
pipe_resource_reference((struct pipe_resource**)&buffer, NULL);
 
if (mask != 0) {
ctx->backend_mask = mask;
return;
}
 
err:
/* Fall back to the old method: set the num_backends lowest bits to 1. */
ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
return;
}
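 
/* Worked example for the kernel-provided path (hypothetical values):
* with num_tile_pipes = 4 and backend_map = 0x3210 on Evergreen
* (item_width = 4), the items decode to backends 0, 1, 2 and 3, so
* backend_mask becomes 0xF. R600-class parts pack 2-bit items with
* mask 0x3 instead. */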
 
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
rctx->b.destroy_query = r600_destroy_query;
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
rctx->b.get_query_result = r600_get_query_result;
 
if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
rctx->b.render_condition = r600_render_condition;
 
LIST_INITHEAD(&rctx->active_nontimer_queries);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_streamout.c
0,0 → 1,369
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
#include "r600_pipe_common.h"
#include "r600_cs.h"
 
#include "util/u_memory.h"
 
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
 
static struct pipe_stream_output_target *
r600_create_so_target(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned buffer_offset,
unsigned buffer_size)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_so_target *t;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
 
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
return NULL;
}
 
u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
if (!t->buf_filled_size) {
FREE(t);
return NULL;
}
 
t->b.reference.count = 1;
t->b.context = ctx;
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
 
util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
return &t->b;
}
 
static void r600_so_target_destroy(struct pipe_context *ctx,
struct pipe_stream_output_target *target)
{
struct r600_so_target *t = (struct r600_so_target*)target;
pipe_resource_reference(&t->b.buffer, NULL);
pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
FREE(t);
}
 
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
struct r600_atom *begin = &rctx->streamout.begin_atom;
unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
rctx->streamout.append_bitmask);
 
if (!num_bufs)
return;
 
rctx->streamout.num_dw_for_end =
12 + /* flush_vgt_streamout */
num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
 
begin->num_dw = 12 + /* flush_vgt_streamout */
3; /* VGT_STRMOUT_BUFFER_CONFIG */
 
if (rctx->chip_class >= SI) {
begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
} else {
begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
 
if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
}
 
begin->num_dw +=
num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
 
begin->dirty = true;
 
r600_set_streamout_enable(rctx, true);
}
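 
/* Worked example (hypothetical): two enabled buffers, both appended,
* on Evergreen: begin->num_dw = 12 + 3 + 2*7 + 2*8 = 45. The
* STRMOUT_BASE_UPDATE and SURFACE_BASE_UPDATE terms apply only on
* R7xx (RS780..RV740) and on parts between R600 and RS780,
* respectively. */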
 
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned i;
unsigned append_bitmask = 0;
 
/* Stop streamout. */
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
r600_emit_streamout_end(rctx);
}
 
/* Set the new targets. */
for (i = 0; i < num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
r600_context_add_resource_size(ctx, targets[i]->buffer);
if (offsets[i] == ((unsigned)-1))
append_bitmask |= 1 << i;
}
for (; i < rctx->streamout.num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
}
 
rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) |
(num_targets >= 2 && targets[1] ? 2 : 0) |
(num_targets >= 3 && targets[2] ? 4 : 0) |
(num_targets >= 4 && targets[3] ? 8 : 0);
 
rctx->streamout.num_targets = num_targets;
rctx->streamout.append_bitmask = append_bitmask;
 
if (num_targets) {
r600_streamout_buffers_dirty(rctx);
} else {
rctx->streamout.begin_atom.dirty = false;
r600_set_streamout_enable(rctx, false);
}
}
 
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
unsigned reg_strmout_cntl;
 
/* The register is at different places on different ASICs. */
if (rctx->chip_class >= CIK) {
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
} else if (rctx->chip_class >= EVERGREEN) {
reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
} else {
reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
}
 
if (rctx->chip_class >= CIK) {
cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
} else {
r600_write_config_reg(cs, reg_strmout_cntl, 0);
}
 
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
 
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
radeon_emit(cs, 0);
radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
radeon_emit(cs, 4); /* poll interval */
}
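 
/* Sketch of the handshake above: writing 0 to CP_STRMOUT_CNTL clears
* OFFSET_UPDATE_DONE, the SO_VGTSTREAMOUT_FLUSH event makes the
* hardware latch the current buffer offsets and set the bit again, and
* WAIT_REG_MEM polls the register every 4 clocks until
* (reg & mask) == reference, i.e. until the offsets are safe to use. */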
 
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
unsigned i, update_flags = 0;
 
r600_flush_vgt_streamout(rctx);
 
r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
R_028B20_VGT_STRMOUT_BUFFER_EN,
rctx->streamout.enabled_mask);
 
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
 
t[i]->stride_in_dw = stride_in_dw[i];
 
if (rctx->chip_class >= SI) {
/* SI binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader
* through SGPRs what to do. */
r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
} else {
uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
 
update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
 
r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
 
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
 
/* R7xx requires this packet after updating BUFFER_BASE.
* Without this, R7xx locks up. */
if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
radeon_emit(cs, i);
radeon_emit(cs, va >> 8);
 
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
}
}
 
if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
uint64_t va = t[i]->buf_filled_size->gpu_address +
t[i]->buf_filled_size_offset;
 
/* Append. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, va); /* src address lo */
radeon_emit(cs, va >> 32); /* src address hi */
 
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
} else {
/* Start from the beginning. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
radeon_emit(cs, 0); /* unused */
}
}
 
if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
radeon_emit(cs, update_flags);
}
rctx->streamout.begin_emitted = true;
}
 
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned i;
uint64_t va;
 
r600_flush_vgt_streamout(rctx);
 
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
 
va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
radeon_emit(cs, va); /* dst address lo */
radeon_emit(cs, va >> 32); /* dst address hi */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
 
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
/* Zero the buffer size. The counters (primitives generated,
* primitives emitted) may be enabled even if there is no
* buffer bound. This ensures that the primitives-emitted query
* won't increment. */
r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
 
t[i]->buf_filled_size_valid = true;
}
 
rctx->streamout.begin_emitted = false;
rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
}
 
/* STREAMOUT CONFIG DERIVED STATE
*
* Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
* The buffer mask is an independent state, so no writes occur if there
* are no buffers bound.
*/
 
static bool r600_get_strmout_en(struct r600_common_context *rctx)
{
return rctx->streamout.streamout_enabled ||
rctx->streamout.prims_gen_query_enabled;
}
 
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
r600_write_context_reg(rctx->rings.gfx.cs,
rctx->chip_class >= EVERGREEN ?
R_028B94_VGT_STRMOUT_CONFIG :
R_028AB0_VGT_STRMOUT_EN,
S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)));
}
 
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
bool old_strmout_en = r600_get_strmout_en(rctx);
 
rctx->streamout.streamout_enabled = enable;
if (old_strmout_en != r600_get_strmout_en(rctx))
rctx->streamout.enable_atom.dirty = true;
}
 
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = r600_get_strmout_en(rctx);
 
rctx->streamout.num_prims_gen_queries += diff;
assert(rctx->streamout.num_prims_gen_queries >= 0);
 
rctx->streamout.prims_gen_query_enabled =
rctx->streamout.num_prims_gen_queries != 0;
 
if (old_strmout_en != r600_get_strmout_en(rctx))
rctx->streamout.enable_atom.dirty = true;
}
}
 
void r600_streamout_init(struct r600_common_context *rctx)
{
rctx->b.create_stream_output_target = r600_create_so_target;
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
rctx->streamout.enable_atom.num_dw = 3;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_texture.c
0,0 → 1,1296
/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Jerome Glisse
* Corbin Simpson
*/
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include <errno.h>
#include <inttypes.h>
 
/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
static void r600_copy_region_with_blit(struct pipe_context *pipe,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct pipe_blit_info blit;
 
memset(&blit, 0, sizeof(blit));
blit.src.resource = src;
blit.src.format = src->format;
blit.src.level = src_level;
blit.src.box = *src_box;
blit.dst.resource = dst;
blit.dst.format = dst->format;
blit.dst.level = dst_level;
blit.dst.box.x = dstx;
blit.dst.box.y = dsty;
blit.dst.box.z = dstz;
blit.dst.box.width = src_box->width;
blit.dst.box.height = src_box->height;
blit.dst.box.depth = src_box->depth;
blit.mask = util_format_get_mask(src->format) &
util_format_get_mask(dst->format);
blit.filter = PIPE_TEX_FILTER_NEAREST;
 
if (blit.mask) {
pipe->blit(pipe, &blit);
}
}
 
/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *dst = &rtransfer->staging->b.b;
struct pipe_resource *src = transfer->resource;
 
if (src->nr_samples > 1) {
r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
src, transfer->level, &transfer->box);
return;
}
 
rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
&transfer->box);
}
 
/* Copy from a transfer's staging texture to a full GPU one. */
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *dst = transfer->resource;
struct pipe_resource *src = &rtransfer->staging->b.b;
struct pipe_box sbox;
 
u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
 
if (dst->nr_samples > 1) {
r600_copy_region_with_blit(ctx, dst, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
src, 0, &sbox);
return;
}
 
rctx->dma_copy(ctx, dst, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
src, 0, &sbox);
}
 
static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
const struct pipe_box *box)
{
enum pipe_format format = rtex->resource.b.b.format;
 
return rtex->surface.level[level].offset +
box->z * rtex->surface.level[level].slice_size +
box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
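 
/* Worked example (hypothetical values): for a DXT1 texture (4x4
* blocks, 8 bytes per block) with slice_size = 16384,
* pitch_bytes = 512 and box = { x = 16, y = 8, z = 1 }, this returns
* level.offset + 1*16384 + (8/4)*512 + (16/4)*8 = level.offset + 17440. */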
 
static int r600_init_surface(struct r600_common_screen *rscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,
unsigned array_mode,
bool is_flushed_depth)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
bool is_depth, is_stencil;
 
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
 
surface->npix_x = ptex->width0;
surface->npix_y = ptex->height0;
surface->npix_z = ptex->depth0;
surface->blk_w = util_format_get_blockwidth(ptex->format);
surface->blk_h = util_format_get_blockheight(ptex->format);
surface->blk_d = 1;
surface->array_size = 1;
surface->last_level = ptex->last_level;
 
if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
surface->bpe = 4; /* stencil is allocated separately on evergreen */
} else {
surface->bpe = util_format_get_blocksize(ptex->format);
/* Align bytes per element to a dword. */
if (surface->bpe == 3) {
surface->bpe = 4;
}
}
 
surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
surface->flags = RADEON_SURF_SET(array_mode, MODE);
 
switch (ptex->target) {
case PIPE_TEXTURE_1D:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
break;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
break;
case PIPE_TEXTURE_3D:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
break;
case PIPE_TEXTURE_1D_ARRAY:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
surface->array_size = ptex->array_size;
break;
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
surface->array_size = ptex->array_size;
break;
case PIPE_TEXTURE_CUBE:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
break;
case PIPE_BUFFER:
default:
return -EINVAL;
}
if (ptex->bind & PIPE_BIND_SCANOUT) {
surface->flags |= RADEON_SURF_SCANOUT;
}
 
if (!is_flushed_depth && is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
 
if (is_stencil) {
surface->flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
if (rscreen->chip_class >= SI) {
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
return 0;
}
 
static int r600_setup_surface(struct pipe_screen *screen,
struct r600_texture *rtex,
unsigned pitch_in_bytes_override)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
int r;
 
r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
if (r) {
return r;
}
 
rtex->size = rtex->surface.bo_size;
 
if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
/* The old DDX driver on Evergreen overestimates the alignment for
* 1D-tiled surfaces, which have only one level.
*/
rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
rtex->surface.stencil_offset =
rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
}
}
return 0;
}
 
static boolean r600_texture_get_handle(struct pipe_screen* screen,
struct pipe_resource *ptex,
struct winsys_handle *whandle)
{
struct r600_texture *rtex = (struct r600_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
struct radeon_surf *surface = &rtex->surface;
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 
rscreen->ws->buffer_set_tiling(resource->buf,
NULL,
surface->level[0].mode >= RADEON_SURF_MODE_1D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
surface->level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
surface->bankw, surface->bankh,
surface->tile_split,
surface->stencil_tile_split,
surface->mtilea,
surface->level[0].pitch_bytes,
(surface->flags & RADEON_SURF_SCANOUT) != 0);
 
return rscreen->ws->buffer_get_handle(resource->buf,
surface->level[0].pitch_bytes, whandle);
}
 
static void r600_texture_destroy(struct pipe_screen *screen,
struct pipe_resource *ptex)
{
struct r600_texture *rtex = (struct r600_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
 
if (rtex->flushed_depth_texture)
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
 
pipe_resource_reference((struct pipe_resource**)&rtex->htile_buffer, NULL);
if (rtex->cmask_buffer != &rtex->resource) {
pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
}
pb_reference(&resource->buf, NULL);
FREE(rtex);
}
 
static const struct u_resource_vtbl r600_texture_vtbl;
 
/* The number of samples can be specified independently of the texture. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct radeon_surf fmask = rtex->surface;
 
memset(out, 0, sizeof(*out));
 
fmask.bo_alignment = 0;
fmask.bo_size = 0;
fmask.nsamples = 1;
fmask.flags |= RADEON_SURF_FMASK;
 
/* Force 2D tiling if it wasn't set. This may occur when creating
* FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
* destination buffer must have an FMASK too. */
fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
 
if (rscreen->chip_class >= SI) {
fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
 
switch (nr_samples) {
case 2:
case 4:
fmask.bpe = 1;
if (rscreen->chip_class <= CAYMAN) {
fmask.bankh = 4;
}
break;
case 8:
fmask.bpe = 4;
break;
default:
R600_ERR("Invalid sample count for FMASK allocation.\n");
return;
}
 
/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
* This can be fixed by writing a separate FMASK allocator specifically
* for R600-R700 ASICs. */
if (rscreen->chip_class <= R700) {
fmask.bpe *= 2;
}
 
if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
R600_ERR("Got error in surface_init while allocating FMASK.\n");
return;
}
 
assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
 
out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
 
out->tile_mode_index = fmask.tiling_index[0];
out->pitch = fmask.level[0].nblk_x;
out->bank_height = fmask.bankh;
out->alignment = MAX2(256, fmask.bo_alignment);
out->size = fmask.bo_size;
}
 
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
r600_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
 
rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
 
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
unsigned cmask_tile_width = 8;
unsigned cmask_tile_height = 8;
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
unsigned element_bits = 4;
unsigned cmask_cache_bits = 1024;
unsigned num_pipes = rscreen->tiling_info.num_channels;
unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
 
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
 
unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
unsigned height = align(rtex->surface.npix_y, macro_tile_height);
 
unsigned base_align = num_pipes * pipe_interleave_bytes;
unsigned slice_bytes =
((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
 
assert(macro_tile_width % 128 == 0);
assert(macro_tile_height % 128 == 0);
 
out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
out->alignment = MAX2(256, base_align);
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
 
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
unsigned num_pipes = rscreen->tiling_info.num_channels;
unsigned cl_width, cl_height;
 
switch (num_pipes) {
case 2:
cl_width = 32;
cl_height = 16;
break;
case 4:
cl_width = 32;
cl_height = 32;
break;
case 8:
cl_width = 64;
cl_height = 32;
break;
case 16: /* Hawaii */
cl_width = 64;
cl_height = 64;
break;
default:
assert(0);
return;
}
 
unsigned base_align = num_pipes * pipe_interleave_bytes;
 
unsigned width = align(rtex->surface.npix_x, cl_width*8);
unsigned height = align(rtex->surface.npix_y, cl_height*8);
unsigned slice_elements = (width * height) / (8*8);
 
/* Each element of CMASK is a nibble. */
unsigned slice_bytes = slice_elements / 2;
 
out->slice_tile_max = (width * height) / (128*128);
if (out->slice_tile_max)
out->slice_tile_max -= 1;
 
out->alignment = MAX2(256, base_align);
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
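 
/* Worked example (hypothetical values): with num_pipes = 4 (cl 32x32)
* and a 1024x1024 surface, width and height are already multiples of
* 256, slice_elements = (1024*1024)/64 = 16384 nibbles,
* slice_bytes = 8192, and slice_tile_max = (1024*1024)/(128*128) - 1 = 63. */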
 
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
if (rscreen->chip_class >= SI) {
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
} else {
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
 
rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
 
if (rscreen->chip_class >= SI)
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
 
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
if (rtex->cmask_buffer)
return;
 
assert(rtex->cmask.size == 0);
 
if (rscreen->chip_class >= SI) {
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
} else {
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
 
rtex->cmask_buffer = (struct r600_resource *)
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, rtex->cmask.size);
if (rtex->cmask_buffer == NULL) {
rtex->cmask.size = 0;
return;
}
 
/* update colorbuffer state bits */
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
 
if (rscreen->chip_class >= SI)
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
 
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = rscreen->tiling_info.num_channels;
 
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_minor < 26)
return 0;
 
/* HW bug on R6xx. */
if (rscreen->chip_class == R600 &&
(rtex->surface.level[0].npix_x > 7680 ||
rtex->surface.level[0].npix_y > 7680))
return 0;
 
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
rscreen->info.drm_minor < 38)
return 0;
 
switch (num_pipes) {
case 1:
cl_width = 32;
cl_height = 16;
break;
case 2:
cl_width = 32;
cl_height = 32;
break;
case 4:
cl_width = 64;
cl_height = 32;
break;
case 8:
cl_width = 64;
cl_height = 64;
break;
case 16:
cl_width = 128;
cl_height = 64;
break;
default:
assert(0);
return 0;
}
 
width = align(rtex->surface.npix_x, cl_width * 8);
height = align(rtex->surface.npix_y, cl_height * 8);
 
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
 
pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
base_align = num_pipes * pipe_interleave_bytes;
 
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
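 
/* Worked example (hypothetical values): num_pipes = 2 gives cl 32x32,
* so a 1920x1080 depth surface pads to 2048x1280, slice_elements =
* (2048*1280)/64 = 40960 and slice_bytes = 163840, rounded up to
* base_align = 2 * group_bytes for each layer. */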
 
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
 
if (!htile_size)
return;
 
rtex->htile_buffer = (struct r600_resource*)
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, htile_size);
if (rtex->htile_buffer == NULL) {
/* This is not a fatal error, as we can keep rendering
* without the HTILE buffer. */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
htile_size, 0, true);
}
}
 
/* Common processing for r600_texture_create and r600_texture_from_handle */
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
unsigned pitch_in_bytes_override,
struct pb_buffer *buf,
struct radeon_surf *surface)
{
struct r600_texture *rtex;
struct r600_resource *resource;
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 
rtex = CALLOC_STRUCT(r600_texture);
if (rtex == NULL)
return NULL;
 
resource = &rtex->resource;
resource->b.b = *base;
resource->b.vtbl = &r600_texture_vtbl;
pipe_reference_init(&resource->b.b.reference, 1);
resource->b.b.screen = screen;
rtex->pitch_override = pitch_in_bytes_override;
 
/* don't include stencil-only formats which we don't support for rendering */
rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
 
rtex->surface = *surface;
if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) {
FREE(rtex);
return NULL;
}
 
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
 
if (rtex->is_depth) {
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
!(rscreen->debug_flags & DBG_NO_HYPERZ)) {
 
r600_texture_allocate_htile(rscreen, rtex);
}
} else {
if (base->nr_samples > 1) {
if (!buf) {
r600_texture_allocate_fmask(rscreen, rtex);
r600_texture_allocate_cmask(rscreen, rtex);
rtex->cmask_buffer = &rtex->resource;
}
if (!rtex->fmask.size || !rtex->cmask.size) {
FREE(rtex);
return NULL;
}
}
}
 
/* Now create the backing buffer. */
if (!buf) {
if (!r600_init_resource(rscreen, resource, rtex->size,
rtex->surface.bo_alignment, TRUE)) {
FREE(rtex);
return NULL;
}
} else {
resource->buf = buf;
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->cs_buf);
resource->domains = rscreen->ws->buffer_get_initial_domain(resource->cs_buf);
}
 
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC, true);
}
 
/* Initialize the CMASK base register value. */
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
 
if (rscreen->debug_flags & DBG_VM) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
rtex->resource.gpu_address,
rtex->resource.gpu_address + rtex->resource.buf->size,
base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
}
 
if (rscreen->debug_flags & DBG_TEX ||
(rtex->resource.b.b.last_level > 0 && rscreen->debug_flags & DBG_TEXMIP)) {
printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
"blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
rtex->surface.npix_x, rtex->surface.npix_y,
rtex->surface.npix_z, rtex->surface.blk_w,
rtex->surface.blk_h, rtex->surface.blk_d,
rtex->surface.array_size, rtex->surface.last_level,
rtex->surface.bpe, rtex->surface.nsamples,
rtex->surface.flags, util_format_short_name(base->format));
for (int i = 0; i <= rtex->surface.last_level; i++) {
printf(" L %i: offset=%"PRIu64", slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"nblk_z=%u, pitch_bytes=%u, mode=%u\n",
i, rtex->surface.level[i].offset,
rtex->surface.level[i].slice_size,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
rtex->surface.level[i].nblk_x,
rtex->surface.level[i].nblk_y,
rtex->surface.level[i].nblk_z,
rtex->surface.level[i].pitch_bytes,
rtex->surface.level[i].mode);
}
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
for (int i = 0; i <= rtex->surface.last_level; i++) {
printf(" S %i: offset=%"PRIu64", slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"nblk_z=%u, pitch_bytes=%u, mode=%u\n",
i, rtex->surface.stencil_level[i].offset,
rtex->surface.stencil_level[i].slice_size,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
rtex->surface.stencil_level[i].nblk_x,
rtex->surface.stencil_level[i].nblk_y,
rtex->surface.stencil_level[i].nblk_z,
rtex->surface.stencil_level[i].pitch_bytes,
rtex->surface.stencil_level[i].mode);
}
}
}
return rtex;
}
 
static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
const struct pipe_resource *templ)
{
const struct util_format_description *desc = util_format_description(templ->format);
 
/* MSAA resources must be 2D tiled. */
if (templ->nr_samples > 1)
return RADEON_SURF_MODE_2D;
 
/* Transfer resources should be linear. */
if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Handle common candidates for the linear mode.
* Compressed textures must always be tiled. */
if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) &&
!util_format_is_compressed(templ->format)) {
/* Not everything can be linear, so we cannot enforce it
* for all textures. */
if ((rscreen->debug_flags & DBG_NO_TILING) &&
(!util_format_is_depth_or_stencil(templ->format) ||
!(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Cursors are linear on SI.
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
if (rscreen->chip_class >= SI &&
(templ->bind & PIPE_BIND_CURSOR))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
if (templ->bind & PIPE_BIND_LINEAR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Textures with a very small height are recommended to be linear. */
if (templ->target == PIPE_TEXTURE_1D ||
templ->target == PIPE_TEXTURE_1D_ARRAY ||
templ->height0 <= 4)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Textures likely to be mapped often. */
if (templ->usage == PIPE_USAGE_STAGING ||
templ->usage == PIPE_USAGE_STREAM)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
}
 
/* Make small textures 1D tiled. */
if (templ->width0 <= 16 || templ->height0 <= 16 ||
(rscreen->debug_flags & DBG_NO_2D_TILING))
return RADEON_SURF_MODE_1D;
 
/* The allocator will switch to 1D if needed. */
return RADEON_SURF_MODE_2D;
}
 
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
int r;
 
r = r600_init_surface(rscreen, &surface, templ,
r600_choose_tiling(rscreen, templ),
templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
if (r) {
return NULL;
}
r = rscreen->ws->surface_best(rscreen->ws, &surface);
if (r) {
return NULL;
}
return (struct pipe_resource *)r600_texture_create_object(screen, templ,
0, NULL, &surface);
}
 
static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
struct winsys_handle *whandle)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pb_buffer *buf = NULL;
unsigned stride = 0;
unsigned array_mode;
enum radeon_bo_layout micro, macro;
struct radeon_surf surface;
bool scanout;
int r;
 
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
 
buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride);
if (!buf)
return NULL;
 
rscreen->ws->buffer_get_tiling(buf, &micro, &macro,
&surface.bankw, &surface.bankh,
&surface.tile_split,
&surface.stencil_tile_split,
&surface.mtilea, &scanout);
 
if (macro == RADEON_LAYOUT_TILED)
array_mode = RADEON_SURF_MODE_2D;
else if (micro == RADEON_LAYOUT_TILED)
array_mode = RADEON_SURF_MODE_1D;
else
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
 
r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
if (r) {
return NULL;
}
 
if (scanout)
surface.flags |= RADEON_SURF_SCANOUT;
 
return (struct pipe_resource *)r600_texture_create_object(screen, templ,
stride, buf, &surface);
}
 
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging)
{
struct r600_texture *rtex = (struct r600_texture*)texture;
struct pipe_resource resource;
struct r600_texture **flushed_depth_texture = staging ?
staging : &rtex->flushed_depth_texture;
 
if (!staging && rtex->flushed_depth_texture)
return true; /* it's ready */
 
resource.target = texture->target;
resource.format = texture->format;
resource.width0 = texture->width0;
resource.height0 = texture->height0;
resource.depth0 = texture->depth0;
resource.array_size = texture->array_size;
resource.last_level = texture->last_level;
resource.nr_samples = texture->nr_samples;
resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
 
if (staging)
resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
 
*flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
if (*flushed_depth_texture == NULL) {
R600_ERR("failed to create temporary texture to hold flushed depth\n");
return false;
}
 
(*flushed_depth_texture)->is_flushing_texture = TRUE;
(*flushed_depth_texture)->non_disp_tiling = false;
return true;
}
 
/**
* Initialize the pipe_resource descriptor to be of the same size as the box,
* which is supposed to hold a subregion of the texture "orig" at the given
* mipmap level.
*/
static void r600_init_temp_resource_from_box(struct pipe_resource *res,
struct pipe_resource *orig,
const struct pipe_box *box,
unsigned level, unsigned flags)
{
memset(res, 0, sizeof(*res));
res->format = orig->format;
res->width0 = box->width;
res->height0 = box->height;
res->depth0 = 1;
res->array_size = 1;
res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
res->flags = flags;
 
/* We must set the correct texture target and dimensions for a 3D box. */
if (box->depth > 1 && util_max_layer(orig, level) > 0)
res->target = orig->target;
else
res->target = PIPE_TEXTURE_2D;
 
switch (res->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY:
res->array_size = box->depth;
break;
case PIPE_TEXTURE_3D:
res->depth0 = box->depth;
break;
default:;
}
}
 
static void *r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_texture *rtex = (struct r600_texture*)texture;
struct r600_transfer *trans;
boolean use_staging_texture = FALSE;
struct r600_resource *buf;
unsigned offset = 0;
char *map;
 
/* We cannot map a tiled texture directly because the data is
* in a different order, so we detile it using a blit.
*
* Also, use a temporary in GTT memory for read transfers, as
* the CPU is much happier reading out of cached system memory
* than uncached VRAM.
*/
if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
use_staging_texture = TRUE;
} else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY) &&
(rtex->resource.domains == RADEON_DOMAIN_VRAM)) {
/* Untiled buffers in VRAM, which are slow for CPU reads */
use_staging_texture = TRUE;
} else if (!(usage & PIPE_TRANSFER_READ) &&
(r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
/* Use a staging texture for uploads if the underlying BO is busy. */
use_staging_texture = TRUE;
}
 
if (texture->flags & R600_RESOURCE_FLAG_TRANSFER) {
use_staging_texture = FALSE;
}
 
if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
return NULL;
}
 
trans = CALLOC_STRUCT(r600_transfer);
if (trans == NULL)
return NULL;
trans->transfer.resource = texture;
trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
 
if (rtex->is_depth) {
struct r600_texture *staging_depth;
 
if (rtex->resource.b.b.nr_samples > 1) {
/* MSAA depth buffers need to be converted to single sample buffers.
*
* Mapping MSAA depth buffers can occur if ReadPixels is called
* with a multisample GLX visual.
*
* First downsample the depth buffer to a temporary texture,
* then decompress the temporary one to staging.
*
* Only the region being mapped is transferred.
*/
struct pipe_resource resource;
 
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
 
if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
 
if (usage & PIPE_TRANSFER_READ) {
struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
 
r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
0, 0, 0, box->depth, 0, 0);
pipe_resource_reference((struct pipe_resource**)&temp, NULL);
}
}
else {
/* XXX: only readback the rectangle which is being mapped? */
/* XXX: when discard is true, no need to read back from depth texture */
if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
 
rctx->blit_decompress_depth(ctx, rtex, staging_depth,
level, level,
box->z, box->z + box->depth - 1,
0, 0);
 
offset = r600_texture_get_offset(staging_depth, level, box);
}
 
trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
trans->staging = (struct r600_resource*)staging_depth;
} else if (use_staging_texture) {
struct pipe_resource resource;
struct r600_texture *staging;
 
r600_init_temp_resource_from_box(&resource, texture, box, level,
R600_RESOURCE_FLAG_TRANSFER);
resource.usage = (usage & PIPE_TRANSFER_READ) ?
PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
 
/* Create the temporary texture. */
staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
if (staging == NULL) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
trans->staging = &staging->resource;
trans->transfer.stride = staging->surface.level[0].pitch_bytes;
trans->transfer.layer_stride = staging->surface.level[0].slice_size;
if (usage & PIPE_TRANSFER_READ) {
r600_copy_to_staging_texture(ctx, trans);
}
} else {
/* the resource is mapped directly */
trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
offset = r600_texture_get_offset(rtex, level, box);
}
 
if (trans->staging) {
buf = trans->staging;
if (!rtex->is_depth && !(usage & PIPE_TRANSFER_READ))
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
} else {
buf = &rtex->resource;
}
 
if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
FREE(trans);
return NULL;
}
 
*ptransfer = &trans->transfer;
return map + offset;
}
 
static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct radeon_winsys_cs_handle *buf;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
 
if (rtransfer->staging) {
buf = rtransfer->staging->cs_buf;
} else {
buf = r600_resource(transfer->resource)->cs_buf;
}
rctx->ws->buffer_unmap(buf);
 
if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
ctx->resource_copy_region(ctx, texture, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
&rtransfer->staging->b.b, transfer->level,
&transfer->box);
} else {
r600_copy_from_staging_texture(ctx, rtransfer);
}
}
 
if (rtransfer->staging)
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
 
FREE(transfer);
}
 
static const struct u_resource_vtbl r600_texture_vtbl =
{
NULL, /* get_handle */
r600_texture_destroy, /* resource_destroy */
r600_texture_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r600_texture_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
 
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width, unsigned height)
{
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
 
if (surface == NULL)
return NULL;
 
assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
 
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.context = pipe;
surface->base.format = templ->format;
surface->base.width = width;
surface->base.height = height;
surface->base.u = templ->u;
return &surface->base;
}
 
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *tex,
const struct pipe_surface *templ)
{
unsigned level = templ->u.tex.level;
 
return r600_create_surface_custom(pipe, tex, templ,
u_minify(tex->width0, level),
u_minify(tex->height0, level));
}
 
static void r600_surface_destroy(struct pipe_context *pipe,
struct pipe_surface *surface)
{
struct r600_surface *surf = (struct r600_surface*)surface;
pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_fmask, NULL);
pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_cmask, NULL);
pipe_resource_reference(&surface->texture, NULL);
FREE(surface);
}
 
unsigned r600_translate_colorswap(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
 
#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == UTIL_FORMAT_SWIZZLE_##swz)
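/* e.g. HAS_SWIZZLE(0,X) is true when channel 0 of the format reads the X component. */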
 
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
return V_0280A0_SWAP_STD;
 
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return ~0U;
 
switch (desc->nr_channels) {
case 1:
if (HAS_SWIZZLE(0,X))
return V_0280A0_SWAP_STD; /* X___ */
else if (HAS_SWIZZLE(3,X))
return V_0280A0_SWAP_ALT_REV; /* ___X */
break;
case 2:
if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
(HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
return V_0280A0_SWAP_STD; /* XY__ */
else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
(HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
return V_0280A0_SWAP_STD_REV; /* YX__ */
else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
return V_0280A0_SWAP_ALT; /* X__Y */
else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
return V_0280A0_SWAP_ALT_REV; /* Y__X */
break;
case 3:
if (HAS_SWIZZLE(0,X))
return V_0280A0_SWAP_STD; /* XYZ */
else if (HAS_SWIZZLE(0,Z))
return V_0280A0_SWAP_STD_REV; /* ZYX */
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
return V_0280A0_SWAP_STD; /* XYZW */
else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
return V_0280A0_SWAP_STD_REV; /* WZYX */
else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X))
return V_0280A0_SWAP_ALT; /* ZYXW */
else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y))
return V_0280A0_SWAP_ALT_REV; /* WXYZ */
break;
}
return ~0U;
}
 
static void evergreen_set_clear_color(struct r600_texture *rtex,
enum pipe_format surface_format,
const union pipe_color_union *color)
{
union util_color uc;
 
memset(&uc, 0, sizeof(uc));
 
if (util_format_is_pure_uint(surface_format)) {
util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
} else if (util_format_is_pure_sint(surface_format)) {
util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
} else {
util_pack_color(color->f, surface_format, &uc);
}
 
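/* color_clear_value holds two dwords, enough for formats up to 64 bits
* per pixel; wider formats are rejected by evergreen_do_fast_color_clear. */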
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}
 
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers,
const union pipe_color_union *color)
{
int i;
 
if (rctx->current_render_cond)
return;
 
for (i = 0; i < fb->nr_cbufs; i++) {
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
 
if (!fb->cbufs[i])
continue;
 
/* if this colorbuffer is not being cleared */
if (!(*buffers & clear_bit))
continue;
 
tex = (struct r600_texture *)fb->cbufs[i]->texture;
 
/* Formats wider than 64 bits per pixel (e.g. 128-bit) are unsupported. */
if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
continue;
}
 
/* the clear is allowed if all layers are bound */
if (fb->cbufs[i]->u.tex.first_layer != 0 ||
fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
continue;
}
 
/* cannot clear mipmapped textures */
if (fb->cbufs[i]->texture->last_level != 0) {
continue;
}
 
/* only supported on tiled surfaces */
if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) {
continue;
}
 
/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
continue;
}
 
/* ensure CMASK is enabled */
r600_texture_alloc_cmask_separate(rctx->screen, tex);
if (tex->cmask.size == 0) {
continue;
}
 
/* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
tex->cmask.offset, tex->cmask.size, 0, true);
 
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
fb_state->dirty = true;
*buffers &= ~clear_bit;
}
}
 
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
{
rscreen->b.resource_from_handle = r600_texture_from_handle;
rscreen->b.resource_get_handle = r600_texture_get_handle;
}
 
void r600_init_context_texture_functions(struct r600_common_context *rctx)
{
rctx->b.create_surface = r600_create_surface;
rctx->b.surface_destroy = r600_surface_destroy;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600d_common.h
0,0 → 1,206
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*/
 
#ifndef R600D_COMMON_H
#define R600D_COMMON_H
 
#define R600_CONFIG_REG_OFFSET 0x08000
#define R600_CONTEXT_REG_OFFSET 0x28000
#define SI_SH_REG_OFFSET 0x0000B000
#define SI_SH_REG_END 0x0000C000
#define CIK_UCONFIG_REG_OFFSET 0x00030000
#define CIK_UCONFIG_REG_END 0x00031000
 
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16)
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
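/* For illustration: PKT3(PKT3_NOP, 0, 0) evaluates to 0xC0001000, i.e. a
* type-3 NOP packet header with a count field of 0 and predication disabled. */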
 
#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002
 
#define PKT3_NOP 0x10
#define PKT3_SET_PREDICATION 0x20
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
#define STRMOUT_OFFSET_SOURCE(x) (((x) & 0x3) << 1)
#define STRMOUT_OFFSET_FROM_PACKET 0
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM 2
#define STRMOUT_OFFSET_NONE 3
#define STRMOUT_SELECT_BUFFER(x) (((x) & 0x3) << 8)
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_EVENT_WRITE 0x46
#define PKT3_EVENT_WRITE_EOP 0x47
#define PKT3_SET_CONFIG_REG 0x68
#define PKT3_SET_CONTEXT_REG 0x69
#define PKT3_STRMOUT_BASE_UPDATE 0x72 /* r700 only */
#define PKT3_SURFACE_BASE_UPDATE 0x73 /* r600 only */
#define SURFACE_BASE_UPDATE_DEPTH (1 << 0)
#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x))
#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
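/* e.g. SURFACE_BASE_UPDATE_COLOR_NUM(2) == 0x6
* == SURFACE_BASE_UPDATE_COLOR(0) | SURFACE_BASE_UPDATE_COLOR(1) */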
#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
#define PKT3_SET_SH_REG 0x76 /* SI and later */
#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
 
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
#define EVENT_TYPE_PIPELINESTAT_START 25
#define EVENT_TYPE_PIPELINESTAT_STOP 26
#define EVENT_TYPE_SAMPLE_PIPELINESTAT 30
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
/* 0 - any non-TS event
* 1 - ZPASS_DONE
* 2 - SAMPLE_PIPELINESTAT
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
* 5 - TS events
*/
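/* For example, occlusion queries typically emit
* EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1) in an EVENT_WRITE packet. */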
 
#define PREDICATION_OP_CLEAR 0x0
#define PREDICATION_OP_ZPASS 0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
#define PRED_OP(x) ((x) << 16)
#define PREDICATION_CONTINUE (1 << 31)
#define PREDICATION_HINT_WAIT (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
#define PREDICATION_DRAW_VISIBLE (1 << 8)
 
/* R600-R700 */
#define R_008490_CP_STRMOUT_CNTL 0x008490
#define S_008490_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_028AB0_VGT_STRMOUT_EN 0x028AB0
#define S_028AB0_STREAMOUT(x) (((x) & 0x1) << 0)
#define G_028AB0_STREAMOUT(x) (((x) >> 0) & 0x1)
#define C_028AB0_STREAMOUT 0xFFFFFFFE
#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
#define S_028B20_BUFFER_0_EN(x) (((x) & 0x1) << 0)
#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1)
#define C_028B20_BUFFER_0_EN 0xFFFFFFFE
#define S_028B20_BUFFER_1_EN(x) (((x) & 0x1) << 1)
#define G_028B20_BUFFER_1_EN(x) (((x) >> 1) & 0x1)
#define C_028B20_BUFFER_1_EN 0xFFFFFFFD
#define S_028B20_BUFFER_2_EN(x) (((x) & 0x1) << 2)
#define G_028B20_BUFFER_2_EN(x) (((x) >> 2) & 0x1)
#define C_028B20_BUFFER_2_EN 0xFFFFFFFB
#define S_028B20_BUFFER_3_EN(x) (((x) & 0x1) << 3)
#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1)
#define C_028B20_BUFFER_3_EN 0xFFFFFFF7
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
 
#define V_0280A0_SWAP_STD 0x00000000
#define V_0280A0_SWAP_ALT 0x00000001
#define V_0280A0_SWAP_STD_REV 0x00000002
#define V_0280A0_SWAP_ALT_REV 0x00000003
 
/* EG+ */
#define R_0084FC_CP_STRMOUT_CNTL 0x0084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_028B94_VGT_STRMOUT_CONFIG 0x028B94
#define S_028B94_STREAMOUT_0_EN(x) (((x) & 0x1) << 0)
#define G_028B94_STREAMOUT_0_EN(x) (((x) >> 0) & 0x1)
#define C_028B94_STREAMOUT_0_EN 0xFFFFFFFE
#define S_028B94_STREAMOUT_1_EN(x) (((x) & 0x1) << 1)
#define G_028B94_STREAMOUT_1_EN(x) (((x) >> 1) & 0x1)
#define C_028B94_STREAMOUT_1_EN 0xFFFFFFFD
#define S_028B94_STREAMOUT_2_EN(x) (((x) & 0x1) << 2)
#define G_028B94_STREAMOUT_2_EN(x) (((x) >> 2) & 0x1)
#define C_028B94_STREAMOUT_2_EN 0xFFFFFFFB
#define S_028B94_STREAMOUT_3_EN(x) (((x) & 0x1) << 3)
#define G_028B94_STREAMOUT_3_EN(x) (((x) >> 3) & 0x1)
#define C_028B94_STREAMOUT_3_EN 0xFFFFFFF7
#define S_028B94_RAST_STREAM(x) (((x) & 0x07) << 4)
#define G_028B94_RAST_STREAM(x) (((x) >> 4) & 0x07)
#define C_028B94_RAST_STREAM 0xFFFFFF8F
#define S_028B94_RAST_STREAM_MASK(x) (((x) & 0x0F) << 8) /* SI+ */
#define G_028B94_RAST_STREAM_MASK(x) (((x) >> 8) & 0x0F)
#define C_028B94_RAST_STREAM_MASK 0xFFFFF0FF
#define S_028B94_USE_RAST_STREAM_MASK(x) (((x) & 0x1) << 31) /* SI+ */
#define G_028B94_USE_RAST_STREAM_MASK(x) (((x) >> 31) & 0x1)
#define C_028B94_USE_RAST_STREAM_MASK 0x7FFFFFFF
#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x028B98
#define S_028B98_STREAM_0_BUFFER_EN(x) (((x) & 0x0F) << 0)
#define G_028B98_STREAM_0_BUFFER_EN(x) (((x) >> 0) & 0x0F)
#define C_028B98_STREAM_0_BUFFER_EN 0xFFFFFFF0
#define S_028B98_STREAM_1_BUFFER_EN(x) (((x) & 0x0F) << 4)
#define G_028B98_STREAM_1_BUFFER_EN(x) (((x) >> 4) & 0x0F)
#define C_028B98_STREAM_1_BUFFER_EN 0xFFFFFF0F
#define S_028B98_STREAM_2_BUFFER_EN(x) (((x) & 0x0F) << 8)
#define G_028B98_STREAM_2_BUFFER_EN(x) (((x) >> 8) & 0x0F)
#define C_028B98_STREAM_2_BUFFER_EN 0xFFFFF0FF
#define S_028B98_STREAM_3_BUFFER_EN(x) (((x) & 0x0F) << 12)
#define G_028B98_STREAM_3_BUFFER_EN(x) (((x) >> 12) & 0x0F)
#define C_028B98_STREAM_3_BUFFER_EN 0xFFFF0FFF
 
#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
 
#define CM_R_028804_DB_EQAA 0x00028804
#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4)
#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8)
#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12)
#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16)
#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17)
#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18)
#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x7) << 24)
#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27)
#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
#define S_028BDC_EXPAND_LINE_WIDTH(x) (((x) & 0x1) << 9)
#define G_028BDC_EXPAND_LINE_WIDTH(x) (((x) >> 9) & 0x1)
#define C_028BDC_EXPAND_LINE_WIDTH 0xFFFFFDFF
#define S_028BDC_LAST_PIXEL(x) (((x) & 0x1) << 10)
#define G_028BDC_LAST_PIXEL(x) (((x) >> 10) & 0x1)
#define C_028BDC_LAST_PIXEL 0xFFFFFBFF
#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0
#define S_028BE0_MSAA_NUM_SAMPLES(x) (((x) & 0x7) << 0)
#define S_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4)
#define S_028BE0_MAX_SAMPLE_DIST(x) (((x) & 0xf) << 13)
#define S_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) & 0x7) << 20)
#define S_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) & 0x3) << 24)
#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8
#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08
#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18
#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28
 
#define EG_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 17)
#define SI_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 13)
 
/* CIK+ */
#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_elf_util.c
0,0 → 1,222
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#include "radeon_elf_util.h"
#include "r600_pipe_common.h"
 
#include "util/u_memory.h"
 
#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
 
static void parse_symbol_table(Elf_Data *symbol_table_data,
const GElf_Shdr *symbol_table_header,
struct radeon_shader_binary *binary)
{
GElf_Sym symbol;
unsigned i = 0;
unsigned symbol_count =
symbol_table_header->sh_size / symbol_table_header->sh_entsize;
 
/* We are over-allocating this list, because symbol_count gives the
* total number of symbols, and we will only be filling the list
* with offsets of global symbols. The memory savings from
* allocating the correct size of this list will be small, and
* I don't think it is worth the cost of pre-computing the number
* of global symbols.
*/
binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
 
while (gelf_getsym(symbol_table_data, i++, &symbol)) {
unsigned j; /* sort index; renamed to avoid shadowing the outer symbol counter 'i' */
if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
symbol.st_shndx == 0 /* Undefined symbol */) {
continue;
}
 
binary->global_symbol_offsets[binary->global_symbol_count] =
symbol.st_value;
 
/* Sort the list using bubble sort. This list will usually
* be small. */
for (j = binary->global_symbol_count; j > 0; --j) {
uint64_t lhs = binary->global_symbol_offsets[j - 1];
uint64_t rhs = binary->global_symbol_offsets[j];
if (lhs < rhs) {
break;
}
binary->global_symbol_offsets[j] = lhs;
binary->global_symbol_offsets[j - 1] = rhs;
}
++binary->global_symbol_count;
}
}
 
static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
unsigned symbol_sh_link,
struct radeon_shader_binary *binary)
{
unsigned i;
 
if (!relocs || !symbols || !binary->reloc_count) {
return;
}
binary->relocs = CALLOC(binary->reloc_count,
sizeof(struct radeon_shader_reloc));
for (i = 0; i < binary->reloc_count; i++) {
GElf_Sym symbol;
GElf_Rel rel;
char *symbol_name;
struct radeon_shader_reloc *reloc = &binary->relocs[i];
 
gelf_getrel(relocs, i, &rel);
gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
 
reloc->offset = rel.r_offset;
reloc->name = strdup(symbol_name);
}
}
 
void radeon_elf_read(const char *elf_data, unsigned elf_size,
struct radeon_shader_binary *binary,
unsigned debug)
{
char *elf_buffer;
Elf *elf;
Elf_Scn *section = NULL;
Elf_Data *symbols = NULL, *relocs = NULL;
size_t section_str_index;
unsigned symbol_sh_link = 0;
 
/* One of the libelf implementations
* (http://www.mr511.de/software/english.htm) requires calling
* elf_version() before elf_memory().
*/
elf_version(EV_CURRENT);
elf_buffer = MALLOC(elf_size);
memcpy(elf_buffer, elf_data, elf_size);
 
elf = elf_memory(elf_buffer, elf_size);
 
elf_getshdrstrndx(elf, &section_str_index);
binary->disassembled = 0;
 
while ((section = elf_nextscn(elf, section))) {
const char *name;
Elf_Data *section_data = NULL;
GElf_Shdr section_header;
if (gelf_getshdr(section, &section_header) != &section_header) {
fprintf(stderr, "Failed to read ELF section header\n");
return;
}
name = elf_strptr(elf, section_str_index, section_header.sh_name);
if (!strcmp(name, ".text")) {
section_data = elf_getdata(section, section_data);
binary->code_size = section_data->d_size;
binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
memcpy(binary->code, section_data->d_buf, binary->code_size);
} else if (!strcmp(name, ".AMDGPU.config")) {
section_data = elf_getdata(section, section_data);
binary->config_size = section_data->d_size;
binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
memcpy(binary->config, section_data->d_buf, binary->config_size);
} else if (debug && !strcmp(name, ".AMDGPU.disasm")) {
binary->disassembled = 1;
section_data = elf_getdata(section, section_data);
fprintf(stderr, "\nShader Disassembly:\n\n");
fprintf(stderr, "%.*s\n", (int)section_data->d_size,
(char *)section_data->d_buf);
} else if (!strncmp(name, ".rodata", 7)) {
section_data = elf_getdata(section, section_data);
binary->rodata_size = section_data->d_size;
binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
} else if (!strncmp(name, ".symtab", 7)) {
symbols = elf_getdata(section, section_data);
symbol_sh_link = section_header.sh_link;
parse_symbol_table(symbols, &section_header, binary);
} else if (!strcmp(name, ".rel.text")) {
relocs = elf_getdata(section, section_data);
binary->reloc_count = section_header.sh_size /
section_header.sh_entsize;
}
}
 
parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
 
if (elf) {
elf_end(elf);
}
FREE(elf_buffer);
 
/* Cache the config size per symbol */
if (binary->global_symbol_count) {
binary->config_size_per_symbol =
binary->config_size / binary->global_symbol_count;
} else {
binary->global_symbol_count = 1;
binary->config_size_per_symbol = binary->config_size;
}
}
 
const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset)
{
unsigned i;
for (i = 0; i < binary->global_symbol_count; ++i) {
if (binary->global_symbol_offsets[i] == symbol_offset) {
unsigned offset = i * binary->config_size_per_symbol;
return binary->config + offset;
}
}
return binary->config;
}
 
void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
unsigned reloc_count)
{
unsigned i;
for (i = 0; i < reloc_count; i++) {
FREE(relocs[i].name);
}
FREE(relocs);
}
 
void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
unsigned free_relocs)
{
FREE(binary->code);
FREE(binary->config);
FREE(binary->rodata);
 
if (free_relocs) {
radeon_shader_binary_free_relocs(binary->relocs,
binary->reloc_count);
}
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_elf_util.h
0,0 → 1,64
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_ELF_UTIL_H
#define RADEON_ELF_UTIL_H
 
#include <stdint.h>
 
struct radeon_shader_binary;
struct radeon_shader_reloc;
 
/*
* Parse the ELF binary stored in \p elf_data and create a
* radeon_shader_binary object.
*/
void radeon_elf_read(const char *elf_data, unsigned elf_size,
struct radeon_shader_binary *binary, unsigned debug);
 
/**
* @returns A pointer to the start of the configuration information for
* the function starting at \p symbol_offset of the binary.
*/
const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset);
 
/**
* Free all memory allocated for members of \p binary. This function does
* not free \p binary.
*
* @param free_relocs If false, reloc information will not be freed.
*/
void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
unsigned free_relocs);
 
/**
* Free \p relocs and all member data.
*/
void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
unsigned reloc_count);
#endif /* RADEON_ELF_UTIL_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm.h
0,0 → 1,212
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_LLVM_H
#define RADEON_LLVM_H
 
#include <llvm-c/Core.h>
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_tgsi.h"
 
#define RADEON_LLVM_MAX_INPUTS (32 * 4)
#define RADEON_LLVM_MAX_OUTPUTS (32 * 4)
#define RADEON_LLVM_MAX_ARRAYS 16
 
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
 
#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
 
struct radeon_llvm_branch {
LLVMBasicBlockRef endif_block;
LLVMBasicBlockRef if_block;
LLVMBasicBlockRef else_block;
unsigned has_else;
};
 
struct radeon_llvm_loop {
LLVMBasicBlockRef loop_block;
LLVMBasicBlockRef endloop_block;
};
 
struct radeon_llvm_context {
 
struct lp_build_tgsi_soa_context soa;
 
unsigned chip_class;
unsigned type;
unsigned face_gpr;
unsigned two_side;
unsigned clip_vertex;
unsigned inputs_count;
struct r600_shader_io * r600_inputs;
struct r600_shader_io * r600_outputs;
struct pipe_stream_output_info *stream_outputs;
unsigned color_buffer_count;
unsigned fs_color_all;
unsigned alpha_to_one;
unsigned has_txq_cube_array_z_comp;
unsigned uses_tex_buffers;
unsigned has_compressed_msaa_texturing;
 
/*=== Front end configuration ===*/
 
/* Special Intrinsics */
 
/** Write to an output register: float store_output(float, i32) */
const char * store_output_intr;
 
/** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
* The swizzle is an unsigned integer that encodes one TGSI_SWIZZLE_* value
* in each 2-bit field:
* Swizzle{0-1} = X Channel
* Swizzle{2-3} = Y Channel
* Swizzle{4-5} = Z Channel
* Swizzle{6-7} = W Channel
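* For example, the identity swizzle XYZW encodes as 0xE4
* (binary 11 10 01 00: W=3, Z=2, Y=1, X=0).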
*/
const char * swizzle_intr;
 
/* Instructions that are not described by any of the TGSI opcodes. */
 
/** This function is responsible for initializing the inputs array and will be
* called once for each input declared in the TGSI shader.
*/
void (*load_input)(struct radeon_llvm_context *,
unsigned input_index,
const struct tgsi_full_declaration *decl);
 
void (*load_system_value)(struct radeon_llvm_context *,
unsigned index,
const struct tgsi_full_declaration *decl);
 
/** User data to use with the callbacks */
void * userdata;
 
/** This array contains the input values for the shader. Typically these
* values will be in the form of a target intrinsic that will inform the
* backend how to load the actual inputs to the shader.
*/
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
unsigned output_reg_count;
 
/** This pointer is used to contain the temporary values.
* The number of temporaries used in TGSI can't be bounded at compile time,
* so we must allocate this array at runtime.
*/
LLVMValueRef *temps;
unsigned temps_count;
LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
/*=== Private Members ===*/
 
struct radeon_llvm_branch *branch;
struct radeon_llvm_loop *loop;
 
unsigned branch_depth;
unsigned branch_depth_max;
unsigned loop_depth;
unsigned loop_depth_max;
 
struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
unsigned num_arrays;
 
LLVMValueRef main_fn;
 
struct gallivm_state gallivm;
};
 
static inline LLVMTypeRef tgsi2llvmtype(
struct lp_build_tgsi_context * bld_base,
enum tgsi_opcode_type type)
{
LLVMContextRef ctx = bld_base->base.gallivm->context;
 
switch (type) {
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
return LLVMFloatTypeInContext(ctx);
default: break;
}
return 0;
}
 
static inline LLVMValueRef bitcast(
struct lp_build_tgsi_context * bld_base,
enum tgsi_opcode_type type,
LLVMValueRef value
)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 
if (dst_type)
return LLVMBuildBitCast(builder, value, dst_type, "");
else
return value;
}
 
 
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef *coords_arg);
 
void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
 
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
LLVMTypeRef *ParamTypes, unsigned ParamCount);
 
void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
 
inline static struct radeon_llvm_context * radeon_llvm_context(
struct lp_build_tgsi_context * bld_base)
{
return (struct radeon_llvm_context*)bld_base;
}
 
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);
 
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx);
 
LLVMValueRef
build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
LLVMAttribute attr);
 
void
build_tgsi_intrinsic_nomem(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
 
 
 
#endif /* RADEON_LLVM_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_emit.c
0,0 → 1,208
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
#include "radeon_llvm_emit.h"
#include "radeon_elf_util.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
 
#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Core.h>
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
#define CPU_STRING_LEN 30
#define FS_STRING_LEN 30
#define TRIPLE_STRING_LEN 7
 
/**
* Shader types for the LLVM backend.
*/
enum radeon_llvm_shader_type {
RADEON_LLVM_SHADER_PS = 0,
RADEON_LLVM_SHADER_VS = 1,
RADEON_LLVM_SHADER_GS = 2,
RADEON_LLVM_SHADER_CS = 3,
};
 
/**
* Set the shader type we want to compile
*
* @param type shader type to set
*/
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
{
char Str[2];
enum radeon_llvm_shader_type llvm_type;
 
switch (type) {
case TGSI_PROCESSOR_VERTEX:
llvm_type = RADEON_LLVM_SHADER_VS;
break;
case TGSI_PROCESSOR_GEOMETRY:
llvm_type = RADEON_LLVM_SHADER_GS;
break;
case TGSI_PROCESSOR_FRAGMENT:
llvm_type = RADEON_LLVM_SHADER_PS;
break;
case TGSI_PROCESSOR_COMPUTE:
llvm_type = RADEON_LLVM_SHADER_CS;
break;
default:
assert(0);
}
 
sprintf(Str, "%1d", llvm_type);
 
LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
}
 
static void init_r600_target(void)
{
static unsigned initialized = 0;
if (!initialized) {
LLVMInitializeR600TargetInfo();
LLVMInitializeR600Target();
LLVMInitializeR600TargetMC();
LLVMInitializeR600AsmPrinter();
initialized = 1;
}
}
 
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
{
LLVMTargetRef target = NULL;
char *err_message = NULL;
 
init_r600_target();
 
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
fprintf(stderr, "Cannot find target for triple %s ", triple);
if (err_message) {
fprintf(stderr, "%s\n", err_message);
}
LLVMDisposeMessage(err_message);
return NULL;
}
return target;
}
 
#if HAVE_LLVM >= 0x0305
 
static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
{
if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
unsigned int *diagnosticflag = (unsigned int *)context;
char *diaginfo_message = LLVMGetDiagInfoDescription(di);
 
*diagnosticflag = 1;
fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", diaginfo_message);
LLVMDisposeMessage(diaginfo_message);
}
}
 
#endif
 
/**
* Compile an LLVM module to machine code.
*
* @returns 0 for success, 1 for failure
*/
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
{
 
char cpu[CPU_STRING_LEN];
char fs[FS_STRING_LEN];
char *err;
bool dispose_tm = false;
LLVMContextRef llvm_ctx;
unsigned rval = 0;
LLVMMemoryBufferRef out_buffer;
unsigned buffer_size;
const char *buffer_data;
char triple[TRIPLE_STRING_LEN];
LLVMBool mem_err;
 
if (!tm) {
strncpy(triple, "r600--", TRIPLE_STRING_LEN);
LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
if (!target) {
return 1;
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
if (dump) {
strncpy(fs, "+DumpCode", FS_STRING_LEN);
}
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
LLVMCodeGenLevelDefault, LLVMRelocDefault,
LLVMCodeModelDefault);
dispose_tm = true;
}
if (dump) {
LLVMDumpModule(M);
}
/* Set up the diagnostic handler */
llvm_ctx = LLVMGetModuleContext(M);
 
#if HAVE_LLVM >= 0x0305
LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
#endif
rval = 0;
 
/* Compile IR */
mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
&out_buffer);
 
/* Process Errors/Warnings */
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
FREE(err);
/* Only dispose the target machine if it was created above;
* a caller-supplied tm remains owned by the caller. */
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
return 1;
}
 
if (0 != rval) {
fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
}
 
/* Extract shader code */
buffer_size = LLVMGetBufferSize(out_buffer);
buffer_data = LLVMGetBufferStart(out_buffer);
 
radeon_elf_read(buffer_data, buffer_size, binary, dump);
 
/* Clean up */
LLVMDisposeMemoryBuffer(out_buffer);
 
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
return rval;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_emit.h
0,0 → 1,46
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_LLVM_EMIT_H
#define RADEON_LLVM_EMIT_H
 
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
 
struct radeon_shader_binary;
 
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
 
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
 
unsigned radeon_llvm_compile(
LLVMModuleRef M,
struct radeon_shader_binary *binary,
const char * gpu_family,
unsigned dump,
LLVMTargetMachineRef tm);
 
#endif /* RADEON_LLVM_EMIT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_util.c
0,0 → 1,118
/*
* Copyright 2012, 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#include "radeon_llvm_util.h"
#include "util/u_memory.h"
 
#include <llvm-c/BitReader.h>
#include <llvm-c/Core.h>
#include <llvm-c/Target.h>
#include <llvm-c/Transforms/IPO.h>
#include <llvm-c/Transforms/PassManagerBuilder.h>
 
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const char * bitcode, unsigned bitcode_len)
{
LLVMMemoryBufferRef buf;
LLVMModuleRef module;
 
buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
bitcode_len, "radeon");
LLVMParseBitcodeInContext(ctx, buf, &module, NULL);
LLVMDisposeMemoryBuffer(buf);
return module;
}
 
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
}
 
static void radeon_llvm_optimize(LLVMModuleRef mod)
{
const char *data_layout = LLVMGetDataLayout(mod);
LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
 
/* Function calls are not supported yet, so we need to inline
* everything. The most efficient way to do this is to add
* the always_inline attribute to all non-kernel functions
* and then run the Always Inline pass. The Always Inline
* pass will automatically inline functions with this attribute
* and does not perform the expensive cost analysis that the normal
* inliner does.
*/
 
LLVMValueRef fn;
for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
/* All the non-kernel functions have internal linkage */
if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
}
}
 
LLVMAddTargetData(TD, pass_manager);
LLVMAddAlwaysInlinerPass(pass_manager);
LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
 
LLVMRunPassManager(pass_manager, mod);
LLVMPassManagerBuilderDispose(builder);
LLVMDisposePassManager(pass_manager);
LLVMDisposeTargetData(TD);
}
 
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod;
unsigned num_kernels;
LLVMValueRef *kernel_metadata;
unsigned i;
 
mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
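/* Delete every kernel except the one at 'index' so that the returned
* module contains only the requested kernel. */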
for (i = 0; i < num_kernels; i++) {
LLVMValueRef kernel_signature, *kernel_function;
unsigned num_kernel_md_operands;
if (i == index) {
continue;
}
kernel_signature = kernel_metadata[i];
num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature);
kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef));
LLVMGetMDNodeOperands(kernel_signature, kernel_function);
LLVMDeleteFunction(*kernel_function);
FREE(kernel_function);
}
FREE(kernel_metadata);
radeon_llvm_optimize(mod);
return mod;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_util.h
0,0 → 1,39
/*
* Copyright 2012, 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_LLVM_UTIL_H
#define RADEON_LLVM_UTIL_H
 
#include <llvm-c/Core.h>
 
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const char * bitcode, unsigned bitcode_len);
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const char *bitcode, unsigned bitcode_len);
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const char *bitcode, unsigned bitcode_len);
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
0,0 → 1,1639
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
#include "radeon_llvm.h"
 
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_swizzle.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_debug.h"
 
#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>
 
static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
{
return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
}
 
static struct radeon_llvm_branch * get_current_branch(
struct radeon_llvm_context * ctx)
{
return ctx->branch_depth > 0 ?
ctx->branch + (ctx->branch_depth - 1) : NULL;
}
 
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
return (index * 4) + chan;
}
 
static LLVMValueRef emit_swizzle(
struct lp_build_tgsi_context * bld_base,
LLVMValueRef value,
unsigned swizzle_x,
unsigned swizzle_y,
unsigned swizzle_z,
unsigned swizzle_w)
{
LLVMValueRef swizzles[4];
LLVMTypeRef i32t =
LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 
swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 
return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
value,
LLVMGetUndef(LLVMTypeOf(value)),
LLVMConstVector(swizzles, 4), "");
}
 
static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
unsigned File, const struct tgsi_ind_register *reg)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
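/* Indirect accesses that are not to a declared temporary array fall
 * back to the full range of the register file. */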
if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
struct tgsi_declaration_range range;
range.First = 0;
range.Last = bld_base->info->file_max[File];
return range;
}
 
return ctx->arrays[reg->ArrayID - 1];
}
 
static LLVMValueRef
emit_array_index(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_ind_register *reg,
unsigned offset)
{
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
 
LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
}
 
static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle);
 
static LLVMValueRef
emit_array_fetch(
struct lp_build_tgsi_context *bld_base,
unsigned File, enum tgsi_opcode_type type,
struct tgsi_declaration_range range,
unsigned swizzle)
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
unsigned i, size = range.Last - range.First + 1;
LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
LLVMValueRef result = LLVMGetUndef(vec);
 
struct tgsi_full_src_register tmp_reg = {};
tmp_reg.Register.File = File;
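/* Fetch each register in the range and insert the scalar values into
 * one vector. */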
 
for (i = 0; i < size; ++i) {
tmp_reg.Register.Index = i + range.First;
LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
result = LLVMBuildInsertElement(builder, result, temp,
lp_build_const_int32(gallivm, i), "");
}
return result;
}
 
static bool uses_temp_indirect_addressing(
struct lp_build_tgsi_context *bld_base)
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
}
 
static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef result = NULL, ptr;
 
if (swizzle == ~0) {
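/* A swizzle of ~0 requests all channels: fetch each one and gather
 * them into a single vector. */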
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
values[chan] = emit_fetch(bld_base, reg, type, chan);
}
return lp_build_gather_values(bld_base->base.gallivm, values,
TGSI_NUM_CHANNELS);
}
 
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
reg->Register.File, &reg->Indirect);
return LLVMBuildExtractElement(builder,
emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
"");
}
 
switch(reg->Register.File) {
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
}
 
case TGSI_FILE_INPUT:
result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
break;
 
case TGSI_FILE_TEMPORARY:
if (reg->Register.Index >= ctx->temps_count)
return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
if (uses_temp_indirect_addressing(bld_base)) {
ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
result = LLVMBuildLoad(builder, ptr, "");
break;
}
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
result = LLVMBuildLoad(builder, ptr, "");
break;
 
case TGSI_FILE_OUTPUT:
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
result = LLVMBuildLoad(builder, ptr, "");
break;
 
default:
return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
}
 
return bitcast(bld_base, type, result);
}
 
static LLVMValueRef fetch_system_value(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
 
LLVMValueRef cval = ctx->system_values[reg->Register.Index];
if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
cval = LLVMBuildExtractElement(gallivm->builder, cval,
lp_build_const_int32(gallivm, swizzle), "");
}
return bitcast(bld_base, type, cval);
}
 
static void emit_declaration(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_declaration *decl)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
unsigned first, last, i, idx;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
ctx->soa.addr[idx][chan] = lp_build_alloca(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type, "");
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
if (uses_temp_indirect_addressing(bld_base)) {
lp_emit_declaration_soa(bld_base, decl);
break;
}
first = decl->Range.First;
last = decl->Range.Last;
if (!ctx->temps_count) {
ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
}
for (idx = first; idx <= last; idx++) {
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
lp_build_alloca(bld_base->base.gallivm, bld_base->base.vec_type,
"temp");
}
}
break;
 
case TGSI_FILE_INPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
if (ctx->load_input)
ctx->load_input(ctx, idx, decl);
}
}
break;
 
case TGSI_FILE_SYSTEM_VALUE:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
ctx->load_system_value(ctx, idx, decl);
}
}
break;
 
case TGSI_FILE_OUTPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
assert(idx < RADEON_LLVM_MAX_OUTPUTS);
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
ctx->soa.bld_base.base.elem_type, "");
}
}
 
ctx->output_reg_count = MAX2(ctx->output_reg_count,
decl->Range.Last + 1);
break;
}
 
default:
break;
}
}
 
static void
emit_store(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_instruction * inst,
const struct tgsi_opcode_info * info,
LLVMValueRef dst[4])
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
struct lp_build_context base = bld->bld_base.base;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef temp_ptr;
unsigned chan, chan_index;
boolean is_vec_store = FALSE;
 
if (dst[0]) {
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
is_vec_store = (k == LLVMVectorTypeKind);
}
 
if (is_vec_store) {
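/* Scalarize the vector: extract each enabled channel and re-emit the
 * store with scalar values. */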
LLVMValueRef values[4] = {};
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
LLVMValueRef index = lp_build_const_int32(gallivm, chan);
values[chan] = LLVMBuildExtractElement(gallivm->builder,
dst[0], index, "");
}
bld_base->emit_store(bld_base, inst, info, values);
return;
}
 
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
LLVMValueRef value = dst[chan_index];
 
if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
struct lp_build_emit_data clamp_emit_data;
 
memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
clamp_emit_data.arg_count = 3;
clamp_emit_data.args[0] = value;
clamp_emit_data.args[2] = base.one;
 
switch(inst->Instruction.Saturate) {
case TGSI_SAT_ZERO_ONE:
clamp_emit_data.args[1] = base.zero;
break;
case TGSI_SAT_MINUS_PLUS_ONE:
clamp_emit_data.args[1] = LLVMConstReal(
base.elem_type, -1.0f);
break;
default:
assert(0);
}
value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
&clamp_emit_data);
}
 
if (reg->Register.File == TGSI_FILE_ADDRESS) {
temp_ptr = bld->addr[reg->Register.Index][chan_index];
LLVMBuildStore(builder, value, temp_ptr);
continue;
}
value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
reg->Register.File, &reg->Indirect);
 
unsigned i, size = range.Last - range.First + 1;
LLVMValueRef array = LLVMBuildInsertElement(builder,
emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
value, emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
 
for (i = 0; i < size; ++i) {
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[i + range.First][chan_index];
break;
 
case TGSI_FILE_TEMPORARY:
if (range.First + i >= ctx->temps_count)
continue;
if (uses_temp_indirect_addressing(bld_base))
temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
else
temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
break;
 
default:
return;
}
value = LLVMBuildExtractElement(builder, array,
lp_build_const_int32(gallivm, i), "");
LLVMBuildStore(builder, value, temp_ptr);
}
 
} else {
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[reg->Register.Index][chan_index];
break;
 
case TGSI_FILE_TEMPORARY:
if (reg->Register.Index >= ctx->temps_count)
continue;
if (uses_temp_indirect_addressing(bld_base)) {
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
break;
}
temp_ptr = ctx->temps[TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
break;
 
default:
return;
}
LLVMBuildStore(builder, value, temp_ptr);
}
}
}
 
static void bgnloop_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBasicBlockRef loop_block;
LLVMBasicBlockRef endloop_block;
endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDLOOP");
loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
endloop_block, "LOOP");
LLVMBuildBr(gallivm->builder, loop_block);
LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
 
if (++ctx->loop_depth > ctx->loop_depth_max) {
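/* Grow the loop-nesting stack, doubling its capacity each time it
 * overflows. */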
unsigned new_max = ctx->loop_depth_max << 1;
 
if (!new_max)
new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
 
ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
sizeof(ctx->loop[0]),
new_max * sizeof(ctx->loop[0]));
ctx->loop_depth_max = new_max;
}
 
ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
}
 
static void brk_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
 
LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
}
 
static void cont_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
 
LLVMBuildBr(gallivm->builder, current_loop->loop_block);
}
 
static void else_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
 
/* We need to add a terminator to the current block if the previous
* instruction was an ENDIF. Example:
* IF
* [code]
* IF
* [code]
* ELSE
* [code]
* ENDIF <--
* ELSE<--
* [code]
* ENDIF
*/
 
if (current_block != current_branch->if_block) {
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
current_branch->has_else = 1;
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
}
 
static void endif_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
 
/* If we have consecutive ENDIF instructions, then the first ENDIF
* will not have a terminator, so we need to add one. */
if (current_block != current_branch->if_block
&& current_block != current_branch->else_block
&& !LLVMGetBasicBlockTerminator(current_block)) {
 
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
 
if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
 
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
ctx->branch_depth--;
}
 
static void endloop_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
 
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
LLVMBuildBr(gallivm->builder, current_loop->loop_block);
}
 
LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
ctx->loop_depth--;
}
 
static void if_cond_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef cond)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBasicBlockRef if_block, else_block, endif_block;
 
endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDIF");
if_block = LLVMInsertBasicBlockInContext(gallivm->context,
endif_block, "IF");
else_block = LLVMInsertBasicBlockInContext(gallivm->context,
endif_block, "ELSE");
LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
 
if (++ctx->branch_depth > ctx->branch_depth_max) {
unsigned new_max = ctx->branch_depth_max << 1;
 
if (!new_max)
new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
 
ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
sizeof(ctx->branch[0]),
new_max * sizeof(ctx->branch[0]));
ctx->branch_depth_max = new_max;
}
 
ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
ctx->branch[ctx->branch_depth - 1].if_block = if_block;
ctx->branch[ctx->branch_depth - 1].else_block = else_block;
ctx->branch[ctx->branch_depth - 1].has_else = 0;
}
 
static void if_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMValueRef cond;
 
cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
emit_data->args[0],
bld_base->base.zero, "");
 
if_cond_emit(action, bld_base, emit_data, cond);
}
 
static void uif_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMValueRef cond;
 
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
bld_base->int_bld.zero, "");
 
if_cond_emit(action, bld_base, emit_data, cond);
}
 
static void kill_if_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned i;
LLVMValueRef conds[TGSI_NUM_CHANNELS];
 
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
bld_base->base.zero, "");
}
 
/* Or the conditions together */
for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
}
 
emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
emit_data->arg_count = 1;
emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
lp_build_const_float(gallivm, -1.0f),
bld_base->base.zero, "");
}
 
static void kil_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
unsigned i;
for (i = 0; i < emit_data->arg_count; i++) {
emit_data->output[i] = lp_build_intrinsic_unary(
bld_base->base.gallivm->builder,
action->intr_name,
emit_data->dst_type, emit_data->args[i]);
}
}
 
void radeon_llvm_emit_prepare_cube_coords(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef *coords_arg)
{
 
unsigned target = emit_data->inst->Texture.Texture;
unsigned opcode = emit_data->inst->Instruction.Opcode;
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef type = bld_base->base.elem_type;
LLVMValueRef coords[4];
LLVMValueRef mad_args[3];
LLVMValueRef idx;
LLVMValueRef cube_vec;
LLVMValueRef v;
unsigned i;
 
cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
&cube_vec, 1, LLVMReadNoneAttribute);
 
for (i = 0; i < 4; ++i) {
idx = lp_build_const_int32(gallivm, i);
coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
}
 
coords[2] = build_intrinsic(builder, "fabs",
type, &coords[2], 1, LLVMReadNoneAttribute);
coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
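/* Each face coordinate is computed as coord * (1 / |major axis|) + 1.5;
 * the +1.5 bias puts the coordinate in the range the sampler hardware
 * expects. */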
 
mad_args[1] = coords[2];
mad_args[2] = LLVMConstReal(type, 1.5);
 
mad_args[0] = coords[0];
coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
mad_args[0], mad_args[1], mad_args[2]);
 
mad_args[0] = coords[1];
coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
mad_args[0], mad_args[1], mad_args[2]);
 
/* apply the xyz = yxw swizzle to the coords */
coords[2] = coords[3];
coords[3] = coords[1];
coords[1] = coords[0];
coords[0] = coords[3];
 
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
/* For cube arrays, coord.z = coord.w (the array index) * 8 + face;
 * the coords_arg.w component carries the array index. */
coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
}
 
/* Preserve compare/lod/bias. Put it in coords.w. */
if (opcode == TGSI_OPCODE_TEX2 ||
opcode == TGSI_OPCODE_TXB2 ||
opcode == TGSI_OPCODE_TXL2) {
coords[3] = coords_arg[4];
} else if (opcode == TGSI_OPCODE_TXB ||
opcode == TGSI_OPCODE_TXL ||
target == TGSI_TEXTURE_SHADOWCUBE) {
coords[3] = coords_arg[3];
}
 
memcpy(coords_arg, coords, sizeof(coords));
}
 
static void txd_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
 
LLVMValueRef coords[4];
unsigned chan, src;
for (src = 0; src < 3; src++) {
for (chan = 0; chan < 4; chan++)
coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
 
emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
}
emit_data->arg_count = 3;
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
}
 
 
static void txp_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
LLVMValueRef src_w;
unsigned chan;
LLVMValueRef coords[5];
 
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
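/* Projective texturing: divide the x, y and z coordinates by w. */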
 
for (chan = 0; chan < 3; chan++ ) {
LLVMValueRef arg = lp_build_emit_fetch(bld_base,
emit_data->inst, 0, chan);
coords[chan] = lp_build_emit_llvm_binary(bld_base,
TGSI_OPCODE_DIV, arg, src_w);
}
coords[3] = bld_base->base.one;
 
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
}
 
emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
emit_data->arg_count = 1;
}
 
static void tex_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
/* XXX: lp_build_swizzle_aos() was failing with wrong arg types
 * when we used CHAN_ALL. We should be able to get this to work,
 * but for now we will swizzle it ourselves:
 *
 * emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
 *                                          0, CHAN_ALL);
 */
 
const struct tgsi_full_instruction * inst = emit_data->inst;
 
LLVMValueRef coords[5];
unsigned chan;
for (chan = 0; chan < 4; chan++) {
coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
}
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
/* These instructions have an additional operand that should be packed
 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
 * That operand is passed as a float value in the args array right
 * after the coord vector. After packing it is no longer used, which
 * is why arg_count is not increased. */
coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
}
 
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
}
 
emit_data->arg_count = 1;
emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
}
 
static void txf_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
const struct tgsi_texture_offset * off = inst->TexOffsets;
LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
 
/* fetch tex coords */
tex_fetch_args(bld_base, emit_data);
 
/* fetch tex offsets */
if (inst->Texture.NumOffsets) {
assert(inst->Texture.NumOffsets == 1);
 
emit_data->args[1] = LLVMConstBitCast(
bld->immediates[off->Index][off->SwizzleX],
offset_type);
emit_data->args[2] = LLVMConstBitCast(
bld->immediates[off->Index][off->SwizzleY],
offset_type);
emit_data->args[3] = LLVMConstBitCast(
bld->immediates[off->Index][off->SwizzleZ],
offset_type);
} else {
emit_data->args[1] = bld_base->int_bld.zero;
emit_data->args[2] = bld_base->int_bld.zero;
emit_data->args[3] = bld_base->int_bld.zero;
}
 
emit_data->arg_count = 4;
}
 
static void emit_icmp(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
unsigned pred;
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMContextRef context = bld_base->base.gallivm->context;
 
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
default:
assert(!"unknown instruction");
pred = 0;
break;
}
 
LLVMValueRef v = LLVMBuildICmp(builder, pred,
emit_data->args[0], emit_data->args[1],"");
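/* Sign-extend the i1 compare result to a 0 / 0xffffffff mask. */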
 
v = LLVMBuildSExtOrBitCast(builder, v,
LLVMInt32TypeInContext(context), "");
 
emit_data->output[emit_data->chan] = v;
}
 
static void emit_ucmp(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
bld_base->uint_bld.elem_type, "");
 
LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
bld_base->uint_bld.zero, "");
 
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
}
 
static void emit_cmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMRealPredicate pred;
LLVMValueRef cond;
 
/* Use ordered comparisons for everything but NE, as is usual
 * for float comparisons.
 */
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
default: assert(!"unknown instruction"); pred = 0; break;
}
 
cond = LLVMBuildFCmp(builder,
pred, emit_data->args[0], emit_data->args[1], "");
 
emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
cond, bld_base->base.one, bld_base->base.zero, "");
}
 
static void emit_fcmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMContextRef context = bld_base->base.gallivm->context;
LLVMRealPredicate pred;
 
/* Use ordered comparisons for everything but NE, as is usual
 * for float comparisons.
 */
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
default: assert(!"unknown instruction"); pred = 0; break;
}
 
LLVMValueRef v = LLVMBuildFCmp(builder, pred,
emit_data->args[0], emit_data->args[1],"");
 
v = LLVMBuildSExtOrBitCast(builder, v,
LLVMInt32TypeInContext(context), "");
 
emit_data->output[emit_data->chan] = v;
}
 
static void emit_not(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
emit_data->args[0]);
emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
}
 
static void emit_arl(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
floor_index, bld_base->base.int_elem_type , "");
}
 
static void emit_and(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_or(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_uadd(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_udiv(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_idiv(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_mod(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_umod(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_shl(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_ushr(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
emit_data->args[0], emit_data->args[1], "");
}
static void emit_ishr(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_xor(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_ssg(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
LLVMValueRef cmp, val;
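/* Compute sign(x) with two selects: values greater than zero clamp
 * to 1, then anything still negative clamps to -1; zero is unchanged. */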
 
if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
} else { // float SSG
cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
}
 
emit_data->output[emit_data->chan] = val;
}
 
static void emit_ineg(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
emit_data->args[0], "");
}
 
static void emit_f2i(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
emit_data->args[0], bld_base->int_bld.elem_type, "");
}
 
static void emit_f2u(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
emit_data->args[0], bld_base->uint_bld.elem_type, "");
}
 
static void emit_i2f(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
emit_data->args[0], bld_base->base.elem_type, "");
}
 
static void emit_u2f(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
emit_data->args[0], bld_base->base.elem_type, "");
}
 
static void emit_immediate(struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_immediate *imm)
{
unsigned i;
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 
for (i = 0; i < 4; ++i) {
ctx->soa.immediates[ctx->soa.num_immediates][i] =
LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
}
 
ctx->soa.num_immediates++;
}
 
LLVMValueRef
build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
LLVMAttribute attr)
{
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
LLVMValueRef function;
 
function = LLVMGetNamedFunction(module, name);
if(!function) {
LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
unsigned i;
 
assert(num_args <= LP_MAX_FUNC_ARGS);
 
for(i = 0; i < num_args; ++i) {
assert(args[i]);
arg_types[i] = LLVMTypeOf(args[i]);
}
 
function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
 
if (attr)
LLVMAddFunctionAttr(function, attr);
}
 
return LLVMBuildCall(builder, function, args, num_args, "");
}
 
static void build_tgsi_intrinsic(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMAttribute attr)
{
struct lp_build_context * base = &bld_base->base;
emit_data->output[emit_data->chan] = build_intrinsic(
base->gallivm->builder, action->intr_name,
emit_data->dst_type, emit_data->args,
emit_data->arg_count, attr);
}
 
void
build_tgsi_intrinsic_nomem(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
}
 
static void emit_bfi(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfi_args[3];
 
// Calculate the bitmask: ((1 << src3) - 1) << src2
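// e.g. src2 = 4, src3 = 8: ((1 << 8) - 1) << 4 = 0x00000ff0, an 8-bit field at bit 4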
bfi_args[0] = LLVMBuildShl(builder,
LLVMBuildSub(builder,
LLVMBuildShl(builder,
bld_base->int_bld.one,
emit_data->args[3], ""),
bld_base->int_bld.one, ""),
emit_data->args[2], "");
 
bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
emit_data->args[2], "");
 
bfi_args[2] = emit_data->args[0];
 
/* Calculate:
* (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/
emit_data->output[emit_data->chan] =
LLVMBuildXor(builder, bfi_args[2],
LLVMBuildAnd(builder, bfi_args[0],
LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
""), ""), "");
}
 
/* This is essentially ffs() in C: find the index of the least significant set bit. */
static void emit_lsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef args[2] = {
emit_data->args[0],
 
/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
* add special code to check for x=0. The reason is that
* the LLVM behavior for x=0 is different from what we
* need here.
*
* The hardware already implements the correct behavior.
*/
lp_build_const_int32(gallivm, 1)
};
 
emit_data->output[emit_data->chan] =
build_intrinsic(gallivm->builder, "llvm.cttz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
}
 
/* Find the last bit set. */
static void emit_umsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef args[2] = {
emit_data->args[0],
/* Don't generate code for handling zero: */
lp_build_const_int32(gallivm, 1)
};
 
LLVMValueRef msb =
build_intrinsic(builder, "llvm.ctlz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
 
/* The HW returns the last bit index from MSB, but TGSI wants
* the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
msb, "");
 
/* Check for zero: */
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder,
LLVMBuildICmp(builder, LLVMIntEQ, args[0],
bld_base->uint_bld.zero, ""),
lp_build_const_int32(gallivm, -1), msb, "");
}
 
/* Find the last bit opposite of the sign bit. */
static void emit_imsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef arg = emit_data->args[0];
 
LLVMValueRef msb =
build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
emit_data->dst_type, &arg, 1,
LLVMReadNoneAttribute);
 
/* The HW returns the last bit index from MSB, but TGSI wants
* the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
msb, "");
 
/* If arg == 0 || arg == -1 (0xffffffff), return -1. */
LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
 
LLVMValueRef cond =
LLVMBuildOr(builder,
LLVMBuildICmp(builder, LLVMIntEQ, arg,
bld_base->uint_bld.zero, ""),
LLVMBuildICmp(builder, LLVMIntEQ, arg,
all_ones, ""), "");
 
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, all_ones, msb, "");
}
 
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
struct lp_type type;
 
/* Initialize the gallivm object:
* We are only using the module, context, and builder fields of this struct.
* This should be enough for us to be able to pass our gallivm struct to the
* helper functions in the gallivm module.
*/
memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
memset(&ctx->soa, 0, sizeof(ctx->soa));
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
 
ctx->store_output_intr = "llvm.AMDGPU.store.output.";
ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
 
/* XXX: We need to revisit this.I think the correct way to do this is
* to use length = 4 here and use the elem_bld for everything. */
type.floating = TRUE;
type.fixed = FALSE;
type.sign = TRUE;
type.norm = FALSE;
type.width = 32;
type.length = 1;
 
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
 
bld_base->soa = 1;
bld_base->emit_store = emit_store;
bld_base->emit_swizzle = emit_swizzle;
bld_base->emit_declaration = emit_declaration;
bld_base->emit_immediate = emit_immediate;
 
bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
 
/* Allocate outputs */
ctx->soa.outputs = ctx->outputs;
 
ctx->num_arrays = 0;
 
/* XXX: Is there a better way to initialize all this? */
 
lp_set_default_actions(bld_base);
 
bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp";
bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
 
bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
#if HAVE_LLVM >= 0x0305
bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
#else
bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
#endif
}
 
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
LLVMTypeRef main_fn_type;
LLVMBasicBlockRef main_fn_body;
 
/* Setup the function */
main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
ParamTypes, ParamCount, 0);
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
ctx->main_fn, "main_body");
LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
}
 
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
/* End the main function with a return */
LLVMBuildRetVoid(gallivm->builder);
 
/* Create the pass manager */
ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
 
/* This pass should eliminate all the load and store instructions */
LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
 
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
LLVMAddLICMPass(gallivm->passmgr);
LLVMAddAggressiveDCEPass(gallivm->passmgr);
LLVMAddCFGSimplificationPass(gallivm->passmgr);
LLVMAddInstructionCombiningPass(gallivm->passmgr);
 
/* Run the pass */
LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
 
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
 
}
 
void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
{
LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
FREE(ctx->temps);
ctx->temps = NULL;
FREE(ctx->loop);
ctx->loop = NULL;
ctx->loop_depth_max = 0;
FREE(ctx->branch);
ctx->branch = NULL;
ctx->branch_depth_max = 0;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_uvd.c
0,0 → 1,947
/**************************************************************************
*
* Copyright 2011 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
 
#include "pipe/p_video_codec.h"
 
#include "util/u_memory.h"
#include "util/u_video.h"
 
#include "vl/vl_defines.h"
#include "vl/vl_mpeg12_decoder.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_uvd.h"
 
#define NUM_BUFFERS 4
 
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
#define NUM_VC1_REFS 5
 
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
 
/* UVD decoder representation */
struct ruvd_decoder {
struct pipe_video_codec base;
 
ruvd_set_dtb set_dtb;
 
unsigned stream_handle;
unsigned frame_number;
 
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs;
 
unsigned cur_buffer;
 
struct rvid_buffer msg_fb_buffers[NUM_BUFFERS];
struct ruvd_msg *msg;
uint32_t *fb;
 
struct rvid_buffer bs_buffers[NUM_BUFFERS];
void* bs_ptr;
unsigned bs_size;
 
struct rvid_buffer dpb;
};
 
/* flush IB to the hardware */
static void flush(struct ruvd_decoder *dec)
{
dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0);
}
 
/* add a new set register command to the IB */
static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
{
uint32_t *pm4 = dec->cs->buf;
pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0);
pm4[dec->cs->cdw++] = val;
}
 
/* send a command to the VCPU through the GPCOM registers */
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
struct radeon_winsys_cs_handle* cs_buf, uint32_t off,
enum radeon_bo_usage usage, enum radeon_bo_domain domain)
{
int reloc_idx;
 
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
RADEON_PRIO_MIN);
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
}
 
/* map the next available message/feedback buffer */
static void map_msg_fb_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
uint8_t *ptr;
 
/* grab the current message/feedback buffer */
buf = &dec->msg_fb_buffers[dec->cur_buffer];
 
/* and map it for CPU access */
ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
 
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
}
 
/* unmap and send a message command to the VCPU */
static void send_msg_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
 
/* ignore the request if message/feedback buffer isn't mapped */
if (!dec->msg || !dec->fb)
return;
 
/* grab the current message buffer */
buf = &dec->msg_fb_buffers[dec->cur_buffer];
 
/* unmap the buffer */
dec->ws->buffer_unmap(buf->res->cs_buf);
dec->msg = NULL;
dec->fb = NULL;
 
/* and send it to the hardware */
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
}
 
/* cycle to the next set of buffers */
static void next_buffer(struct ruvd_decoder *dec)
{
++dec->cur_buffer;
dec->cur_buffer %= NUM_BUFFERS;
}
 
/* convert the profile into something UVD understands */
static uint32_t profile2stream_type(enum pipe_video_profile profile)
{
switch (u_reduce_video_profile(profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
return RUVD_CODEC_H264;
 
case PIPE_VIDEO_FORMAT_VC1:
return RUVD_CODEC_VC1;
 
case PIPE_VIDEO_FORMAT_MPEG12:
return RUVD_CODEC_MPEG2;
 
case PIPE_VIDEO_FORMAT_MPEG4:
return RUVD_CODEC_MPEG4;
 
default:
assert(0);
return 0;
}
}
 
/* calculate size of reference picture buffer */
static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
{
unsigned width_in_mb, height_in_mb, image_size, dpb_size;
 
// always align them to MB size for dpb calculation
unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
 
// always one more for currently decoded picture
unsigned max_references = templ->max_references + 1;
 
// aligned size of a single frame
image_size = width * height;
image_size += image_size / 2;
image_size = align(image_size, 1024);
 
// picture width & height in 16 pixel units
width_in_mb = width / VL_MACROBLOCK_WIDTH;
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
 
switch (u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
// the firmware seems to always assume a minimum number of ref frames
max_references = MAX2(NUM_H264_REFS, max_references);
 
// reference picture buffer
dpb_size = image_size * max_references;
 
// macroblock context buffer
dpb_size += width_in_mb * height_in_mb * max_references * 192;
 
// IT surface buffer
dpb_size += width_in_mb * height_in_mb * 32;
break;
 
case PIPE_VIDEO_FORMAT_VC1:
// the firmware seems to always assume a minimum number of ref frames
max_references = MAX2(NUM_VC1_REFS, max_references);
 
// reference picture buffer
dpb_size = image_size * max_references;
 
// CONTEXT_BUFFER
dpb_size += width_in_mb * height_in_mb * 128;
 
// IT surface buffer
dpb_size += width_in_mb * 64;
 
// DB surface buffer
dpb_size += width_in_mb * 128;
 
// BP
dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
break;
 
case PIPE_VIDEO_FORMAT_MPEG12:
// reference picture buffer, must be big enough for all frames
dpb_size = image_size * NUM_MPEG2_REFS;
break;
 
case PIPE_VIDEO_FORMAT_MPEG4:
// reference picture buffer
dpb_size = image_size * max_references;
 
// CM
dpb_size += width_in_mb * height_in_mb * 64;
 
// IT surface buffer
dpb_size += align(width_in_mb * height_in_mb * 32, 64);
break;
 
default:
// something is missing here
assert(0);
 
// at least use a sane default value
dpb_size = 32 * 1024 * 1024;
break;
}
return dpb_size;
}
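/* Rough worked example (stream parameters assumed, not from the original
 * code): for a 1920x1088 H.264 stream, image_size = 1920*1088*3/2 =
 * 3133440 bytes, width_in_mb = 120, height_in_mb = 68 and
 * max_references = 17, so dpb_size = 3133440*17 + 120*68*17*192 +
 * 120*68*32 = 80163840 bytes, roughly 76 MiB. */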
 
/* get h264 specific message bits */
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
{
struct ruvd_h264 result;
 
memset(&result, 0, sizeof(result));
switch (pic->base.profile) {
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
result.profile = RUVD_H264_PROFILE_BASELINE;
break;
 
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
result.profile = RUVD_H264_PROFILE_MAIN;
break;
 
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
result.profile = RUVD_H264_PROFILE_HIGH;
break;
 
default:
assert(0);
break;
}
if (((dec->base.width * dec->base.height) >> 8) <= 1620)
result.level = 30;
else
result.level = 41;
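/* 1620 macroblocks is the MaxFS limit of H.264 level 3.0, which is
 * presumably why frames at or below that size report level 3.0 and
 * everything larger falls back to level 4.1. */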
 
result.sps_info_flags = 0;
result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
 
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
 
switch (dec->base.chroma_format) {
case PIPE_VIDEO_CHROMA_FORMAT_400:
result.chroma_format = 0;
break;
case PIPE_VIDEO_CHROMA_FORMAT_420:
result.chroma_format = 1;
break;
case PIPE_VIDEO_CHROMA_FORMAT_422:
result.chroma_format = 2;
break;
case PIPE_VIDEO_CHROMA_FORMAT_444:
result.chroma_format = 3;
break;
}
 
result.pps_info_flags = 0;
result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
 
result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
result.slice_group_map_type = pic->pps->slice_group_map_type;
result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
 
memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
 
result.num_ref_frames = pic->num_ref_frames;
 
result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
 
result.frame_num = pic->frame_num;
memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
 
result.decoded_pic_idx = pic->frame_num;
 
return result;
}
 
/* get vc1 specific message bits */
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
{
struct ruvd_vc1 result;
 
memset(&result, 0, sizeof(result));
 
switch(pic->base.profile) {
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
result.profile = RUVD_VC1_PROFILE_SIMPLE;
result.level = 1;
break;
 
case PIPE_VIDEO_PROFILE_VC1_MAIN:
result.profile = RUVD_VC1_PROFILE_MAIN;
result.level = 2;
break;
 
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
result.profile = RUVD_VC1_PROFILE_ADVANCED;
result.level = 4;
break;
 
default:
assert(0);
}
 
/* fields common for all profiles */
result.sps_info_flags |= pic->postprocflag << 7;
result.sps_info_flags |= pic->pulldown << 6;
result.sps_info_flags |= pic->interlace << 5;
result.sps_info_flags |= pic->tfcntrflag << 4;
result.sps_info_flags |= pic->finterpflag << 3;
result.sps_info_flags |= pic->psf << 1;
 
result.pps_info_flags |= pic->range_mapy_flag << 31;
result.pps_info_flags |= pic->range_mapy << 28;
result.pps_info_flags |= pic->range_mapuv_flag << 27;
result.pps_info_flags |= pic->range_mapuv << 24;
result.pps_info_flags |= pic->multires << 21;
result.pps_info_flags |= pic->maxbframes << 16;
result.pps_info_flags |= pic->overlap << 11;
result.pps_info_flags |= pic->quantizer << 9;
result.pps_info_flags |= pic->panscan_flag << 7;
result.pps_info_flags |= pic->refdist_flag << 6;
result.pps_info_flags |= pic->vstransform << 0;
 
/* some fields only apply to main/advanced profile */
if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
result.pps_info_flags |= pic->syncmarker << 20;
result.pps_info_flags |= pic->rangered << 19;
result.pps_info_flags |= pic->loopfilter << 5;
result.pps_info_flags |= pic->fastuvmc << 4;
result.pps_info_flags |= pic->extended_mv << 3;
result.pps_info_flags |= pic->extended_dmv << 8;
result.pps_info_flags |= pic->dquant << 1;
}
 
result.chroma_format = 1;
 
#if 0
//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
uint32_t slice_count
uint8_t picture_type
uint8_t frame_coding_mode
uint8_t deblockEnable
uint8_t pquant
#endif
 
return result;
}
 
/* extract the frame number from a referenced video buffer */
static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
{
uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
uint32_t max = MAX2(dec->frame_number, 1) - 1;
uintptr_t frame;
 
/* seems to be the most sane fallback */
if (!ref)
return max;
 
/* get the frame number from the associated data */
frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
 
/* limit the frame number to a valid range */
return MAX2(MIN2(frame, max), min);
}
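/* Example (frame number assumed): with frame_number == 10 and
 * NUM_MPEG2_REFS == 6 the valid window is [4, 9]; a NULL reference
 * falls back to 9, the most recently decoded frame. */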
 
/* get mpeg2 specific msg bits */
static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
struct pipe_mpeg12_picture_desc *pic)
{
const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
struct ruvd_mpeg2 result;
unsigned i;
 
memset(&result, 0, sizeof(result));
result.decoded_pic_idx = dec->frame_number;
for (i = 0; i < 2; ++i)
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
 
result.load_intra_quantiser_matrix = 1;
result.load_nonintra_quantiser_matrix = 1;
 
for (i = 0; i < 64; ++i) {
result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
}
 
result.profile_and_level_indication = 0;
result.chroma_format = 0x1;
 
result.picture_coding_type = pic->picture_coding_type;
result.f_code[0][0] = pic->f_code[0][0] + 1;
result.f_code[0][1] = pic->f_code[0][1] + 1;
result.f_code[1][0] = pic->f_code[1][0] + 1;
result.f_code[1][1] = pic->f_code[1][1] + 1;
result.intra_dc_precision = pic->intra_dc_precision;
result.pic_structure = pic->picture_structure;
result.top_field_first = pic->top_field_first;
result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
result.concealment_motion_vectors = pic->concealment_motion_vectors;
result.q_scale_type = pic->q_scale_type;
result.intra_vlc_format = pic->intra_vlc_format;
result.alternate_scan = pic->alternate_scan;
 
return result;
}
 
/* get mpeg4 specific msg bits */
static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
struct pipe_mpeg4_picture_desc *pic)
{
struct ruvd_mpeg4 result;
unsigned i;
 
memset(&result, 0, sizeof(result));
result.decoded_pic_idx = dec->frame_number;
for (i = 0; i < 2; ++i)
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
 
result.variant_type = 0;
result.profile_and_level_indication = 0xF0; // ASP Level0
 
result.video_object_layer_verid = 0x5; // advanced simple
result.video_object_layer_shape = 0x0; // rectangular
 
result.video_object_layer_width = dec->base.width;
result.video_object_layer_height = dec->base.height;
 
result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
 
result.flags |= pic->short_video_header << 0;
//result.flags |= obmc_disable << 1;
result.flags |= pic->interlaced << 2;
result.flags |= 1 << 3; // load_intra_quant_mat
result.flags |= 1 << 4; // load_nonintra_quant_mat
result.flags |= pic->quarter_sample << 5;
result.flags |= 1 << 6; // complexity_estimation_disable
result.flags |= pic->resync_marker_disable << 7;
//result.flags |= data_partitioned << 8;
//result.flags |= reversible_vlc << 9;
result.flags |= 0 << 10; // newpred_enable
result.flags |= 0 << 11; // reduced_resolution_vop_enable
//result.flags |= scalability << 12;
//result.flags |= is_object_layer_identifier << 13;
//result.flags |= fixed_vop_rate << 14;
//result.flags |= newpred_segment_type << 15;
 
result.quant_type = pic->quant_type;
 
for (i = 0; i < 64; ++i) {
result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
}
 
/*
int32_t trd [2]
int32_t trb [2]
uint8_t vop_coding_type
uint8_t vop_fcode_forward
uint8_t vop_fcode_backward
uint8_t rounding_control
uint8_t alternate_vertical_scan_flag
uint8_t top_field_first
*/
 
return result;
}
 
/**
* destroy this video decoder
*/
static void ruvd_destroy(struct pipe_video_codec *decoder)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
unsigned i;
 
assert(decoder);
 
map_msg_fb_buf(dec);
memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
dec->msg->stream_handle = dec->stream_handle;
send_msg_buf(dec);
 
flush(dec);
 
dec->ws->cs_destroy(dec->cs);
 
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
 
rvid_destroy_buffer(&dec->dpb);
 
FREE(dec);
}
 
/* free associated data in the video buffer callback */
static void ruvd_destroy_associated_data(void *data)
{
/* NOOP, since we only use an intptr */
}
 
/**
* start decoding of a new frame
*/
static void ruvd_begin_frame(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
uintptr_t frame;
 
assert(decoder);
 
frame = ++dec->frame_number;
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
&ruvd_destroy_associated_data);
 
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->cs_buf,
dec->cs, PIPE_TRANSFER_WRITE);
}
 
/**
* decode a macroblock
*/
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture,
const struct pipe_macroblock *macroblocks,
unsigned num_macroblocks)
{
/* not supported (yet) */
assert(0);
}
 
/**
* decode a bitstream
*/
static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture,
unsigned num_buffers,
const void * const *buffers,
const unsigned *sizes)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
unsigned i;
 
assert(decoder);
 
if (!dec->bs_ptr)
return;
 
for (i = 0; i < num_buffers; ++i) {
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
unsigned new_size = dec->bs_size + sizes[i];
 
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->cs_buf);
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
 
dec->bs_ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs,
PIPE_TRANSFER_WRITE);
if (!dec->bs_ptr)
return;
 
dec->bs_ptr += dec->bs_size;
}
 
memcpy(dec->bs_ptr, buffers[i], sizes[i]);
dec->bs_size += sizes[i];
dec->bs_ptr += sizes[i];
}
}
 
/**
* end decoding of the current frame
*/
static void ruvd_end_frame(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
struct radeon_winsys_cs_handle *dt;
struct rvid_buffer *msg_fb_buf, *bs_buf;
unsigned bs_size;
 
assert(decoder);
 
if (!dec->bs_ptr)
return;
 
msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
bs_buf = &dec->bs_buffers[dec->cur_buffer];
 
bs_size = align(dec->bs_size, 128);
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
dec->ws->buffer_unmap(bs_buf->res->cs_buf);
 
map_msg_fb_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DECODE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->status_report_feedback_number = dec->frame_number;
 
dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
dec->msg->body.decode.decode_flags = 0x1;
dec->msg->body.decode.width_in_samples = dec->base.width;
dec->msg->body.decode.height_in_samples = dec->base.height;
 
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
 
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
 
switch (u_reduce_video_profile(picture->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
break;
 
case PIPE_VIDEO_FORMAT_VC1:
dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
break;
 
case PIPE_VIDEO_FORMAT_MPEG12:
dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
break;
 
case PIPE_VIDEO_FORMAT_MPEG4:
dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
break;
 
default:
assert(0);
return;
}
 
dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
dec->msg->body.decode.extension_support = 0x1;
 
/* set at least the feedback buffer size */
dec->fb[0] = FB_BUFFER_SIZE;
 
send_msg_buf(dec);
 
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
set_reg(dec, RUVD_ENGINE_CNTL, 1);
 
flush(dec);
next_buffer(dec);
}
 
/**
* flush any outstanding command buffers to the hardware
*/
static void ruvd_flush(struct pipe_video_codec *decoder)
{
}
 
/**
* create an UVD decoder
*/
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
unsigned dpb_size = calc_dpb_size(templ);
unsigned width = templ->width, height = templ->height;
unsigned bs_buf_size;
struct radeon_info info;
struct ruvd_decoder *dec;
int i;
 
ws->query_info(ws, &info);
 
switch(u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
return vl_create_mpeg12_decoder(context, templ);
 
/* fall through */
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
width = align(width, VL_MACROBLOCK_WIDTH);
height = align(height, VL_MACROBLOCK_HEIGHT);
break;
 
default:
break;
}
 
 
dec = CALLOC_STRUCT(ruvd_decoder);
 
if (!dec)
return NULL;
 
dec->base = *templ;
dec->base.context = context;
dec->base.width = width;
dec->base.height = height;
 
dec->base.destroy = ruvd_destroy;
dec->base.begin_frame = ruvd_begin_frame;
dec->base.decode_macroblock = ruvd_decode_macroblock;
dec->base.decode_bitstream = ruvd_decode_bitstream;
dec->base.end_frame = ruvd_end_frame;
dec->base.flush = ruvd_flush;
 
dec->set_dtb = set_dtb;
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
 
bs_buf_size = width * height * 512 / (16 * 16);
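/* i.e. 512 bytes of bitstream per 16x16 macroblock as a starting size;
 * ruvd_decode_bitstream() resizes the buffer on demand. */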
for (i = 0; i < NUM_BUFFERS; ++i) {
unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
msg_fb_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
 
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
 
rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
}
 
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
 
rvid_clear_buffer(context, &dec->dpb);
 
map_msg_fb_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
dec->msg->body.create.width_in_samples = dec->base.width;
dec->msg->body.create.height_in_samples = dec->base.height;
dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
send_msg_buf(dec);
flush(dec);
next_buffer(dec);
 
return &dec->base;
 
error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
 
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
 
rvid_destroy_buffer(&dec->dpb);
 
FREE(dec);
 
return NULL;
}
 
/* calculate top/bottom offset */
static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
{
return surface->level[0].offset +
layer * surface->level[0].slice_size;
}
 
/* hw encoding of the macro tile aspect ratio */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
switch (macro_tile_aspect) {
default:
case 1: macro_tile_aspect = 0; break;
case 2: macro_tile_aspect = 1; break;
case 4: macro_tile_aspect = 2; break;
case 8: macro_tile_aspect = 3; break;
}
return macro_tile_aspect;
}
 
/* hw encoding of the bank width and height */
static unsigned bank_wh(unsigned bankwh)
{
switch (bankwh) {
default:
case 1: bankwh = 0; break;
case 2: bankwh = 1; break;
case 4: bankwh = 2; break;
case 8: bankwh = 3; break;
}
return bankwh;
}
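/* Both helpers effectively return log2 of their power-of-two input; the
 * switch form presumably just documents the legal values and maps
 * anything unexpected to a safe default of 0. */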
 
/**
* fill decoding target field from the luma and chroma surfaces
*/
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma)
{
msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
switch (luma->level[0].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
break;
case RADEON_SURF_MODE_1D:
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
break;
case RADEON_SURF_MODE_2D:
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
break;
default:
assert(0);
break;
}
 
msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
if (msg->body.decode.dt_field_mode) {
msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
} else {
msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
}
 
assert(luma->bankw == chroma->bankw);
assert(luma->bankh == chroma->bankh);
assert(luma->mtilea == chroma->mtilea);
 
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw));
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh));
msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_uvd.h
0,0 → 1,358
/**************************************************************************
*
* Copyright 2011 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#ifndef RADEON_UVD_H
#define RADEON_UVD_H
 
#include "radeon/radeon_winsys.h"
#include "vl/vl_video_buffer.h"
 
/* UVD uses PM4 packet types 0 and 2 */
#define RUVD_PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define RUVD_PKT_TYPE_C 0x3FFFFFFF
#define RUVD_PKT_COUNT_S(x) (((x) & 0x3FFF) << 16)
#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
#define RUVD_PKT_COUNT_C 0xC000FFFF
#define RUVD_PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0)
#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count))
#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2))
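/* Worked example (using the register offsets below): writing
 * RUVD_ENGINE_CNTL emits RUVD_PKT0(0xEF18 >> 2, 0), i.e.
 * (0 << 30) | (0x3BC6 << 0) | (0 << 16) = 0x00003BC6,
 * followed by the register value as the next dword. */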
 
/* registers involved with UVD */
#define RUVD_GPCOM_VCPU_CMD 0xEF0C
#define RUVD_GPCOM_VCPU_DATA0 0xEF10
#define RUVD_GPCOM_VCPU_DATA1 0xEF14
#define RUVD_ENGINE_CNTL 0xEF18
 
/* UVD commands to VCPU */
#define RUVD_CMD_MSG_BUFFER 0x00000000
#define RUVD_CMD_DPB_BUFFER 0x00000001
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
 
/* UVD message types */
#define RUVD_MSG_CREATE 0
#define RUVD_MSG_DECODE 1
#define RUVD_MSG_DESTROY 2
 
/* UVD stream types */
#define RUVD_CODEC_H264 0x00000000
#define RUVD_CODEC_VC1 0x00000001
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
 
/* UVD decode target buffer tiling mode */
#define RUVD_TILE_LINEAR 0x00000000
#define RUVD_TILE_8X4 0x00000001
#define RUVD_TILE_8X8 0x00000002
#define RUVD_TILE_32AS8 0x00000003
 
/* UVD decode target buffer array mode */
#define RUVD_ARRAY_MODE_LINEAR 0x00000000
#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
#define RUVD_ARRAY_MODE_2D_THIN 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
 
/* UVD tile config */
#define RUVD_BANK_WIDTH(x) ((x) << 0)
#define RUVD_BANK_HEIGHT(x) ((x) << 3)
#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
#define RUVD_NUM_BANKS(x) ((x) << 9)
 
/* H.264 profile definitions */
#define RUVD_H264_PROFILE_BASELINE 0x00000000
#define RUVD_H264_PROFILE_MAIN 0x00000001
#define RUVD_H264_PROFILE_HIGH 0x00000002
#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
#define RUVD_H264_PROFILE_MVC 0x00000004
 
/* VC-1 profile definitions */
#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
#define RUVD_VC1_PROFILE_MAIN 0x00000001
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
 
struct ruvd_mvc_element {
uint16_t viewOrderIndex;
uint16_t viewId;
uint16_t numOfAnchorRefsInL0;
uint16_t viewIdOfAnchorRefsInL0[15];
uint16_t numOfAnchorRefsInL1;
uint16_t viewIdOfAnchorRefsInL1[15];
uint16_t numOfNonAnchorRefsInL0;
uint16_t viewIdOfNonAnchorRefsInL0[15];
uint16_t numOfNonAnchorRefsInL1;
uint16_t viewIdOfNonAnchorRefsInL1[15];
};
 
struct ruvd_h264 {
uint32_t profile;
uint32_t level;
 
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint8_t chroma_format;
uint8_t bit_depth_luma_minus8;
uint8_t bit_depth_chroma_minus8;
uint8_t log2_max_frame_num_minus4;
 
uint8_t pic_order_cnt_type;
uint8_t log2_max_pic_order_cnt_lsb_minus4;
uint8_t num_ref_frames;
uint8_t reserved_8bit;
 
int8_t pic_init_qp_minus26;
int8_t pic_init_qs_minus26;
int8_t chroma_qp_index_offset;
int8_t second_chroma_qp_index_offset;
 
uint8_t num_slice_groups_minus1;
uint8_t slice_group_map_type;
uint8_t num_ref_idx_l0_active_minus1;
uint8_t num_ref_idx_l1_active_minus1;
 
uint16_t slice_group_change_rate_minus1;
uint16_t reserved_16bit_1;
 
uint8_t scaling_list_4x4[6][16];
uint8_t scaling_list_8x8[2][64];
 
uint32_t frame_num;
uint32_t frame_num_list[16];
int32_t curr_field_order_cnt_list[2];
int32_t field_order_cnt_list[16][2];
 
uint32_t decoded_pic_idx;
 
uint32_t curr_pic_ref_frame_num;
 
uint8_t ref_frame_list[16];
 
uint32_t reserved[122];
 
struct {
uint32_t numViews;
uint32_t viewId0;
struct ruvd_mvc_element mvcElements[1];
} mvc;
};
 
struct ruvd_vc1 {
uint32_t profile;
uint32_t level;
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint32_t pic_structure;
uint32_t chroma_format;
};
 
struct ruvd_mpeg2 {
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
 
uint8_t load_intra_quantiser_matrix;
uint8_t load_nonintra_quantiser_matrix;
uint8_t reserved_quantiser_alignement[2];
uint8_t intra_quantiser_matrix[64];
uint8_t nonintra_quantiser_matrix[64];
 
uint8_t profile_and_level_indication;
uint8_t chroma_format;
 
uint8_t picture_coding_type;
 
uint8_t reserved_1;
 
uint8_t f_code[2][2];
uint8_t intra_dc_precision;
uint8_t pic_structure;
uint8_t top_field_first;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t q_scale_type;
uint8_t intra_vlc_format;
uint8_t alternate_scan;
};
 
struct ruvd_mpeg4
{
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
 
uint32_t variant_type;
uint8_t profile_and_level_indication;
 
uint8_t video_object_layer_verid;
uint8_t video_object_layer_shape;
 
uint8_t reserved_1;
 
uint16_t video_object_layer_width;
uint16_t video_object_layer_height;
 
uint16_t vop_time_increment_resolution;
 
uint16_t reserved_2;
 
uint32_t flags;
 
uint8_t quant_type;
 
uint8_t reserved_3[3];
 
uint8_t intra_quant_mat[64];
uint8_t nonintra_quant_mat[64];
 
struct {
uint8_t sprite_enable;
 
uint8_t reserved_4[3];
 
uint16_t sprite_width;
uint16_t sprite_height;
int16_t sprite_left_coordinate;
int16_t sprite_top_coordinate;
 
uint8_t no_of_sprite_warping_points;
uint8_t sprite_warping_accuracy;
uint8_t sprite_brightness_change;
uint8_t low_latency_sprite_enable;
} sprite_config;
 
struct {
uint32_t flags;
uint8_t vol_mode;
uint8_t reserved_5[3];
} divx_311_config;
};
 
/* message between driver and hardware */
struct ruvd_msg {
 
uint32_t size;
uint32_t msg_type;
uint32_t stream_handle;
uint32_t status_report_feedback_number;
 
union {
struct {
uint32_t stream_type;
uint32_t session_flags;
uint32_t asic_id;
uint32_t width_in_samples;
uint32_t height_in_samples;
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t version_info;
} create;
 
struct {
uint32_t stream_type;
uint32_t decode_flags;
uint32_t width_in_samples;
uint32_t height_in_samples;
 
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t dpb_reserved;
 
uint32_t db_offset_alignment;
uint32_t db_pitch;
uint32_t db_tiling_mode;
uint32_t db_array_mode;
uint32_t db_field_mode;
uint32_t db_surf_tile_config;
uint32_t db_aligned_height;
uint32_t db_reserved;
 
uint32_t use_addr_macro;
 
uint32_t bsd_buffer;
uint32_t bsd_size;
 
uint32_t pic_param_buffer;
uint32_t pic_param_size;
uint32_t mb_cntl_buffer;
uint32_t mb_cntl_size;
 
uint32_t dt_buffer;
uint32_t dt_pitch;
uint32_t dt_tiling_mode;
uint32_t dt_array_mode;
uint32_t dt_field_mode;
uint32_t dt_luma_top_offset;
uint32_t dt_luma_bottom_offset;
uint32_t dt_chroma_top_offset;
uint32_t dt_chroma_bottom_offset;
uint32_t dt_surf_tile_config;
uint32_t dt_reserved[3];
 
uint32_t reserved[16];
 
union {
struct ruvd_h264 h264;
struct ruvd_vc1 vc1;
struct ruvd_mpeg2 mpeg2;
struct ruvd_mpeg4 mpeg4;
 
uint32_t info[768];
} codec;
 
uint8_t extension_support;
uint8_t reserved_8bit_1;
uint8_t reserved_8bit_2;
uint8_t reserved_8bit_3;
uint32_t extension_reserved[64];
} decode;
} body;
};
 
/* driver dependent callback */
typedef struct radeon_winsys_cs_handle* (*ruvd_set_dtb)
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
 
/* create an UVD decoder */
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
ruvd_set_dtb set_dtb);
 
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma);
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce.c
0,0 → 1,430
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <stdio.h>
 
#include "pipe/p_video_codec.h"
 
#include "util/u_video.h"
#include "util/u_memory.h"
 
#include "vl/vl_video_buffer.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
 
/**
* flush commands to the hardware
*/
static void flush(struct rvce_encoder *enc)
{
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
}
 
#if 0
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
unsigned i = 0;
fprintf(stderr, "\n");
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "\n");
enc->ws->buffer_unmap(fb->res->cs_buf);
}
#endif
 
/**
* reset the CPB handling
*/
static void reset_cpb(struct rvce_encoder *enc)
{
unsigned i;
 
LIST_INITHEAD(&enc->cpb_slots);
for (i = 0; i < enc->cpb_num; ++i) {
struct rvce_cpb_slot *slot = &enc->cpb_array[i];
slot->index = i;
slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
slot->frame_num = 0;
slot->pic_order_cnt = 0;
LIST_ADDTAIL(&slot->list, &enc->cpb_slots);
}
}
 
/**
* sort l0 and l1 to the top of the list
*/
static void sort_cpb(struct rvce_encoder *enc)
{
struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;
 
LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) {
if (i->frame_num == enc->pic.ref_idx_l0)
l0 = i;
 
if (i->frame_num == enc->pic.ref_idx_l1)
l1 = i;
 
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0)
break;
 
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B &&
l0 && l1)
break;
}
 
if (l1) {
LIST_DEL(&l1->list);
LIST_ADD(&l1->list, &enc->cpb_slots);
}
 
if (l0) {
LIST_DEL(&l0->list);
LIST_ADD(&l0->list, &enc->cpb_slots);
}
}
 
/**
* get the number of CPB slots based on the DPB size
*/
static unsigned get_cpb_num(struct rvce_encoder *enc)
{
unsigned w = align(enc->base.width, 16) / 16;
unsigned h = align(enc->base.height, 16) / 16;
unsigned dpb;
 
switch (enc->base.level) {
case 10:
dpb = 396;
break;
case 11:
dpb = 900;
break;
case 12:
case 13:
case 20:
dpb = 2376;
break;
case 21:
dpb = 4752;
break;
case 22:
case 30:
dpb = 8100;
break;
case 31:
dpb = 18000;
break;
case 32:
dpb = 20480;
break;
case 40:
case 41:
dpb = 32768;
break;
default:
case 42:
dpb = 34816;
break;
case 50:
dpb = 110400;
break;
case 51:
dpb = 184320;
break;
}
 
return MIN2(dpb / (w * h), 16);
}
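/* The dpb values above appear to be MaxDpbMbs from the H.264 level
 * table. Example (stream size assumed): a 1920x1088 level 4.1 stream
 * has w * h = 120 * 68 = 8160 macroblocks, so 32768 / 8160 = 4 CPB
 * slots. */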
 
/**
* destroy this video encoder
*/
static void rvce_destroy(struct pipe_video_codec *encoder)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
struct rvid_buffer fb;
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->feedback(enc);
enc->destroy(enc);
flush(enc);
rvid_destroy_buffer(&fb);
}
rvid_destroy_buffer(&enc->cpb);
enc->ws->cs_destroy(enc->cs);
FREE(enc->cpb_array);
FREE(enc);
}
 
static void rvce_begin_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
 
bool need_rate_control =
enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||
enc->pic.quant_i_frames != pic->quant_i_frames ||
enc->pic.quant_p_frames != pic->quant_p_frames ||
enc->pic.quant_b_frames != pic->quant_b_frames;
 
enc->pic = *pic;
 
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
 
if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
reset_cpb(enc);
else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
sort_cpb(enc);
if (!enc->stream_handle) {
struct rvid_buffer fb;
enc->stream_handle = rvid_alloc_stream_handle();
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
enc->rate_control(enc);
need_rate_control = false;
enc->config_extension(enc);
enc->motion_estimation(enc);
enc->rdo(enc);
if (enc->use_vui)
enc->vui(enc);
enc->pic_control(enc);
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
}
 
enc->session(enc);
 
if (need_rate_control)
enc->rate_control(enc);
}
 
static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_resource *destination,
void **fb)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
enc->get_buffer(destination, &enc->bs_handle, NULL);
enc->bs_size = destination->width0;
 
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't create feedback buffer.\n");
return;
}
enc->encode(enc);
enc->feedback(enc);
}
 
static void rvce_end_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvce_cpb_slot *slot = LIST_ENTRY(
struct rvce_cpb_slot, enc->cpb_slots.prev, list);
 
flush(enc);
 
/* update the CPB backtrack with the just encoded frame */
slot->picture_type = enc->pic.picture_type;
slot->frame_num = enc->pic.frame_num;
slot->pic_order_cnt = enc->pic.pic_order_cnt;
if (!enc->pic.not_referenced) {
LIST_DEL(&slot->list);
LIST_ADD(&slot->list, &enc->cpb_slots);
}
}
 
static void rvce_get_feedback(struct pipe_video_codec *encoder,
void *feedback, unsigned *size)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvid_buffer *fb = feedback;
 
if (size) {
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
 
if (ptr[1]) {
*size = ptr[4] - ptr[9];
} else {
*size = 0;
}
 
enc->ws->buffer_unmap(fb->res->cs_buf);
}
//dump_feedback(enc, fb);
rvid_destroy_buffer(fb);
FREE(fb);
}
 
/**
* flush any outstanding command buffers to the hardware
*/
static void rvce_flush(struct pipe_video_codec *encoder)
{
}
 
static void rvce_cs_flush(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
// just ignored
}
 
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
struct rvce_encoder *enc;
struct pipe_video_buffer *tmp_buf, templat = {};
struct radeon_surf *tmp_surf;
unsigned cpb_size;
 
if (!rscreen->info.vce_fw_version) {
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
 
} else if (!rvce_is_fw_version_supported(rscreen)) {
RVID_ERR("Unsupported VCE fw version loaded!\n");
return NULL;
}
 
enc = CALLOC_STRUCT(rvce_encoder);
if (!enc)
return NULL;
 
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
enc->use_vui = true;
 
enc->base = *templ;
enc->base.context = context;
 
enc->base.destroy = rvce_destroy;
enc->base.begin_frame = rvce_begin_frame;
enc->base.encode_bitstream = rvce_encode_bitstream;
enc->base.end_frame = rvce_end_frame;
enc->base.flush = rvce_flush;
enc->base.get_feedback = rvce_get_feedback;
enc->get_buffer = get_buffer;
 
enc->screen = context->screen;
enc->ws = ws;
enc->cs = ws->cs_create(ws, RING_VCE, rvce_cs_flush, enc, NULL);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
 
templat.buffer_format = PIPE_FORMAT_NV12;
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
templat.width = enc->base.width;
templat.height = enc->base.height;
templat.interlaced = false;
if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
RVID_ERR("Can't create video buffer.\n");
goto error;
}
 
enc->cpb_num = get_cpb_num(enc);
if (!enc->cpb_num)
goto error;
 
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
cpb_size = align(tmp_surf->level[0].pitch_bytes, 128);
cpb_size = cpb_size * align(tmp_surf->npix_y, 16);
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
tmp_buf->destroy(tmp_buf);
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
goto error;
}
 
enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
if (!enc->cpb_array)
goto error;
 
reset_cpb(enc);
 
radeon_vce_40_2_2_init(enc);
 
return &enc->base;
 
error:
if (enc->cs)
enc->ws->cs_destroy(enc->cs);
 
rvid_destroy_buffer(&enc->cpb);
 
FREE(enc->cpb_array);
FREE(enc);
return NULL;
}
 
/**
* check if kernel has the right fw version loaded
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
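/* (40 << 24) | (2 << 16) | (2 << 8) encodes firmware version 40.2.2,
 * matching the command builders in radeon_vce_40_2_2.c. */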
return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce.h
0,0 → 1,117
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#ifndef RADEON_VCE_H
#define RADEON_VCE_H
 
#include "util/list.h"
 
#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN))
 
#define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
#define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4)
#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4)
#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4)
#define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
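/* Sketch of the resulting packet (command id taken from the session
 * command below): RVCE_BEGIN reserves the first dword, the body follows,
 * and RVCE_END patches the reserved dword with the packet size in bytes,
 * e.g.
 *
 *   RVCE_BEGIN(0x00000001); // session cmd
 *   RVCE_CS(enc->stream_handle);
 *   RVCE_END();
 *
 * emits the three dwords { 0x0000000c, 0x00000001, stream_handle }. */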
 
struct r600_common_screen;
 
/* driver dependent callback */
typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
struct radeon_winsys_cs_handle **handle,
struct radeon_surf **surface);
 
/* Coded picture buffer slot */
struct rvce_cpb_slot {
struct list_head list;
 
unsigned index;
enum pipe_h264_enc_picture_type picture_type;
unsigned frame_num;
unsigned pic_order_cnt;
};
 
/* VCE encoder representation */
struct rvce_encoder {
struct pipe_video_codec base;
 
/* version specific packets */
void (*session)(struct rvce_encoder *enc);
void (*create)(struct rvce_encoder *enc);
void (*feedback)(struct rvce_encoder *enc);
void (*rate_control)(struct rvce_encoder *enc);
void (*config_extension)(struct rvce_encoder *enc);
void (*pic_control)(struct rvce_encoder *enc);
void (*motion_estimation)(struct rvce_encoder *enc);
void (*rdo)(struct rvce_encoder *enc);
void (*vui)(struct rvce_encoder *enc);
void (*encode)(struct rvce_encoder *enc);
void (*destroy)(struct rvce_encoder *enc);
 
unsigned stream_handle;
 
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs;
 
rvce_get_buffer get_buffer;
 
struct radeon_winsys_cs_handle* handle;
struct radeon_surf* luma;
struct radeon_surf* chroma;
 
struct radeon_winsys_cs_handle* bs_handle;
unsigned bs_size;
 
struct rvce_cpb_slot *cpb_array;
struct list_head cpb_slots;
unsigned cpb_num;
 
struct rvid_buffer *fb;
struct rvid_buffer cpb;
struct pipe_h264_enc_picture_desc pic;
bool use_vui;
};
 
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer);
 
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
 
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
0,0 → 1,452
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <stdio.h>
 
#include "pipe/p_video_codec.h"
 
#include "util/u_video.h"
#include "util/u_memory.h"
 
#include "vl/vl_video_buffer.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
 
static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
 
static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
}
 
static struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
}
 
static struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
}
 
static void frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
unsigned *luma_offset, unsigned *chroma_offset)
{
unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
unsigned vpitch = align(enc->luma->npix_y, 16);
unsigned fsize = pitch * (vpitch + vpitch / 2);
 
*luma_offset = slot->index * fsize;
*chroma_offset = *luma_offset + pitch * vpitch;
}
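/* NV12 layout: each CPB slot holds a pitch-aligned luma plane followed
 * by a half-height interleaved chroma plane, hence
 * fsize = pitch * (vpitch + vpitch / 2). */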
 
static void session(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x00000001); // session cmd
RVCE_CS(enc->stream_handle);
RVCE_END();
}
 
static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
{
RVCE_BEGIN(0x00000002); // task info
RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
RVCE_CS(taskOperation); // taskOperation
RVCE_CS(0x00000000); // referencePictureDependency
RVCE_CS(0x00000000); // collocateFlagDependency
RVCE_CS(0x00000000); // feedbackIndex
RVCE_CS(0x00000000); // videoBitstreamRingIndex
RVCE_END();
}
 
static void feedback(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x05000005); // feedback buffer
RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
RVCE_CS(0x00000000); // feedbackRingAddressLo
RVCE_CS(0x00000001); // feedbackRingSize
RVCE_END();
}
 
static void create(struct rvce_encoder *enc)
{
task_info(enc, 0x00000000);
 
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(0x00000000); // encUseCircularBuffer
RVCE_CS(profiles[enc->base.profile -
PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
RVCE_CS(enc->base.level); // encLevel
RVCE_CS(0x00000000); // encPicStructRestriction
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
RVCE_END();
}
 
static void rate_control(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000005); // rate control
RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod
RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate
RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate
RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum
RVCE_CS(0x00000000); // encGOPSize
RVCE_CS(enc->pic.quant_i_frames); // encQP_I
RVCE_CS(enc->pic.quant_p_frames); // encQP_P
RVCE_CS(enc->pic.quant_b_frames); // encQP_B
RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize
RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen
RVCE_CS(0x00000000); // encVBVBufferLevel
RVCE_CS(0x00000000); // encMaxAUSize
RVCE_CS(0x00000000); // encQPInitialMode
RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture
RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger
RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional
RVCE_CS(0x00000000); // encMinQP
RVCE_CS(0x00000033); // encMaxQP
RVCE_CS(0x00000000); // encSkipFrameEnable
RVCE_CS(0x00000000); // encFillerDataEnable
RVCE_CS(0x00000000); // encEnforceHRD
RVCE_CS(0x00000000); // encBPicsDeltaQP
RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP
RVCE_CS(0x00000000); // encRateControlReInitDisable
RVCE_END();
}
 
static void config_extension(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000001); // config extension
RVCE_CS(0x00000003); // encEnablePerfLogging
RVCE_END();
}
 
static void pic_control(struct rvce_encoder *enc)
{
unsigned encNumMBsPerSlice;
 
encNumMBsPerSlice = align(enc->base.width, 16) / 16;
encNumMBsPerSlice *= align(enc->base.height, 16) / 16;
 
RVCE_BEGIN(0x04000002); // pic control
RVCE_CS(0x00000000); // encUseConstrainedIntraPred
RVCE_CS(0x00000000); // encCABACEnable
RVCE_CS(0x00000000); // encCABACIDC
RVCE_CS(0x00000000); // encLoopFilterDisable
RVCE_CS(0x00000000); // encLFBetaOffset
RVCE_CS(0x00000000); // encLFAlphaC0Offset
RVCE_CS(0x00000000); // encCropLeftOffset
RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset
RVCE_CS(0x00000000); // encCropTopOffset
RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset
RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice
RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot
RVCE_CS(0x00000000); // encForceIntraRefresh
RVCE_CS(0x00000000); // encForceIMBPeriod
RVCE_CS(0x00000000); // encPicOrderCntType
RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4
RVCE_CS(0x00000000); // encSPSID
RVCE_CS(0x00000000); // encPPSID
RVCE_CS(0x00000040); // encConstraintSetFlags
RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern
RVCE_CS(0x00000000); // weightPredModeBPicture
RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames
RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames
RVCE_CS(0x00000001); // encNumDefaultActiveRefL0
RVCE_CS(0x00000001); // encNumDefaultActiveRefL1
RVCE_CS(0x00000000); // encSliceMode
RVCE_CS(0x00000000); // encMaxSliceSize
RVCE_END();
}
 
static void motion_estimation(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000007); // motion estimation
RVCE_CS(0x00000001); // encIMEDecimationSearch
RVCE_CS(0x00000001); // motionEstHalfPixel
RVCE_CS(0x00000000); // motionEstQuarterPixel
RVCE_CS(0x00000000); // disableFavorPMVPoint
RVCE_CS(0x00000000); // forceZeroPointCenter
RVCE_CS(0x00000000); // LSMVert
RVCE_CS(0x00000010); // encSearchRangeX
RVCE_CS(0x00000010); // encSearchRangeY
RVCE_CS(0x00000010); // encSearch1RangeX
RVCE_CS(0x00000010); // encSearch1RangeY
RVCE_CS(0x00000000); // disable16x16Frame1
RVCE_CS(0x00000000); // disableSATD
RVCE_CS(0x00000000); // enableAMD
RVCE_CS(0x000000fe); // encDisableSubMode
RVCE_CS(0x00000000); // encIMESkipX
RVCE_CS(0x00000000); // encIMESkipY
RVCE_CS(0x00000000); // encEnImeOverwDisSubm
RVCE_CS(0x00000000); // encImeOverwDisSubmNo
RVCE_CS(0x00000001); // encIME2SearchRangeX
RVCE_CS(0x00000001); // encIME2SearchRangeY
RVCE_CS(0x00000000); // parallelModeSpeedupEnable
RVCE_CS(0x00000000); // fme0_encDisableSubMode
RVCE_CS(0x00000000); // fme1_encDisableSubMode
RVCE_CS(0x00000000); // imeSWSpeedupEnable
RVCE_END();
}
 
static void rdo(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000008); // rdo
RVCE_CS(0x00000000); // encDisableTbePredIFrame
RVCE_CS(0x00000000); // encDisableTbePredPFrame
RVCE_CS(0x00000000); // useFmeInterpolY
RVCE_CS(0x00000000); // useFmeInterpolUV
RVCE_CS(0x00000000); // useFmeIntrapolY
RVCE_CS(0x00000000); // useFmeIntrapolUV
RVCE_CS(0x00000000); // useFmeInterpolY_1
RVCE_CS(0x00000000); // useFmeInterpolUV_1
RVCE_CS(0x00000000); // useFmeIntrapolY_1
RVCE_CS(0x00000000); // useFmeIntrapolUV_1
RVCE_CS(0x00000000); // enc16x16CostAdj
RVCE_CS(0x00000000); // encSkipCostAdj
RVCE_CS(0x00000000); // encForce16x16skip
RVCE_CS(0x00000000); // encDisableThresholdCalcA
RVCE_CS(0x00000000); // encLumaCoeffCost
RVCE_CS(0x00000000); // encLumaMBCoeffCost
RVCE_CS(0x00000000); // encChromaCoeffCost
RVCE_END();
}
 
static void vui(struct rvce_encoder *enc)
{
int i;
 
RVCE_BEGIN(0x04000009); // vui
RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
RVCE_CS(0x00000000); //aspectRatioInfo.sarWidth
RVCE_CS(0x00000000); //aspectRatioInfo.sarHeight
RVCE_CS(0x00000000); //overscanInfoPresentFlag
RVCE_CS(0x00000000); //overScanInfo.overscanAppropFlag
RVCE_CS(0x00000000); //videoSignalTypePresentFlag
RVCE_CS(0x00000005); //videoSignalTypeInfo.videoFormat
RVCE_CS(0x00000000); //videoSignalTypeInfo.videoFullRangeFlag
RVCE_CS(0x00000000); //videoSignalTypeInfo.colorDescriptionPresentFlag
RVCE_CS(0x00000002); //videoSignalTypeInfo.colorPrim
RVCE_CS(0x00000002); //videoSignalTypeInfo.transferChar
RVCE_CS(0x00000002); //videoSignalTypeInfo.matrixCoef
RVCE_CS(0x00000000); //chromaLocInfoPresentFlag
RVCE_CS(0x00000000); //chromaLocInfo.chromaLocTop
RVCE_CS(0x00000000); //chromaLocInfo.chromaLocBottom
RVCE_CS(0x00000001); //timingInfoPresentFlag
RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); //timingInfo.numUnitsInTick
RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); //timingInfo.timeScale
RVCE_CS(0x00000001); //timingInfo.fixedFrameRateFlag
RVCE_CS(0x00000000); //nalHRDParametersPresentFlag
RVCE_CS(0x00000000); //hrdParam.cpbCntMinus1
RVCE_CS(0x00000004); //hrdParam.bitRateScale
RVCE_CS(0x00000006); //hrdParam.cpbSizeScale
for (i = 0; i < 32; i++) {
RVCE_CS(0x00000000); //hrdParam.bitRateValueMinus
RVCE_CS(0x00000000); //hrdParam.cpbSizeValueMinus
RVCE_CS(0x00000000); //hrdParam.cbrFlag
}
RVCE_CS(0x00000017); //hrdParam.initialCpbRemovalDelayLengthMinus1
RVCE_CS(0x00000017); //hrdParam.cpbRemovalDelayLengthMinus1
RVCE_CS(0x00000017); //hrdParam.dpbOutputDelayLengthMinus1
RVCE_CS(0x00000018); //hrdParam.timeOffsetLength
RVCE_CS(0x00000000); //lowDelayHRDFlag
RVCE_CS(0x00000000); //picStructPresentFlag
RVCE_CS(0x00000000); //bitstreamRestrictionPresentFlag
RVCE_CS(0x00000001); //bitstreamRestrictions.motionVectorsOverPicBoundariesFlag
RVCE_CS(0x00000002); //bitstreamRestrictions.maxBytesPerPicDenom
RVCE_CS(0x00000001); //bitstreamRestrictions.maxBitsPerMbDenom
RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthHori
RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthVert
RVCE_CS(0x00000003); //bitstreamRestrictions.numReorderFrames
RVCE_CS(0x00000003); //bitstreamRestrictions.maxDecFrameBuffering
RVCE_END();
}
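/*
 * Example of the timing info written above: for a 29.97 fps stream with
 * rate_ctrl.frame_rate_num = 30000 and frame_rate_den = 1001, the VUI gets
 * numUnitsInTick = 1001 and timeScale = 60000. The factor of two follows
 * the H.264 convention that timeScale counts field-granularity clock ticks,
 * i.e. frame rate = timeScale / (2 * numUnitsInTick).
 */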
 
static void encode(struct rvce_encoder *enc)
{
int i;
unsigned luma_offset, chroma_offset;
 
task_info(enc, 0x00000003);
 
RVCE_BEGIN(0x05000001); // context buffer
RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
RVCE_CS(0x00000000); // encodeContextAddressLo
RVCE_END();
 
RVCE_BEGIN(0x05000004); // video bitstream buffer
RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
 
RVCE_BEGIN(0x03000001); // encode
RVCE_CS(0x00000000); // insertHeaders
RVCE_CS(0x00000000); // pictureStructure
RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
RVCE_CS(0x00000000); // forceRefreshMap
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode
RVCE_CS(0x00000000); // encInputPicTileConfig
RVCE_CS(enc->pic.picture_type); // encPicType
RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
RVCE_CS(0x00000000); // encIdrPicId
RVCE_CS(0x00000000); // encMGSKeyPic
RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
RVCE_CS(0x00000000); // encTemporalLayerIndex
RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
 
i = enc->pic.frame_num - enc->pic.ref_idx_l0;
if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
RVCE_CS(0x00000001); // encRefListModificationOp
RVCE_CS(i - 1); // encRefListModificationNum
} else {
RVCE_CS(0x00000000); // encRefListModificationOp
RVCE_CS(0x00000000); // encRefListModificationNum
}
 
for (i = 0; i < 3; ++i) {
RVCE_CS(0x00000000); // encRefListModificationOp
RVCE_CS(0x00000000); // encRefListModificationNum
}
for (i = 0; i < 4; ++i) {
RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
}
 
// encReferencePictureL0[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
RVCE_CS(luma_offset); // lumaOffset
RVCE_CS(chroma_offset); // chromaOffset
} else {
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
}
 
// encReferencePictureL0[1]
RVCE_CS(0x00000000); // pictureStructure
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
 
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
RVCE_CS(luma_offset); // lumaOffset
RVCE_CS(chroma_offset); // chromaOffset
} else {
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
}
 
frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
RVCE_CS(0x00000000); // pictureCount
RVCE_CS(enc->pic.frame_num); // frameNumber
RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
RVCE_CS(0x00000000); // numIPicRemainInRCGOP
RVCE_CS(0x00000000); // numPPicRemainInRCGOP
RVCE_CS(0x00000000); // numBPicRemainInRCGOP
RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
RVCE_CS(0x00000000); // enableIntraRefresh
RVCE_END();
}
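/*
 * Note on the addressing pattern above: RVCE_READ/RVCE_WRITE/RVCE_READWRITE
 * add a relocation for the buffer in the "AddressHi" slot, and the RVCE_CS()
 * that follows supplies the low 32 bits as an offset into that buffer. That
 * is why every buffer address is written as a Hi/Lo pair with the surface
 * offset (e.g. enc->luma->level[0].offset) in the Lo dword.
 */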
 
static void destroy(struct rvce_encoder *enc)
{
task_info(enc, 0x00000001);
 
RVCE_BEGIN(0x02000001); // destroy
RVCE_END();
}
 
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->create = create;
enc->feedback = feedback;
enc->rate_control = rate_control;
enc->config_extension = config_extension;
enc->pic_control = pic_control;
enc->motion_estimation = motion_estimation;
enc->rdo = rdo;
enc->vui = vui;
enc->encode = encode;
enc->destroy = destroy;
}
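/*
 * The pointers installed above form the dispatch table consumed by the
 * shared VCE code in radeon_vce.c. A hedged sketch of a typical lifetime
 * (the exact call order is owned by the common layer, not this file):
 *
 * radeon_vce_40_2_2_init(enc);
 * enc->session(enc);  // bind the stream handle
 * enc->create(enc);   // static session parameters
 * enc->encode(enc);   // per-frame 0x03000001 command
 * enc->feedback(enc); // query the produced bitstream size
 * enc->destroy(enc);  // 0x02000001 on teardown
 */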
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_video.c
0,0 → 1,321
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <unistd.h>
 
#include "util/u_memory.h"
#include "util/u_video.h"
 
#include "vl/vl_defines.h"
#include "vl/vl_video_buffer.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
 
/* generate a stream handle */
unsigned rvid_alloc_stream_handle(void)
{
static unsigned counter = 0;
unsigned stream_handle = 0;
unsigned pid = getpid();
int i;
 
for (i = 0; i < 32; ++i)
stream_handle |= ((pid >> i) & 1) << (31 - i);
 
stream_handle ^= ++counter;
return stream_handle;
}
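/*
 * Example: for pid 0x1234 the loop above bit-reverses the pid into the
 * handle (0x00001234 -> 0x2c480000), and the first call then XORs in the
 * incremented counter, yielding 0x2c480001. Reversing the pid keeps the
 * per-process bits in the high word, leaving the low bits free for the
 * counter.
 */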
 
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage)
{
memset(buffer, 0, sizeof(*buffer));
buffer->usage = usage;
buffer->res = (struct r600_resource *)
pipe_buffer_create(screen, PIPE_BIND_CUSTOM, usage, size);
 
return buffer->res != NULL;
}
 
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer)
{
pipe_resource_reference((struct pipe_resource **)&buffer->res, NULL);
}
 
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
struct radeon_winsys* ws = rscreen->ws;
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
 
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
 
src = ws->buffer_map(old_buf.res->cs_buf, cs, PIPE_TRANSFER_READ);
if (!src)
goto error;
 
dst = ws->buffer_map(new_buf->res->cs_buf, cs, PIPE_TRANSFER_WRITE);
if (!dst)
goto error;
 
memcpy(dst, src, bytes);
if (new_size > bytes) {
new_size -= bytes;
dst += bytes;
memset(dst, 0, new_size);
}
ws->buffer_unmap(new_buf->res->cs_buf);
ws->buffer_unmap(old_buf.res->cs_buf);
rvid_destroy_buffer(&old_buf);
return true;
 
error:
if (src)
ws->buffer_unmap(old_buf.res->cs_buf);
rvid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
}
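/*
 * On failure the old buffer is restored into *new_buf, so callers can use
 * the resize transactionally. A minimal sketch (field name hypothetical):
 *
 * if (!rvid_resize_buffer(screen, cs, &enc->fb_buffer, new_size))
 *         RVID_ERR("Can't resize buffer.\n");
 * // enc->fb_buffer is valid either way: resized on success, old on failure
 */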
 
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
 
rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
0, false);
context->flush(context, NULL, 0);
}
 
/**
* join surfaces into the same buffer with identical tiling params,
* sum up their sizes and replace the backend buffers with a single bo
*/
void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
unsigned best_tiling, best_wh, off;
unsigned size, alignment;
struct pb_buffer *pb;
unsigned i, j;
 
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
unsigned wh;
 
if (!surfaces[i])
continue;
 
/* choose the smallest bank w/h for now */
wh = surfaces[i]->bankw * surfaces[i]->bankh;
if (wh < best_wh) {
best_wh = wh;
best_tiling = i;
}
}
 
for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!surfaces[i])
continue;
 
/* copy the tiling parameters */
surfaces[i]->bankw = surfaces[best_tiling]->bankw;
surfaces[i]->bankh = surfaces[best_tiling]->bankh;
surfaces[i]->mtilea = surfaces[best_tiling]->mtilea;
surfaces[i]->tile_split = surfaces[best_tiling]->tile_split;
 
/* adjust the texture layer offsets */
off = align(off, surfaces[i]->bo_alignment);
for (j = 0; j < Elements(surfaces[i]->level); ++j)
surfaces[i]->level[j].offset += off;
off += surfaces[i]->bo_size;
}
 
for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
 
size = align(size, (*buffers[i])->alignment);
size += (*buffers[i])->size;
alignment = MAX2(alignment, (*buffers[i])->alignment);
}
 
if (!size)
return;
 
/* TODO: 2D tiling workaround */
alignment *= 2;
 
pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM, 0);
if (!pb)
return;
 
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
 
pb_reference(buffers[i], pb);
}
 
pb_reference(&pb, NULL);
}
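/*
 * Sketch of the resulting layout for an NV12 video buffer: the luma plane
 * keeps its level offsets, the chroma plane's level offsets are shifted by
 * the aligned size of the luma bo, and afterwards every non-NULL entry in
 * buffers[] references the same single VRAM bo allocated above.
 */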
 
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
 
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
default:
return 0;
}
}
 
/* UVD 2.x limits */
if (rscreen->family < CHIP_PALM) {
enum pipe_video_format codec = u_reduce_video_profile(profile);
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
/* no support for MPEG4 */
return codec != PIPE_VIDEO_FORMAT_MPEG4 &&
/* FIXME: VC-1 simple/main profile is broken */
profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE &&
profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
/* MPEG2 is only supported via shaders, and R6xx-style
UVD has no support for interlacing */
return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
rscreen->family > CHIP_RV770;
default:
break;
}
}
 
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
switch (u_reduce_video_profile(profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
case PIPE_VIDEO_FORMAT_VC1:
/* FIXME: VC-1 simple/main profile is broken */
return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return true;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
switch (profile) {
case PIPE_VIDEO_PROFILE_MPEG1:
return 0;
case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
return 5;
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
return 1;
case PIPE_VIDEO_PROFILE_VC1_MAIN:
return 2;
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
return 4;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
return 41;
default:
return 0;
}
default:
return 0;
}
}
 
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint)
{
/* we can only handle this one with UVD */
if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
return format == PIPE_FORMAT_NV12;
 
return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_video.h
0,0 → 1,85
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#ifndef RADEON_VIDEO_H
#define RADEON_VIDEO_H
 
#include "radeon/radeon_winsys.h"
#include "vl/vl_video_buffer.h"
 
#define RVID_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
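 
/*
 * Usage example (hypothetical message; the "EE <file>:<line> <func> UVD - "
 * prefix is prepended automatically by the macro):
 *
 * RVID_ERR("Can't create buffer.\n");
 */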
 
/* video buffer representation */
struct rvid_buffer
{
unsigned usage;
struct r600_resource *res;
};
 
/* generate a stream handle */
unsigned rvid_alloc_stream_handle(void);
 
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage);
 
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer);
 
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size);
 
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
 
/* join surfaces into the same buffer with identical tiling params,
sum up their sizes and replace the backend buffers with a single bo */
void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
 
/* returns supported codecs and other parameters */
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param);
 
/* the hardware only supports NV12 */
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint);
 
#endif // RADEON_VIDEO_H
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_winsys.h
0,0 → 1,683
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef RADEON_WINSYS_H
#define RADEON_WINSYS_H
 
/* The public winsys interface header for the radeon driver. */
 
/* R300 features in DRM.
*
* 2.6.0:
* - Hyper-Z
* - GB_Z_PEQ_CONFIG on rv350->r4xx
* - R500 FG_ALPHA_VALUE
*
* 2.8.0:
* - R500 US_FORMAT regs
* - R500 ARGB2101010 colorbuffer
* - CMask and AA regs
* - R16F/RG16F
*/
 
#include "pipebuffer/pb_buffer.h"
 
#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
 
#define RADEON_FLUSH_ASYNC (1 << 0)
#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) /* needs DRM 2.12.0 */
#define RADEON_FLUSH_COMPUTE (1 << 2)
#define RADEON_FLUSH_END_OF_FRAME (1 << 3)
 
/* Tiling flags. */
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
RADEON_LAYOUT_TILED,
RADEON_LAYOUT_SQUARETILED,
 
RADEON_LAYOUT_UNKNOWN
};
 
enum radeon_bo_domain { /* bitfield */
RADEON_DOMAIN_GTT = 2,
RADEON_DOMAIN_VRAM = 4,
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
};
 
enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
};
 
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
};
 
enum radeon_family {
CHIP_UNKNOWN = 0,
CHIP_R300, /* R3xx-based cores. */
CHIP_R350,
CHIP_RV350,
CHIP_RV370,
CHIP_RV380,
CHIP_RS400,
CHIP_RC410,
CHIP_RS480,
CHIP_R420, /* R4xx-based cores. */
CHIP_R423,
CHIP_R430,
CHIP_R480,
CHIP_R481,
CHIP_RV410,
CHIP_RS600,
CHIP_RS690,
CHIP_RS740,
CHIP_RV515, /* R5xx-based cores. */
CHIP_R520,
CHIP_RV530,
CHIP_R580,
CHIP_RV560,
CHIP_RV570,
CHIP_R600,
CHIP_RV610,
CHIP_RV630,
CHIP_RV670,
CHIP_RV620,
CHIP_RV635,
CHIP_RS780,
CHIP_RS880,
CHIP_RV770,
CHIP_RV730,
CHIP_RV710,
CHIP_RV740,
CHIP_CEDAR,
CHIP_REDWOOD,
CHIP_JUNIPER,
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
CHIP_SUMO,
CHIP_SUMO2,
CHIP_BARTS,
CHIP_TURKS,
CHIP_CAICOS,
CHIP_CAYMAN,
CHIP_ARUBA,
CHIP_TAHITI,
CHIP_PITCAIRN,
CHIP_VERDE,
CHIP_OLAND,
CHIP_HAINAN,
CHIP_BONAIRE,
CHIP_KAVERI,
CHIP_KABINI,
CHIP_HAWAII,
CHIP_MULLINS,
CHIP_LAST,
};
 
enum chip_class {
CLASS_UNKNOWN = 0,
R300,
R400,
R500,
R600,
R700,
EVERGREEN,
CAYMAN,
SI,
CIK,
};
 
enum ring_type {
RING_GFX = 0,
RING_DMA,
RING_UVD,
RING_VCE,
RING_LAST,
};
 
enum radeon_value_id {
RADEON_REQUESTED_VRAM_MEMORY,
RADEON_REQUESTED_GTT_MEMORY,
RADEON_BUFFER_WAIT_TIME_NS,
RADEON_TIMESTAMP,
RADEON_NUM_CS_FLUSHES,
RADEON_NUM_BYTES_MOVED,
RADEON_VRAM_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE,
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK
};
 
enum radeon_bo_priority {
RADEON_PRIO_MIN,
RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
RADEON_PRIO_COLOR_BUFFER,
RADEON_PRIO_DEPTH_BUFFER,
RADEON_PRIO_SHADER_TEXTURE_MSAA,
RADEON_PRIO_COLOR_BUFFER_MSAA,
RADEON_PRIO_DEPTH_BUFFER_MSAA,
RADEON_PRIO_COLOR_META,
RADEON_PRIO_DEPTH_META,
RADEON_PRIO_MAX /* must be <= 15 */
};
 
struct winsys_handle;
struct radeon_winsys_cs_handle;
 
struct radeon_winsys_cs {
unsigned cdw; /* Number of used dwords. */
uint32_t *buf; /* The command buffer. */
enum ring_type ring_type;
};
 
struct radeon_info {
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
uint32_t max_sclk;
uint32_t max_compute_units;
uint32_t max_se;
uint32_t max_sh_per_se;
 
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
 
boolean has_uvd;
uint32_t vce_fw_version;
boolean has_userptr;
 
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
 
uint32_t r600_num_backends;
uint32_t r600_clock_crystal_freq;
uint32_t r600_tiling_config;
uint32_t r600_num_tile_pipes;
uint32_t r600_max_pipes;
boolean r600_virtual_address;
boolean r600_has_dma;
 
uint32_t r600_backend_map;
boolean r600_backend_map_valid;
 
boolean si_tile_mode_array_valid;
uint32_t si_tile_mode_array[32];
uint32_t si_backend_enabled_mask;
 
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
};
 
enum radeon_feature_id {
RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
RADEON_FID_R300_CMASK_ACCESS,
};
 
#define RADEON_SURF_MAX_LEVEL 32
 
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_TYPE_SHIFT 0
#define RADEON_SURF_TYPE_1D 0
#define RADEON_SURF_TYPE_2D 1
#define RADEON_SURF_TYPE_3D 2
#define RADEON_SURF_TYPE_CUBEMAP 3
#define RADEON_SURF_TYPE_1D_ARRAY 4
#define RADEON_SURF_TYPE_2D_ARRAY 5
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_MODE_LINEAR 0
#define RADEON_SURF_MODE_LINEAR_ALIGNED 1
#define RADEON_SURF_MODE_1D 2
#define RADEON_SURF_MODE_2D 3
#define RADEON_SURF_SCANOUT (1 << 16)
#define RADEON_SURF_ZBUFFER (1 << 17)
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19)
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
 
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
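 
/*
 * Example of composing and querying the flags word with the macros above:
 *
 * uint32_t flags = RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE) |
 *                  RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE) |
 *                  RADEON_SURF_SCANOUT;
 * RADEON_SURF_GET(flags, MODE);          // == RADEON_SURF_MODE_2D
 * flags = RADEON_SURF_CLR(flags, MODE);  // mode back to RADEON_SURF_MODE_LINEAR
 */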
 
struct radeon_surf_level {
uint64_t offset;
uint64_t slice_size;
uint32_t npix_x;
uint32_t npix_y;
uint32_t npix_z;
uint32_t nblk_x;
uint32_t nblk_y;
uint32_t nblk_z;
uint32_t pitch_bytes;
uint32_t mode;
};
 
struct radeon_surf {
/* These are inputs to the calculator. */
uint32_t npix_x;
uint32_t npix_y;
uint32_t npix_z;
uint32_t blk_w;
uint32_t blk_h;
uint32_t blk_d;
uint32_t array_size;
uint32_t last_level;
uint32_t bpe;
uint32_t nsamples;
uint32_t flags;
 
/* These are return values. Some of them can be set by the caller, but
* they will be treated as hints (e.g. bankw, bankh) and might be
* changed by the calculator.
*/
uint64_t bo_size;
uint64_t bo_alignment;
/* This applies to EG and later. */
uint32_t bankw;
uint32_t bankh;
uint32_t mtilea;
uint32_t tile_split;
uint32_t stencil_tile_split;
uint64_t stencil_offset;
struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL];
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
};
 
struct radeon_winsys {
/**
* The screen object this winsys was created for
*/
struct pipe_screen *screen;
 
/**
* Decrement the winsys reference count.
*
* \param ws The winsys this function is called for.
* \return True if the winsys and screen should be destroyed.
*/
bool (*unref)(struct radeon_winsys *ws);
 
/**
* Destroy this winsys.
*
* \param ws The winsys this function is called from.
*/
void (*destroy)(struct radeon_winsys *ws);
 
/**
* Query an info structure from winsys.
*
* \param ws The winsys this function is called from.
* \param info Return structure
*/
void (*query_info)(struct radeon_winsys *ws,
struct radeon_info *info);
 
/**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
* Remember that gallium gets to choose the interface it needs, and the
* window systems must then implement that interface (rather than the
* other way around...).
*************************************************************************/
 
/**
* Create a buffer object.
*
* \param ws The winsys this function is called from.
* \param size The size to allocate.
* \param alignment An alignment of the buffer in memory.
* \param use_reusable_pool Whether the cache buffer manager should be used.
* \param domain A bitmask of the RADEON_DOMAIN_* flags.
* \param flags A bitmask of the RADEON_FLAG_* flags.
* \return The created buffer object.
*/
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
unsigned size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags);
 
struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
struct pb_buffer *buf);
 
/**
* Map the entire data store of a buffer object into the client's address
* space.
*
* \param buf A winsys buffer object to map.
* \param cs A command stream to flush if the buffer is referenced by it.
* \param usage A bitmask of the PIPE_TRANSFER_* flags.
* \return The pointer at the beginning of the buffer.
*/
void *(*buffer_map)(struct radeon_winsys_cs_handle *buf,
struct radeon_winsys_cs *cs,
enum pipe_transfer_usage usage);
 
/**
* Unmap a buffer object from the client's address space.
*
* \param buf A winsys buffer object to unmap.
*/
void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
 
/**
* Return TRUE if a buffer object is being used by the GPU.
*
* \param buf A winsys buffer object.
* \param usage Only check whether the buffer is busy for the given usage.
*/
boolean (*buffer_is_busy)(struct pb_buffer *buf,
enum radeon_bo_usage usage);
 
/**
* Wait for a buffer object until it is not used by a GPU. This is
* equivalent to a fence placed after the last command using the buffer,
* and synchronizing to the fence.
*
* \param buf A winsys buffer object to wait for.
* \param usage Only wait until the buffer is idle for the given usage,
* but may still be busy for some other usage.
*/
void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
 
/**
* Return tiling flags describing a memory layout of a buffer object.
*
* \param buf A winsys buffer object to get the flags from.
* \param microtile A pointer to the return value of the microtile flag.
* \param macrotile A pointer to the return value of the macrotile flag.
*
* \note microtile and macrotile are not bitmasks!
*/
void (*buffer_get_tiling)(struct pb_buffer *buf,
enum radeon_bo_layout *microtile,
enum radeon_bo_layout *macrotile,
unsigned *bankw, unsigned *bankh,
unsigned *tile_split,
unsigned *stencil_tile_split,
unsigned *mtilea,
bool *scanout);
 
/**
* Set tiling flags describing a memory layout of a buffer object.
*
* \param buf A winsys buffer object to set the flags for.
* \param cs A command stream to flush if the buffer is referenced by it.
* \param macrotile A macrotile flag.
* \param microtile A microtile flag.
* \param stride A stride of the buffer in bytes, for texturing.
*
* \note microtile and macrotile are not bitmasks!
*/
void (*buffer_set_tiling)(struct pb_buffer *buf,
struct radeon_winsys_cs *rcs,
enum radeon_bo_layout microtile,
enum radeon_bo_layout macrotile,
unsigned bankw, unsigned bankh,
unsigned tile_split,
unsigned stencil_tile_split,
unsigned mtilea,
unsigned stride,
bool scanout);
 
/**
* Get a winsys buffer from a winsys handle. The internal structure
* of the handle is platform-specific and only a winsys should access it.
*
* \param ws The winsys this function is called from.
* \param whandle A winsys handle pointer as was received from a state
* tracker.
* \param stride The returned buffer stride in bytes.
*/
struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws,
struct winsys_handle *whandle,
unsigned *stride);
 
/**
* Get a winsys buffer from a user pointer. The resulting buffer can't
* be exported. Both pointer and size must be page aligned.
*
* \param ws The winsys this function is called from.
* \param pointer User pointer to turn into a buffer object.
* \param size Size in bytes for the new buffer.
*/
struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws,
void *pointer, unsigned size);
 
/**
* Get a winsys handle from a winsys buffer. The internal structure
* of the handle is platform-specific and only a winsys should access it.
*
* \param buf A winsys buffer object to get the handle from.
* \param whandle A winsys handle pointer.
* \param stride A stride of the buffer in bytes, for texturing.
* \return TRUE on success.
*/
boolean (*buffer_get_handle)(struct pb_buffer *buf,
unsigned stride,
struct winsys_handle *whandle);
 
/**
* Return the virtual address of a buffer.
*
* \param buf A winsys buffer object
* \return virtual address
*/
uint64_t (*buffer_get_virtual_address)(struct radeon_winsys_cs_handle *buf);
 
/**
* Query the initial placement of the buffer from the kernel driver.
*/
enum radeon_bo_domain (*buffer_get_initial_domain)(struct radeon_winsys_cs_handle *buf);
 
/**************************************************************************
* Command submission.
*
* Each pipe context should create its own command stream and submit
* commands independently of other contexts.
*************************************************************************/
 
/**
* Create a command stream.
*
* \param ws The winsys this function is called from.
* \param ring_type The ring type (GFX, DMA, UVD)
* \param flush Flush callback function associated with the command stream.
* \param flush_ctx User pointer that will be passed to the flush callback.
* \param trace_buf Trace buffer when tracing is enabled
*/
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
void *flush_ctx,
struct radeon_winsys_cs_handle *trace_buf);
 
/**
* Destroy a command stream.
*
* \param cs A command stream to destroy.
*/
void (*cs_destroy)(struct radeon_winsys_cs *cs);
 
/**
* Add a new buffer relocation. Every relocation must first be added
* before it can be written.
*
* \param cs A command stream to add buffer for validation against.
* \param buf A winsys buffer to validate.
* \param usage Whether the buffer is used for read and/or write.
* \param domain Bitmask of the RADEON_DOMAIN_* flags.
* \param priority A higher number means a greater chance of being
* placed in the requested domain. 15 is the maximum.
* \return Relocation index.
*/
unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domain,
enum radeon_bo_priority priority);
 
/**
* Return the index of an already-added buffer.
*
* \param cs Command stream
* \param buf Buffer
* \return The buffer index, or -1 if the buffer has not been added.
*/
int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf);
 
/**
* Return TRUE if there is enough memory in VRAM and GTT for the relocs
* added so far. If the validation fails, all the relocations which have
* been added since the last call of cs_validate will be removed and
* the CS will be flushed (provided there are still any relocations).
*
* \param cs A command stream to validate.
*/
boolean (*cs_validate)(struct radeon_winsys_cs *cs);
 
/**
* Return TRUE if there is enough memory in VRAM and GTT for the relocs
* added so far.
*
* \param cs A command stream to validate.
* \param vram VRAM memory size pending to be used
* \param gtt GTT memory size pending to be used
*/
boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
 
/**
* Flush a command stream.
*
* \param cs A command stream to flush.
* \param flags RADEON_FLUSH_ASYNC or 0.
* \param fence Pointer to a fence. If non-NULL, a fence is inserted
* after the CS and is returned through this parameter.
* \param cs_trace_id A unique identifier of the cs, used for tracing.
*/
void (*cs_flush)(struct radeon_winsys_cs *cs,
unsigned flags,
struct pipe_fence_handle **fence,
uint32_t cs_trace_id);
 
/**
* Return TRUE if a buffer is referenced by a command stream.
*
* \param cs A command stream.
* \param buf A winsys buffer.
*/
boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage);
 
/**
* Request access to a feature for a command stream.
*
* \param cs A command stream.
* \param fid Feature ID, one of RADEON_FID_*
* \param enable Whether to enable or disable the feature.
*/
boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
enum radeon_feature_id fid,
boolean enable);
/**
* Make sure all asynchronous flush of the cs have completed
*
* \param cs A command stream.
*/
void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
 
/**
* Wait for the fence and return true if the fence has been signalled.
* The timeout of 0 will only return the status.
* The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
* is signalled.
*/
bool (*fence_wait)(struct radeon_winsys *ws,
struct pipe_fence_handle *fence,
uint64_t timeout);
 
/**
* Reference counting for fences.
*/
void (*fence_reference)(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src);
 
/**
* Initialize surface
*
* \param ws The winsys this function is called from.
* \param surf Surface structure ptr
*/
int (*surface_init)(struct radeon_winsys *ws,
struct radeon_surf *surf);
 
/**
* Find best values for a surface
*
* \param ws The winsys this function is called from.
* \param surf Surface structure ptr
*/
int (*surface_best)(struct radeon_winsys *ws,
struct radeon_surf *surf);
 
uint64_t (*query_value)(struct radeon_winsys *ws,
enum radeon_value_id value);
 
void (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset,
unsigned num_registers, uint32_t *out);
};
 
 
static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
{
cs->buf[cs->cdw++] = value;
}
 
static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
const uint32_t *values, unsigned count)
{
memcpy(cs->buf+cs->cdw, values, count * 4);
cs->cdw += count;
}
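 
/*
 * Minimal, hypothetical usage of the helpers above: both append dwords at
 * cs->cdw, so the caller must have reserved space (cdw stays below
 * RADEON_MAX_CMDBUF_DWORDS).
 *
 * uint32_t payload[2] = {0, 0};
 * radeon_emit(cs, header);            // one dword
 * radeon_emit_array(cs, payload, 2);  // count in dwords (copies count * 4 bytes)
 */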
 
#endif