/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/LLVM_REVISION.txt |
---|
0,0 → 1,0 |
@181269 |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.am |
---|
0,0 → 1,35 |
# Automake fragment for the shared radeon/r600 gallium helper code.
# Builds a libtool *convenience* library (noinst_LTLIBRARIES — never
# installed) that the r600g/radeonsi driver targets link against.
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc

AM_CFLAGS = \
	$(GALLIUM_DRIVER_CFLAGS) \
	$(RADEON_CFLAGS) \
	-Wstrict-overflow=0

# ^^ disable warnings about overflows (os_time_timeout)

noinst_LTLIBRARIES = libradeon.la

libradeon_la_SOURCES = \
	$(C_SOURCES)

# LLVM-backed paths (ELF utils, TGSI->LLVM emission) are compiled only when
# configure set the NEED_RADEON_LLVM conditional; they pull in the LLVM and
# libelf link dependencies below.
if NEED_RADEON_LLVM

AM_CFLAGS += \
	$(LLVM_CFLAGS)

libradeon_la_SOURCES += \
	$(LLVM_C_FILES)

libradeon_la_LIBADD = \
	$(CLOCK_LIB) \
	$(LLVM_LIBS) \
	$(ELF_LIB)

libradeon_la_LDFLAGS = \
	$(LLVM_LDFLAGS)

endif

EXTRA_DIST = \
	LLVM_REVISION.txt
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.in |
---|
0,0 → 1,917 |
# Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@

# NOTE(review): generated file — do not edit by hand; change Makefile.am
# (or Makefile.sources) and re-run automake instead.

# Copyright (C) 1994-2014 Free Software Foundation, Inc.

# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@ |
VPATH = @srcdir@ |
am__is_gnu_make = { \ |
if test -z '$(MAKELEVEL)'; then \ |
false; \ |
elif test -n '$(MAKE_HOST)'; then \ |
true; \ |
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ |
true; \ |
else \ |
false; \ |
fi; \ |
} |
am__make_running_with_option = \ |
case $${target_option-} in \ |
?) ;; \ |
*) echo "am__make_running_with_option: internal error: invalid" \ |
"target option '$${target_option-}' specified" >&2; \ |
exit 1;; \ |
esac; \ |
has_opt=no; \ |
sane_makeflags=$$MAKEFLAGS; \ |
if $(am__is_gnu_make); then \ |
sane_makeflags=$$MFLAGS; \ |
else \ |
case $$MAKEFLAGS in \ |
*\\[\ \ ]*) \ |
bs=\\; \ |
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ |
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ |
esac; \ |
fi; \ |
skip_next=no; \ |
strip_trailopt () \ |
{ \ |
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ |
}; \ |
for flg in $$sane_makeflags; do \ |
test $$skip_next = yes && { skip_next=no; continue; }; \ |
case $$flg in \ |
*=*|--*) continue;; \ |
-*I) strip_trailopt 'I'; skip_next=yes;; \ |
-*I?*) strip_trailopt 'I';; \ |
-*O) strip_trailopt 'O'; skip_next=yes;; \ |
-*O?*) strip_trailopt 'O';; \ |
-*l) strip_trailopt 'l'; skip_next=yes;; \ |
-*l?*) strip_trailopt 'l';; \ |
-[dEDm]) skip_next=yes;; \ |
-[JT]) skip_next=yes;; \ |
esac; \ |
case $$flg in \ |
*$$target_option*) has_opt=yes; break;; \ |
esac; \ |
done; \ |
test $$has_opt = yes |
am__make_dryrun = (target_option=n; $(am__make_running_with_option)) |
am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) |
pkgdatadir = $(datadir)/@PACKAGE@ |
pkgincludedir = $(includedir)/@PACKAGE@ |
pkglibdir = $(libdir)/@PACKAGE@ |
pkglibexecdir = $(libexecdir)/@PACKAGE@ |
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd |
install_sh_DATA = $(install_sh) -c -m 644 |
install_sh_PROGRAM = $(install_sh) -c |
install_sh_SCRIPT = $(install_sh) -c |
INSTALL_HEADER = $(INSTALL_DATA) |
transform = $(program_transform_name) |
NORMAL_INSTALL = : |
PRE_INSTALL = : |
POST_INSTALL = : |
NORMAL_UNINSTALL = : |
PRE_UNINSTALL = : |
POST_UNINSTALL = : |
build_triplet = @build@ |
host_triplet = @host@ |
target_triplet = @target@ |
@HAVE_DRISW_TRUE@am__append_1 = \ |
@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la |
@NEED_WINSYS_XLIB_TRUE@am__append_2 = \ |
@NEED_WINSYS_XLIB_TRUE@ $(top_builddir)/src/gallium/winsys/sw/xlib/libws_xlib.la \ |
@NEED_WINSYS_XLIB_TRUE@ -lX11 -lXext -lXfixes \ |
@NEED_WINSYS_XLIB_TRUE@ $(LIBDRM_LIBS) |
@NEED_RADEON_LLVM_TRUE@am__append_3 = \ |
@NEED_RADEON_LLVM_TRUE@ $(LLVM_CFLAGS) |
@NEED_RADEON_LLVM_TRUE@am__append_4 = \ |
@NEED_RADEON_LLVM_TRUE@ $(LLVM_C_FILES) |
subdir = src/gallium/drivers/radeon |
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 |
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \ |
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \ |
$(top_srcdir)/m4/ax_gcc_builtin.m4 \ |
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ |
$(top_srcdir)/m4/ax_prog_bison.m4 \ |
$(top_srcdir)/m4/ax_prog_flex.m4 \ |
$(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ |
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ |
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ |
$(top_srcdir)/VERSION $(top_srcdir)/configure.ac |
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ |
$(ACLOCAL_M4) |
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) |
mkinstalldirs = $(install_sh) -d |
CONFIG_CLEAN_FILES = |
CONFIG_CLEAN_VPATH_FILES = |
LTLIBRARIES = $(noinst_LTLIBRARIES) |
am__DEPENDENCIES_1 = |
@NEED_RADEON_LLVM_TRUE@libradeon_la_DEPENDENCIES = \ |
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) \ |
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) \ |
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) |
am__libradeon_la_SOURCES_DIST = cayman_msaa.c r600_buffer_common.c \ |
r600_cs.h r600d_common.h r600_gpu_load.c r600_pipe_common.c \ |
r600_pipe_common.h r600_query.c r600_streamout.c \ |
r600_texture.c radeon_uvd.c radeon_uvd.h radeon_vce_40_2_2.c \ |
radeon_vce.c radeon_vce.h radeon_video.c radeon_video.h \ |
radeon_winsys.h radeon_elf_util.c radeon_elf_util.h \ |
radeon_llvm_emit.c radeon_llvm_emit.h radeon_llvm.h \ |
radeon_llvm_util.c radeon_llvm_util.h radeon_setup_tgsi_llvm.c |
am__objects_1 = cayman_msaa.lo r600_buffer_common.lo r600_gpu_load.lo \ |
r600_pipe_common.lo r600_query.lo r600_streamout.lo \ |
r600_texture.lo radeon_uvd.lo radeon_vce_40_2_2.lo \ |
radeon_vce.lo radeon_video.lo |
am__objects_2 = radeon_elf_util.lo radeon_llvm_emit.lo \ |
radeon_llvm_util.lo radeon_setup_tgsi_llvm.lo |
@NEED_RADEON_LLVM_TRUE@am__objects_3 = $(am__objects_2) |
am_libradeon_la_OBJECTS = $(am__objects_1) $(am__objects_3) |
libradeon_la_OBJECTS = $(am_libradeon_la_OBJECTS) |
AM_V_lt = $(am__v_lt_@AM_V@) |
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) |
am__v_lt_0 = --silent |
am__v_lt_1 = |
libradeon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ |
$(libradeon_la_LDFLAGS) $(LDFLAGS) -o $@ |
AM_V_P = $(am__v_P_@AM_V@) |
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) |
am__v_P_0 = false |
am__v_P_1 = : |
AM_V_GEN = $(am__v_GEN_@AM_V@) |
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) |
am__v_GEN_0 = @echo " GEN " $@; |
am__v_GEN_1 = |
AM_V_at = $(am__v_at_@AM_V@) |
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) |
am__v_at_0 = @ |
am__v_at_1 = |
DEFAULT_INCLUDES = -I.@am__isrc@ |
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp |
am__depfiles_maybe = depfiles |
am__mv = mv -f |
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ |
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ |
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ |
$(AM_CFLAGS) $(CFLAGS) |
AM_V_CC = $(am__v_CC_@AM_V@) |
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) |
am__v_CC_0 = @echo " CC " $@; |
am__v_CC_1 = |
CCLD = $(CC) |
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ |
$(AM_LDFLAGS) $(LDFLAGS) -o $@ |
AM_V_CCLD = $(am__v_CCLD_@AM_V@) |
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) |
am__v_CCLD_0 = @echo " CCLD " $@; |
am__v_CCLD_1 = |
SOURCES = $(libradeon_la_SOURCES) |
DIST_SOURCES = $(am__libradeon_la_SOURCES_DIST) |
am__can_run_installinfo = \ |
case $$AM_UPDATE_INFO_DIR in \ |
n|no|NO) false;; \ |
*) (install-info --version) >/dev/null 2>&1;; \ |
esac |
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) |
# Read a list of newline-separated strings from the standard input, |
# and print each of them once, without duplicates. Input order is |
# *not* preserved. |
am__uniquify_input = $(AWK) '\ |
BEGIN { nonempty = 0; } \ |
{ items[$$0] = 1; nonempty = 1; } \ |
END { if (nonempty) { for (i in items) print i; }; } \ |
' |
# Make sure the list of sources is unique. This is necessary because, |
# e.g., the same source file might be shared among _SOURCES variables |
# for different programs/libraries. |
am__define_uniq_tagged_files = \ |
list='$(am__tagged_files)'; \ |
unique=`for i in $$list; do \ |
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ |
done | $(am__uniquify_input)` |
ETAGS = etags |
CTAGS = ctags |
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \ |
$(top_srcdir)/bin/depcomp \ |
$(top_srcdir)/src/gallium/Automake.inc |
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) |
ACLOCAL = @ACLOCAL@ |
AMTAR = @AMTAR@ |
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ |
AR = @AR@ |
AUTOCONF = @AUTOCONF@ |
AUTOHEADER = @AUTOHEADER@ |
AUTOMAKE = @AUTOMAKE@ |
AWK = @AWK@ |
BSYMBOLIC = @BSYMBOLIC@ |
CC = @CC@ |
CCAS = @CCAS@ |
CCASDEPMODE = @CCASDEPMODE@ |
CCASFLAGS = @CCASFLAGS@ |
CCDEPMODE = @CCDEPMODE@ |
CFLAGS = @CFLAGS@ |
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ |
CLOCK_LIB = @CLOCK_LIB@ |
CPP = @CPP@ |
CPPFLAGS = @CPPFLAGS@ |
CXX = @CXX@ |
CXXCPP = @CXXCPP@ |
CXXDEPMODE = @CXXDEPMODE@ |
CXXFLAGS = @CXXFLAGS@ |
CYGPATH_W = @CYGPATH_W@ |
D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ |
DEFINES = @DEFINES@ |
DEFS = @DEFS@ |
DEPDIR = @DEPDIR@ |
DLLTOOL = @DLLTOOL@ |
DLOPEN_LIBS = @DLOPEN_LIBS@ |
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ |
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ |
DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@ |
DRI3PROTO_LIBS = @DRI3PROTO_LIBS@ |
DRIGL_CFLAGS = @DRIGL_CFLAGS@ |
DRIGL_LIBS = @DRIGL_LIBS@ |
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ |
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ |
DRI_LIB_DEPS = @DRI_LIB_DEPS@ |
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ |
DSYMUTIL = @DSYMUTIL@ |
DUMPBIN = @DUMPBIN@ |
ECHO_C = @ECHO_C@ |
ECHO_N = @ECHO_N@ |
ECHO_T = @ECHO_T@ |
EGL_CFLAGS = @EGL_CFLAGS@ |
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ |
EGL_LIB_DEPS = @EGL_LIB_DEPS@ |
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ |
EGREP = @EGREP@ |
ELF_LIB = @ELF_LIB@ |
EXEEXT = @EXEEXT@ |
EXPAT_CFLAGS = @EXPAT_CFLAGS@ |
EXPAT_LIBS = @EXPAT_LIBS@ |
FGREP = @FGREP@ |
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ |
FREEDRENO_LIBS = @FREEDRENO_LIBS@ |
GALLIUM_PIPE_LOADER_CLIENT_DEFINES = @GALLIUM_PIPE_LOADER_CLIENT_DEFINES@ |
GALLIUM_PIPE_LOADER_CLIENT_LIBS = @GALLIUM_PIPE_LOADER_CLIENT_LIBS@ |
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ |
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@ |
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@ |
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@ |
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ |
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ |
GC_SECTIONS = @GC_SECTIONS@ |
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ |
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ |
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ |
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ |
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ |
GLPROTO_LIBS = @GLPROTO_LIBS@ |
GLX_TLS = @GLX_TLS@ |
GL_LIB = @GL_LIB@ |
GL_LIB_DEPS = @GL_LIB_DEPS@ |
GL_PC_CFLAGS = @GL_PC_CFLAGS@ |
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ |
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ |
GREP = @GREP@ |
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ |
INDENT = @INDENT@ |
INDENT_FLAGS = @INDENT_FLAGS@ |
INSTALL = @INSTALL@ |
INSTALL_DATA = @INSTALL_DATA@ |
INSTALL_PROGRAM = @INSTALL_PROGRAM@ |
INSTALL_SCRIPT = @INSTALL_SCRIPT@ |
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ |
INTEL_CFLAGS = @INTEL_CFLAGS@ |
INTEL_LIBS = @INTEL_LIBS@ |
LD = @LD@ |
LDFLAGS = @LDFLAGS@ |
LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ |
LEX = @LEX@ |
LEXLIB = @LEXLIB@ |
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ |
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ |
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ |
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ |
LIBDRM_LIBS = @LIBDRM_LIBS@ |
LIBOBJS = @LIBOBJS@ |
LIBS = @LIBS@ |
LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@ |
LIBSHA1_LIBS = @LIBSHA1_LIBS@ |
LIBTOOL = @LIBTOOL@ |
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@ |
LIBUDEV_LIBS = @LIBUDEV_LIBS@ |
LIB_DIR = @LIB_DIR@ |
LIB_EXT = @LIB_EXT@ |
LIPO = @LIPO@ |
LLVM_BINDIR = @LLVM_BINDIR@ |
LLVM_CFLAGS = @LLVM_CFLAGS@ |
LLVM_CONFIG = @LLVM_CONFIG@ |
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ |
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ |
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ |
LLVM_LDFLAGS = @LLVM_LDFLAGS@ |
LLVM_LIBDIR = @LLVM_LIBDIR@ |
LLVM_LIBS = @LLVM_LIBS@ |
LLVM_VERSION = @LLVM_VERSION@ |
LN_S = @LN_S@ |
LTLIBOBJS = @LTLIBOBJS@ |
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ |
MAKEINFO = @MAKEINFO@ |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
MESA_LLVM = @MESA_LLVM@ |
MKDIR_P = @MKDIR_P@ |
MSVC2008_COMPAT_CFLAGS = @MSVC2008_COMPAT_CFLAGS@ |
MSVC2008_COMPAT_CXXFLAGS = @MSVC2008_COMPAT_CXXFLAGS@ |
MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ |
MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ |
NINE_MAJOR = @NINE_MAJOR@ |
NINE_MINOR = @NINE_MINOR@ |
NINE_TINY = @NINE_TINY@ |
NINE_VERSION = @NINE_VERSION@ |
NM = @NM@ |
NMEDIT = @NMEDIT@ |
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ |
NOUVEAU_LIBS = @NOUVEAU_LIBS@ |
OBJDUMP = @OBJDUMP@ |
OBJEXT = @OBJEXT@ |
OMX_CFLAGS = @OMX_CFLAGS@ |
OMX_LIBS = @OMX_LIBS@ |
OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@ |
OPENCL_LIBNAME = @OPENCL_LIBNAME@ |
OPENSSL_CFLAGS = @OPENSSL_CFLAGS@ |
OPENSSL_LIBS = @OPENSSL_LIBS@ |
OSMESA_LIB = @OSMESA_LIB@ |
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ |
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ |
OSMESA_PC_REQ = @OSMESA_PC_REQ@ |
OSMESA_VERSION = @OSMESA_VERSION@ |
OTOOL = @OTOOL@ |
OTOOL64 = @OTOOL64@ |
PACKAGE = @PACKAGE@ |
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ |
PACKAGE_NAME = @PACKAGE_NAME@ |
PACKAGE_STRING = @PACKAGE_STRING@ |
PACKAGE_TARNAME = @PACKAGE_TARNAME@ |
PACKAGE_URL = @PACKAGE_URL@ |
PACKAGE_VERSION = @PACKAGE_VERSION@ |
PATH_SEPARATOR = @PATH_SEPARATOR@ |
PKG_CONFIG = @PKG_CONFIG@ |
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ |
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ |
POSIX_SHELL = @POSIX_SHELL@ |
PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@ |
PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@ |
PTHREAD_CC = @PTHREAD_CC@ |
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ |
PTHREAD_LIBS = @PTHREAD_LIBS@ |
PYTHON2 = @PYTHON2@ |
RADEON_CFLAGS = @RADEON_CFLAGS@ |
RADEON_LIBS = @RADEON_LIBS@ |
RANLIB = @RANLIB@ |
SED = @SED@ |
SELINUX_CFLAGS = @SELINUX_CFLAGS@ |
SELINUX_LIBS = @SELINUX_LIBS@ |
SET_MAKE = @SET_MAKE@ |
SHA1_CFLAGS = @SHA1_CFLAGS@ |
SHA1_LIBS = @SHA1_LIBS@ |
SHELL = @SHELL@ |
SSE41_CFLAGS = @SSE41_CFLAGS@ |
STRIP = @STRIP@ |
VA_CFLAGS = @VA_CFLAGS@ |
VA_LIBS = @VA_LIBS@ |
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ |
VA_MAJOR = @VA_MAJOR@ |
VA_MINOR = @VA_MINOR@ |
VDPAU_CFLAGS = @VDPAU_CFLAGS@ |
VDPAU_LIBS = @VDPAU_LIBS@ |
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ |
VDPAU_MAJOR = @VDPAU_MAJOR@ |
VDPAU_MINOR = @VDPAU_MINOR@ |
VERSION = @VERSION@ |
VG_LIB_DEPS = @VG_LIB_DEPS@ |
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ |
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ |
VL_CFLAGS = @VL_CFLAGS@ |
VL_LIBS = @VL_LIBS@ |
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@ |
WAYLAND_LIBS = @WAYLAND_LIBS@ |
WAYLAND_SCANNER = @WAYLAND_SCANNER@ |
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ |
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ |
X11_INCLUDES = @X11_INCLUDES@ |
XA_MAJOR = @XA_MAJOR@ |
XA_MINOR = @XA_MINOR@ |
XA_TINY = @XA_TINY@ |
XA_VERSION = @XA_VERSION@ |
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ |
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ |
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ |
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ |
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ |
XLIBGL_LIBS = @XLIBGL_LIBS@ |
XVMC_CFLAGS = @XVMC_CFLAGS@ |
XVMC_LIBS = @XVMC_LIBS@ |
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ |
XVMC_MAJOR = @XVMC_MAJOR@ |
XVMC_MINOR = @XVMC_MINOR@ |
YACC = @YACC@ |
YFLAGS = @YFLAGS@ |
abs_builddir = @abs_builddir@ |
abs_srcdir = @abs_srcdir@ |
abs_top_builddir = @abs_top_builddir@ |
abs_top_srcdir = @abs_top_srcdir@ |
ac_ct_AR = @ac_ct_AR@ |
ac_ct_CC = @ac_ct_CC@ |
ac_ct_CXX = @ac_ct_CXX@ |
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ |
acv_mako_found = @acv_mako_found@ |
am__include = @am__include@ |
am__leading_dot = @am__leading_dot@ |
am__quote = @am__quote@ |
am__tar = @am__tar@ |
am__untar = @am__untar@ |
ax_pthread_config = @ax_pthread_config@ |
bindir = @bindir@ |
build = @build@ |
build_alias = @build_alias@ |
build_cpu = @build_cpu@ |
build_os = @build_os@ |
build_vendor = @build_vendor@ |
builddir = @builddir@ |
datadir = @datadir@ |
datarootdir = @datarootdir@ |
docdir = @docdir@ |
dvidir = @dvidir@ |
exec_prefix = @exec_prefix@ |
host = @host@ |
host_alias = @host_alias@ |
host_cpu = @host_cpu@ |
host_os = @host_os@ |
host_vendor = @host_vendor@ |
htmldir = @htmldir@ |
ifGNUmake = @ifGNUmake@ |
includedir = @includedir@ |
infodir = @infodir@ |
install_sh = @install_sh@ |
libdir = @libdir@ |
libexecdir = @libexecdir@ |
localedir = @localedir@ |
localstatedir = @localstatedir@ |
mandir = @mandir@ |
mkdir_p = @mkdir_p@ |
oldincludedir = @oldincludedir@ |
pdfdir = @pdfdir@ |
prefix = @prefix@ |
program_transform_name = @program_transform_name@ |
psdir = @psdir@ |
sbindir = @sbindir@ |
sharedstatedir = @sharedstatedir@ |
srcdir = @srcdir@ |
sysconfdir = @sysconfdir@ |
target = @target@ |
target_alias = @target_alias@ |
target_cpu = @target_cpu@ |
target_os = @target_os@ |
target_vendor = @target_vendor@ |
top_build_prefix = @top_build_prefix@ |
top_builddir = @top_builddir@ |
top_srcdir = @top_srcdir@ |
C_SOURCES := \ |
cayman_msaa.c \ |
r600_buffer_common.c \ |
r600_cs.h \ |
r600d_common.h \ |
r600_gpu_load.c \ |
r600_pipe_common.c \ |
r600_pipe_common.h \ |
r600_query.c \ |
r600_streamout.c \ |
r600_texture.c \ |
radeon_uvd.c \ |
radeon_uvd.h \ |
radeon_vce_40_2_2.c \ |
radeon_vce.c \ |
radeon_vce.h \ |
radeon_video.c \ |
radeon_video.h \ |
radeon_winsys.h |
LLVM_C_FILES := \ |
radeon_elf_util.c \ |
radeon_elf_util.h \ |
radeon_llvm_emit.c \ |
radeon_llvm_emit.h \ |
radeon_llvm.h \ |
radeon_llvm_util.c \ |
radeon_llvm_util.h \ |
radeon_setup_tgsi_llvm.c |
GALLIUM_CFLAGS = \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
$(DEFINES) |
# src/gallium/auxiliary must appear before src/gallium/drivers |
# because there are stupidly two rbug_context.h files in |
# different directories, and which one is included by the |
# preprocessor is determined by the ordering of the -I flags. |
GALLIUM_DRIVER_CFLAGS = \ |
-I$(srcdir)/include \ |
-I$(top_srcdir)/src \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
-I$(top_srcdir)/src/gallium/drivers \ |
-I$(top_srcdir)/src/gallium/winsys \ |
$(DEFINES) \ |
$(VISIBILITY_CFLAGS) |
GALLIUM_DRIVER_CXXFLAGS = \ |
-I$(srcdir)/include \ |
-I$(top_srcdir)/src \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
-I$(top_srcdir)/src/gallium/drivers \ |
-I$(top_srcdir)/src/gallium/winsys \ |
$(DEFINES) \ |
$(VISIBILITY_CXXFLAGS) |
GALLIUM_TARGET_CFLAGS = \ |
-I$(top_srcdir)/src \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/loader \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
-I$(top_srcdir)/src/gallium/drivers \ |
-I$(top_srcdir)/src/gallium/winsys \ |
$(DEFINES) \ |
$(PTHREAD_CFLAGS) \ |
$(LIBDRM_CFLAGS) \ |
$(VISIBILITY_CFLAGS) |
GALLIUM_COMMON_LIB_DEPS = \ |
-lm \ |
$(CLOCK_LIB) \ |
$(PTHREAD_LIBS) \ |
$(DLOPEN_LIBS) |
GALLIUM_WINSYS_CFLAGS = \ |
-I$(top_srcdir)/src \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
$(DEFINES) \ |
$(VISIBILITY_CFLAGS) |
GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ |
$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ |
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ |
$(am__append_1) $(am__append_2) |
AM_CFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(RADEON_CFLAGS) \ |
-Wstrict-overflow=0 $(am__append_3) |
# ^^ disable warnings about overflows (os_time_timeout) |
noinst_LTLIBRARIES = libradeon.la |
libradeon_la_SOURCES = $(C_SOURCES) $(am__append_4) |
@NEED_RADEON_LLVM_TRUE@libradeon_la_LIBADD = \ |
@NEED_RADEON_LLVM_TRUE@ $(CLOCK_LIB) \ |
@NEED_RADEON_LLVM_TRUE@ $(LLVM_LIBS) \ |
@NEED_RADEON_LLVM_TRUE@ $(ELF_LIB) |
@NEED_RADEON_LLVM_TRUE@libradeon_la_LDFLAGS = \ |
@NEED_RADEON_LLVM_TRUE@ $(LLVM_LDFLAGS) |
EXTRA_DIST = \ |
LLVM_REVISION.txt |
all: all-am |
.SUFFIXES: |
.SUFFIXES: .c .lo .o .obj |
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) |
@for dep in $?; do \ |
case '$(am__configure_deps)' in \ |
*$$dep*) \ |
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ |
&& { if test -f $@; then exit 0; else break; fi; }; \ |
exit 1;; \ |
esac; \ |
done; \ |
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile'; \ |
$(am__cd) $(top_srcdir) && \ |
$(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile |
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
@case '$?' in \ |
*config.status*) \ |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ |
*) \ |
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ |
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ |
esac; |
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty): |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(top_srcdir)/configure: $(am__configure_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(am__aclocal_m4_deps): |
clean-noinstLTLIBRARIES: |
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) |
@list='$(noinst_LTLIBRARIES)'; \ |
locs=`for p in $$list; do echo $$p; done | \ |
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ |
sort -u`; \ |
test -z "$$locs" || { \ |
echo rm -f $${locs}; \ |
rm -f $${locs}; \ |
} |
libradeon.la: $(libradeon_la_OBJECTS) $(libradeon_la_DEPENDENCIES) $(EXTRA_libradeon_la_DEPENDENCIES) |
$(AM_V_CCLD)$(libradeon_la_LINK) $(libradeon_la_OBJECTS) $(libradeon_la_LIBADD) $(LIBS) |
mostlyclean-compile: |
-rm -f *.$(OBJEXT) |
distclean-compile: |
-rm -f *.tab.c |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cayman_msaa.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_streamout.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_elf_util.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_llvm_emit.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_llvm_util.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_setup_tgsi_llvm.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_40_2_2.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_video.Plo@am__quote@ |
.c.o: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< |
.c.obj: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` |
.c.lo: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< |
mostlyclean-libtool: |
-rm -f *.lo |
clean-libtool: |
-rm -rf .libs _libs |
ID: $(am__tagged_files) |
$(am__define_uniq_tagged_files); mkid -fID $$unique |
tags: tags-am |
TAGS: tags |
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) |
set x; \ |
here=`pwd`; \ |
$(am__define_uniq_tagged_files); \ |
shift; \ |
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ |
test -n "$$unique" || unique=$$empty_fix; \ |
if test $$# -gt 0; then \ |
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ |
"$$@" $$unique; \ |
else \ |
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ |
$$unique; \ |
fi; \ |
fi |
ctags: ctags-am |
CTAGS: ctags |
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) |
$(am__define_uniq_tagged_files); \ |
test -z "$(CTAGS_ARGS)$$unique" \ |
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ |
$$unique |
GTAGS: |
here=`$(am__cd) $(top_builddir) && pwd` \ |
&& $(am__cd) $(top_srcdir) \ |
&& gtags -i $(GTAGS_ARGS) "$$here" |
cscopelist: cscopelist-am |
cscopelist-am: $(am__tagged_files) |
list='$(am__tagged_files)'; \ |
case "$(srcdir)" in \ |
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ |
*) sdir=$(subdir)/$(srcdir) ;; \ |
esac; \ |
for i in $$list; do \ |
if test -f "$$i"; then \ |
echo "$(subdir)/$$i"; \ |
else \ |
echo "$$sdir/$$i"; \ |
fi; \ |
done >> $(top_builddir)/cscope.files |
distclean-tags: |
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags |
distdir: $(DISTFILES) |
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ |
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ |
list='$(DISTFILES)'; \ |
dist_files=`for file in $$list; do echo $$file; done | \ |
sed -e "s|^$$srcdirstrip/||;t" \ |
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ |
case $$dist_files in \ |
*/*) $(MKDIR_P) `echo "$$dist_files" | \ |
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ |
sort -u` ;; \ |
esac; \ |
for file in $$dist_files; do \ |
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ |
if test -d $$d/$$file; then \ |
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ |
if test -d "$(distdir)/$$file"; then \ |
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ |
fi; \ |
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ |
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ |
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ |
fi; \ |
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ |
else \ |
test -f "$(distdir)/$$file" \ |
|| cp -p $$d/$$file "$(distdir)/$$file" \ |
|| exit 1; \ |
fi; \ |
done |
check-am: all-am |
check: check-am |
all-am: Makefile $(LTLIBRARIES) |
installdirs: |
install: install-am |
install-exec: install-exec-am |
install-data: install-data-am |
uninstall: uninstall-am |
install-am: all-am |
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am |
installcheck: installcheck-am |
install-strip: |
if test -z '$(STRIP)'; then \ |
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ |
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ |
install; \ |
else \ |
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ |
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ |
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ |
fi |
mostlyclean-generic: |
clean-generic: |
distclean-generic: |
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) |
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) |
maintainer-clean-generic: |
@echo "This command is intended for maintainers to use" |
@echo "it deletes files that may require special tools to rebuild." |
clean: clean-am |
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ |
mostlyclean-am |
distclean: distclean-am |
-rm -rf ./$(DEPDIR) |
-rm -f Makefile |
distclean-am: clean-am distclean-compile distclean-generic \ |
distclean-tags |
dvi: dvi-am |
dvi-am: |
html: html-am |
html-am: |
info: info-am |
info-am: |
install-data-am: |
install-dvi: install-dvi-am |
install-dvi-am: |
install-exec-am: |
install-html: install-html-am |
install-html-am: |
install-info: install-info-am |
install-info-am: |
install-man: |
install-pdf: install-pdf-am |
install-pdf-am: |
install-ps: install-ps-am |
install-ps-am: |
installcheck-am: |
maintainer-clean: maintainer-clean-am |
-rm -rf ./$(DEPDIR) |
-rm -f Makefile |
maintainer-clean-am: distclean-am maintainer-clean-generic |
mostlyclean: mostlyclean-am |
mostlyclean-am: mostlyclean-compile mostlyclean-generic \ |
mostlyclean-libtool |
pdf: pdf-am |
pdf-am: |
ps: ps-am |
ps-am: |
uninstall-am: |
.MAKE: install-am install-strip |
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ |
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ |
ctags-am distclean distclean-compile distclean-generic \ |
distclean-libtool distclean-tags distdir dvi dvi-am html \ |
html-am info info-am install install-am install-data \ |
install-data-am install-dvi install-dvi-am install-exec \ |
install-exec-am install-html install-html-am install-info \ |
install-info-am install-man install-pdf install-pdf-am \ |
install-ps install-ps-am install-strip installcheck \ |
installcheck-am installdirs maintainer-clean \ |
maintainer-clean-generic mostlyclean mostlyclean-compile \ |
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ |
tags tags-am uninstall uninstall-am |
.PRECIOUS: Makefile |
# Tell versions [3.59,3.63) of GNU make to not export all variables. |
# Otherwise a system limit (for SysV at least) may be exceeded. |
.NOEXPORT: |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.sources |
---|
0,0 → 1,29 |
# Core radeon/r600 common code, always compiled into libradeon.la.
# Headers are listed alongside the .c files so automake picks them up for
# distribution/dependency tracking (they appear in DIST_SOURCES in the
# generated Makefile.in); keep this list byte-identical to the copy automake
# inlined there, or regenerate Makefile.in after changing it.
C_SOURCES := \
	cayman_msaa.c \
	r600_buffer_common.c \
	r600_cs.h \
	r600d_common.h \
	r600_gpu_load.c \
	r600_pipe_common.c \
	r600_pipe_common.h \
	r600_query.c \
	r600_streamout.c \
	r600_texture.c \
	radeon_uvd.c \
	radeon_uvd.h \
	radeon_vce_40_2_2.c \
	radeon_vce.c \
	radeon_vce.h \
	radeon_video.c \
	radeon_video.h \
	radeon_winsys.h

# Compiled only under the NEED_RADEON_LLVM automake conditional
# (see Makefile.am).
LLVM_C_FILES := \
	radeon_elf_util.c \
	radeon_elf_util.h \
	radeon_llvm_emit.c \
	radeon_llvm_emit.h \
	radeon_llvm.h \
	radeon_llvm_util.c \
	radeon_llvm_util.h \
	radeon_setup_tgsi_llvm.c
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/cayman_msaa.c |
---|
0,0 → 1,250 |
/* |
* Copyright 2014 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
* |
*/ |
#include "r600_cs.h" |
/* Hardware MSAA sample-location tables.
 * NOTE(review): FILL_SREG is defined elsewhere (presumably r600d_common.h);
 * each array entry appears to pack signed 4-bit (x, y) sample offsets into
 * one dword, one dword per pixel of the 2x2 quad (X0Y0, X1Y0, X0Y1, X1Y1) —
 * the rows are repeated so all four pixels share the same pattern.  The
 * *_max_dist_* values are the largest offset magnitude and are programmed
 * into PA_SC_AA_CONFIG by cayman_emit_msaa_config(). */
/* 2xMSAA |
* There are two locations (-4, 4), (4, -4). */ |
const uint32_t eg_sample_locs_2x[4] = { |
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), |
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), |
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), |
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), |
}; |
const unsigned eg_max_dist_2x = 4; |
/* 4xMSAA |
* There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */ |
const uint32_t eg_sample_locs_4x[4] = { |
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), |
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), |
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), |
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), |
}; |
const unsigned eg_max_dist_4x = 6; |
/* Cayman 8xMSAA */ |
static const uint32_t cm_sample_locs_8x[] = { |
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), |
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), |
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), |
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), |
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), |
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), |
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), |
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), |
}; |
static const unsigned cm_max_dist_8x = 8; |
/* Cayman 16xMSAA */ |
static const uint32_t cm_sample_locs_16x[] = { |
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), |
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), |
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), |
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), |
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), |
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), |
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), |
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), |
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), |
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), |
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), |
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), |
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), |
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), |
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), |
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), |
}; |
static const unsigned cm_max_dist_16x = 8; |
/* Fill out_value[0..1] with the (x, y) position of sample 'sample_index'
 * within the pixel, in [0, 1) units: the packed 4-bit signed offset is
 * extracted from the location tables above and mapped as (offset + 8) / 16.
 * Unsupported sample counts fall back to the pixel center (0.5, 0.5).
 * For 8x/16x only the first dword of each 4-dword pixel group is read,
 * which is sufficient since all four pixels repeat the same pattern. */
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, |
unsigned sample_index, float *out_value) |
{ |
int offset, index; |
/* 4-bit signed bitfield: assigning a nibble sign-extends it to int. */
struct { |
int idx:4; |
} val; |
switch (sample_count) { |
case 1: |
default: |
out_value[0] = out_value[1] = 0.5; |
break; |
case 2: |
/* Each sample occupies 8 bits: 4 for x, 4 for y. */
offset = 4 * (sample_index * 2); |
val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; |
out_value[0] = (float)(val.idx + 8) / 16.0f; |
val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; |
out_value[1] = (float)(val.idx + 8) / 16.0f; |
break; |
case 4: |
offset = 4 * (sample_index * 2); |
val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf; |
out_value[0] = (float)(val.idx + 8) / 16.0f; |
val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf; |
out_value[1] = (float)(val.idx + 8) / 16.0f; |
break; |
case 8: |
/* 4 samples per dword; 'index' selects the dword group. */
offset = 4 * (sample_index % 4 * 2); |
index = (sample_index / 4) * 4; |
val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf; |
out_value[0] = (float)(val.idx + 8) / 16.0f; |
val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf; |
out_value[1] = (float)(val.idx + 8) / 16.0f; |
break; |
case 16: |
offset = 4 * (sample_index % 4 * 2); |
index = (sample_index / 4) * 4; |
val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf; |
out_value[0] = (float)(val.idx + 8) / 16.0f; |
val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf; |
out_value[1] = (float)(val.idx + 8) / 16.0f; |
break; |
} |
} |
/* Precompute the sample positions for every supported MSAA mode
 * (1x/2x/4x/8x/16x) and cache them in the common context. */
void cayman_init_msaa(struct pipe_context *ctx) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
int i; |
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]); |
for (i = 0; i < 2; i++) |
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]); |
for (i = 0; i < 4; i++) |
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]); |
for (i = 0; i < 8; i++) |
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]); |
for (i = 0; i < 16; i++) |
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]); |
} |
/* Emit the PA_SC_AA_SAMPLE_LOCS_PIXEL_* registers for the requested sample
 * count.  2x/4x write the four per-pixel registers individually; 8x/16x use
 * a single sequential register write, interleaving the per-pixel dwords
 * (8x pads the unused second pair of slots with zeros).  Other values of
 * nr_samples emit nothing. */
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) |
{ |
switch (nr_samples) { |
case 2: |
r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); |
r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); |
r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]); |
r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]); |
break; |
case 4: |
r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]); |
r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]); |
r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]); |
r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]); |
break; |
case 8: |
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14); |
radeon_emit(cs, cm_sample_locs_8x[0]); |
radeon_emit(cs, cm_sample_locs_8x[4]); |
radeon_emit(cs, 0); |
radeon_emit(cs, 0); |
radeon_emit(cs, cm_sample_locs_8x[1]); |
radeon_emit(cs, cm_sample_locs_8x[5]); |
radeon_emit(cs, 0); |
radeon_emit(cs, 0); |
radeon_emit(cs, cm_sample_locs_8x[2]); |
radeon_emit(cs, cm_sample_locs_8x[6]); |
radeon_emit(cs, 0); |
radeon_emit(cs, 0); |
radeon_emit(cs, cm_sample_locs_8x[3]); |
radeon_emit(cs, cm_sample_locs_8x[7]); |
break; |
case 16: |
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16); |
radeon_emit(cs, cm_sample_locs_16x[0]); |
radeon_emit(cs, cm_sample_locs_16x[4]); |
radeon_emit(cs, cm_sample_locs_16x[8]); |
radeon_emit(cs, cm_sample_locs_16x[12]); |
radeon_emit(cs, cm_sample_locs_16x[1]); |
radeon_emit(cs, cm_sample_locs_16x[5]); |
radeon_emit(cs, cm_sample_locs_16x[9]); |
radeon_emit(cs, cm_sample_locs_16x[13]); |
radeon_emit(cs, cm_sample_locs_16x[2]); |
radeon_emit(cs, cm_sample_locs_16x[6]); |
radeon_emit(cs, cm_sample_locs_16x[10]); |
radeon_emit(cs, cm_sample_locs_16x[14]); |
radeon_emit(cs, cm_sample_locs_16x[3]); |
radeon_emit(cs, cm_sample_locs_16x[7]); |
radeon_emit(cs, cm_sample_locs_16x[11]); |
radeon_emit(cs, cm_sample_locs_16x[15]); |
break; |
} |
} |
/* Emit PA_SC_LINE_CNTL, PA_SC_AA_CONFIG, DB_EQAA and PA_SC_MODE_CNTL_1 for
 * the given MSAA state.  nr_samples > 1 selects real MSAA; otherwise
 * overrast_samples > 1 selects overrasterization only; otherwise all MSAA
 * state is programmed to its disabled defaults. */
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, |
int ps_iter_samples, int overrast_samples) |
{ |
int setup_samples = nr_samples > 1 ? nr_samples : |
overrast_samples > 1 ? overrast_samples : 0; |
if (setup_samples > 1) { |
/* indexed by log2(nr_samples) */ |
unsigned max_dist[] = { |
0, |
eg_max_dist_2x, |
eg_max_dist_4x, |
cm_max_dist_8x, |
cm_max_dist_16x |
}; |
unsigned log_samples = util_logbase2(setup_samples); |
/* ps_iter_samples is rounded up to a power of two first. */
unsigned log_ps_iter_samples = |
util_logbase2(util_next_power_of_two(ps_iter_samples)); |
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); |
radeon_emit(cs, S_028BDC_LAST_PIXEL(1) | |
S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ |
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) | |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ |
if (nr_samples > 1) { |
/* Real MSAA: program EQAA from the sample counts. */
r600_write_context_reg(cs, CM_R_028804_DB_EQAA, |
S_028804_MAX_ANCHOR_SAMPLES(log_samples) | |
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | |
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | |
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) | |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); |
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, |
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1)); |
} else if (overrast_samples > 1) { |
/* Overrasterization only (no MSAA color/depth samples). */
r600_write_context_reg(cs, CM_R_028804_DB_EQAA, |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) | |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | |
S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); |
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0); |
} |
} else { |
/* MSAA disabled: write the default single-sample state. */
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); |
radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ |
radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */ |
r600_write_context_reg(cs, CM_R_028804_DB_EQAA, |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) | |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); |
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0); |
} |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_buffer_common.c |
---|
0,0 → 1,448 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Marek Olšák |
*/ |
#include "r600_cs.h" |
#include "util/u_memory.h" |
#include "util/u_upload_mgr.h" |
#include <inttypes.h> |
#include <stdio.h> |
/* Return TRUE if 'buf' is referenced (for the given usage) by the gfx CS,
 * or by the DMA CS when that ring exists and has pending dwords. */
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx, |
struct radeon_winsys_cs_handle *buf, |
enum radeon_bo_usage usage) |
{ |
if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) { |
return TRUE; |
} |
if (ctx->rings.dma.cs && ctx->rings.dma.cs->cdw && |
ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) { |
return TRUE; |
} |
return FALSE; |
} |
/* Map a buffer, first flushing any ring that still references it so the CPU
 * view is coherent.  PIPE_TRANSFER_UNSYNCHRONIZED maps immediately with no
 * checks; PIPE_TRANSFER_DONTBLOCK kicks off async flushes and returns NULL
 * instead of waiting for the GPU.  Returns the mapped pointer or NULL. */
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, |
struct r600_resource *resource, |
unsigned usage) |
{ |
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE; |
bool busy = false; |
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { |
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage); |
} |
if (!(usage & PIPE_TRANSFER_WRITE)) { |
/* have to wait for the last write */ |
rusage = RADEON_USAGE_WRITE; |
} |
/* The gfx CS is only checked if it has grown past its initial size. */
if (ctx->rings.gfx.cs->cdw != ctx->initial_gfx_cs_size && |
ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, |
resource->cs_buf, rusage)) { |
if (usage & PIPE_TRANSFER_DONTBLOCK) { |
ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); |
return NULL; |
} else { |
ctx->rings.gfx.flush(ctx, 0, NULL); |
busy = true; |
} |
} |
if (ctx->rings.dma.cs && |
ctx->rings.dma.cs->cdw && |
ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, |
resource->cs_buf, rusage)) { |
if (usage & PIPE_TRANSFER_DONTBLOCK) { |
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); |
return NULL; |
} else { |
ctx->rings.dma.flush(ctx, 0, NULL); |
busy = true; |
} |
} |
if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) { |
if (usage & PIPE_TRANSFER_DONTBLOCK) { |
return NULL; |
} else { |
/* We will be wait for the GPU. Wait for any offloaded |
* CS flush to complete to avoid busy-waiting in the winsys. */ |
ctx->ws->cs_sync_flush(ctx->rings.gfx.cs); |
if (ctx->rings.dma.cs) |
ctx->ws->cs_sync_flush(ctx->rings.dma.cs); |
} |
} |
/* Setting the CS to NULL will prevent doing checks we have done already. */ |
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage); |
} |
/* (Re)allocate the GPU backing store of a resource: choose memory domains
 * and flags from the pipe usage, create the winsys buffer, and swap it in
 * (the old buffer reference, if any, is released afterwards).  Returns
 * false on allocation failure, leaving the resource untouched. */
bool r600_init_resource(struct r600_common_screen *rscreen, |
struct r600_resource *res, |
unsigned size, unsigned alignment, |
bool use_reusable_pool) |
{ |
/* Only dereferenced below for textures (target != PIPE_BUFFER). */
struct r600_texture *rtex = (struct r600_texture*)res; |
struct pb_buffer *old_buf, *new_buf; |
enum radeon_bo_flag flags = 0; |
switch (res->b.b.usage) { |
case PIPE_USAGE_STREAM: |
flags = RADEON_FLAG_GTT_WC; |
/* fall through */ |
case PIPE_USAGE_STAGING: |
/* Transfers are likely to occur more often with these resources. */ |
res->domains = RADEON_DOMAIN_GTT; |
break; |
case PIPE_USAGE_DYNAMIC: |
/* Older kernels didn't always flush the HDP cache before |
* CS execution |
*/ |
if (rscreen->info.drm_minor < 40) { |
res->domains = RADEON_DOMAIN_GTT; |
flags |= RADEON_FLAG_GTT_WC; |
break; |
} |
flags |= RADEON_FLAG_CPU_ACCESS; |
/* fall through */ |
case PIPE_USAGE_DEFAULT: |
case PIPE_USAGE_IMMUTABLE: |
default: |
/* Not listing GTT here improves performance in some apps. */ |
res->domains = RADEON_DOMAIN_VRAM; |
flags |= RADEON_FLAG_GTT_WC; |
break; |
} |
if (res->b.b.target == PIPE_BUFFER && |
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) { |
/* Use GTT for all persistent mappings with older kernels, |
* because they didn't always flush the HDP cache before CS |
* execution. |
* |
* Write-combined CPU mappings are fine, the kernel ensures all CPU |
* writes finish before the GPU executes a command stream. |
*/ |
if (rscreen->info.drm_minor < 40) |
res->domains = RADEON_DOMAIN_GTT; |
else if (res->domains & RADEON_DOMAIN_VRAM) |
flags |= RADEON_FLAG_CPU_ACCESS; |
} |
/* Tiled textures are unmappable. Always put them in VRAM. */ |
if (res->b.b.target != PIPE_BUFFER && |
rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) { |
res->domains = RADEON_DOMAIN_VRAM; |
flags &= ~RADEON_FLAG_CPU_ACCESS; |
flags |= RADEON_FLAG_NO_CPU_ACCESS; |
} |
/* Allocate a new resource. */ |
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, |
use_reusable_pool, |
res->domains, flags); |
if (!new_buf) { |
return false; |
} |
/* Replace the pointer such that if res->buf wasn't NULL, it won't be |
* NULL. This should prevent crashes with multiple contexts using |
* the same buffer where one of the contexts invalidates it while |
* the others are using it. */ |
old_buf = res->buf; |
res->cs_buf = rscreen->ws->buffer_get_cs_handle(new_buf); /* should be atomic */ |
res->buf = new_buf; /* should be atomic */ |
if (rscreen->info.r600_virtual_address) |
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->cs_buf); |
else |
res->gpu_address = 0; |
pb_reference(&old_buf, NULL); |
/* New storage has undefined contents: no range is valid and the TC L2
 * cache no longer holds stale data for this resource. */
util_range_set_empty(&res->valid_buffer_range); |
res->TC_L2_dirty = false; |
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) { |
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %u bytes\n", |
res->gpu_address, res->gpu_address + res->buf->size, |
res->buf->size); |
} |
return true; |
} |
/* pipe_screen::resource_destroy for buffers: release the valid-range
 * tracker and the winsys buffer reference, then free the wrapper. */
static void r600_buffer_destroy(struct pipe_screen *screen, |
struct pipe_resource *buf) |
{ |
struct r600_resource *rbuffer = r600_resource(buf); |
util_range_destroy(&rbuffer->valid_buffer_range); |
pb_reference(&rbuffer->buf, NULL); |
FREE(rbuffer); |
} |
/* Allocate a transfer object from the per-context slab, record the mapping
 * parameters, and return the already-computed map pointer 'data'.
 * 'staging'/'offset' describe an optional staging buffer whose contents
 * are copied back to the real resource at unmap time. */
static void *r600_buffer_get_transfer(struct pipe_context *ctx, |
struct pipe_resource *resource, |
unsigned level, |
unsigned usage, |
const struct pipe_box *box, |
struct pipe_transfer **ptransfer, |
void *data, struct r600_resource *staging, |
unsigned offset) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); |
transfer->transfer.resource = resource; |
transfer->transfer.level = level; |
transfer->transfer.usage = usage; |
transfer->transfer.box = *box; |
/* Buffers are 1D: no row or layer stride. */
transfer->transfer.stride = 0; |
transfer->transfer.layer_stride = 0; |
transfer->offset = offset; |
transfer->staging = staging; |
*ptransfer = &transfer->transfer; |
return data; |
} |
/* True if a GPU copy of 'size' bytes from srcx to dstx is possible:
 * CP DMA handles any alignment; otherwise the async DMA ring or the
 * streamout path can be used when offsets and size are dword-aligned. */
static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, |
unsigned dstx, unsigned srcx, unsigned size) |
{ |
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); |
return rctx->screen->has_cp_dma || |
(dword_aligned && (rctx->rings.dma.cs || |
rctx->screen->has_streamout)); |
} |
/* pipe_context::transfer_map for buffers.  Tries, in order, to avoid a GPU
 * stall: (1) unsynchronized map of ranges never written before, (2) whole-
 * resource discard by reallocating the storage, (3) range discard via a
 * temporary upload buffer copied back at unmap, (4) a GTT staging copy for
 * reads from VRAM; otherwise falls back to a synchronized map. */
static void *r600_buffer_transfer_map(struct pipe_context *ctx, |
struct pipe_resource *resource, |
unsigned level, |
unsigned usage, |
const struct pipe_box *box, |
struct pipe_transfer **ptransfer) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; |
struct r600_resource *rbuffer = r600_resource(resource); |
uint8_t *data; |
assert(box->x + box->width <= resource->width0); |
/* See if the buffer range being mapped has never been initialized, |
* in which case it can be mapped unsynchronized. */ |
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && |
usage & PIPE_TRANSFER_WRITE && |
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { |
usage |= PIPE_TRANSFER_UNSYNCHRONIZED; |
} |
/* If discarding the entire range, discard the whole resource instead. */ |
if (usage & PIPE_TRANSFER_DISCARD_RANGE && |
box->x == 0 && box->width == resource->width0) { |
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; |
} |
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && |
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { |
assert(usage & PIPE_TRANSFER_WRITE); |
/* Check if mapping this buffer would cause waiting for the GPU. */ |
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || |
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { |
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b); |
} |
/* At this point, the buffer is always idle. */ |
usage |= PIPE_TRANSFER_UNSYNCHRONIZED; |
} |
else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && |
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && |
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) && |
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) { |
assert(usage & PIPE_TRANSFER_WRITE); |
/* Check if mapping this buffer would cause waiting for the GPU. */ |
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || |
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { |
/* Do a wait-free write-only transfer using a temporary buffer. */ |
unsigned offset; |
struct r600_resource *staging = NULL; |
/* Over-allocate so the map pointer can keep box->x's
 * misalignment relative to R600_MAP_BUFFER_ALIGNMENT. */
u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT), |
&offset, (struct pipe_resource**)&staging, (void**)&data); |
if (staging) { |
data += box->x % R600_MAP_BUFFER_ALIGNMENT; |
return r600_buffer_get_transfer(ctx, resource, level, usage, box, |
ptransfer, data, staging, offset); |
} else { |
return NULL; /* error, shouldn't occur though */ |
} |
} |
/* At this point, the buffer is always idle (we checked it above). */ |
usage |= PIPE_TRANSFER_UNSYNCHRONIZED; |
} |
/* Using a staging buffer in GTT for larger reads is much faster. */ |
else if ((usage & PIPE_TRANSFER_READ) && |
!(usage & PIPE_TRANSFER_WRITE) && |
rbuffer->domains == RADEON_DOMAIN_VRAM && |
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) { |
struct r600_resource *staging; |
staging = (struct r600_resource*) pipe_buffer_create( |
ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING, |
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT)); |
if (staging) { |
/* Copy the VRAM buffer to the staging buffer. */ |
rctx->dma_copy(ctx, &staging->b.b, 0, |
box->x % R600_MAP_BUFFER_ALIGNMENT, |
0, 0, resource, level, box); |
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ); |
data += box->x % R600_MAP_BUFFER_ALIGNMENT; |
return r600_buffer_get_transfer(ctx, resource, level, usage, box, |
ptransfer, data, staging, 0); |
} |
} |
/* Fallback: plain map, synchronizing with the rings as needed. */
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage); |
if (!data) { |
return NULL; |
} |
data += box->x; |
return r600_buffer_get_transfer(ctx, resource, level, usage, box, |
ptransfer, data, NULL, 0); |
} |
/* pipe_context::transfer_unmap for buffers: if a staging buffer was used
 * for a write, DMA its contents back into the real resource and drop the
 * staging reference; on any write, extend the resource's valid range; then
 * return the transfer object to the slab. */
static void r600_buffer_transfer_unmap(struct pipe_context *ctx, |
struct pipe_transfer *transfer) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; |
struct r600_resource *rbuffer = r600_resource(transfer->resource); |
if (rtransfer->staging) { |
if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) { |
struct pipe_resource *dst, *src; |
unsigned soffset, doffset, size; |
struct pipe_box box; |
dst = transfer->resource; |
src = &rtransfer->staging->b.b; |
size = transfer->box.width; |
doffset = transfer->box.x; |
soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT; |
u_box_1d(soffset, size, &box); |
/* Copy the staging buffer into the original one. */ |
rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box); |
} |
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); |
} |
if (transfer->usage & PIPE_TRANSFER_WRITE) { |
util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, |
transfer->box.x + transfer->box.width); |
} |
util_slab_free(&rctx->pool_transfers, transfer); |
} |
/* Resource vtable for buffers; hooks this driver does not need are NULL. */
static const struct u_resource_vtbl r600_buffer_vtbl = |
{ |
NULL, /* get_handle */ |
r600_buffer_destroy, /* resource_destroy */ |
r600_buffer_transfer_map, /* transfer_map */ |
NULL, /* transfer_flush_region */ |
r600_buffer_transfer_unmap, /* transfer_unmap */ |
NULL /* transfer_inline_write */ |
}; |
/* Allocate and initialize the r600_resource wrapper for a buffer from the
 * given template; no GPU backing store is created here.
 * NOTE(review): the MALLOC_STRUCT result is not NULL-checked before use. */
static struct r600_resource * |
r600_alloc_buffer_struct(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
struct r600_resource *rbuffer; |
rbuffer = MALLOC_STRUCT(r600_resource); |
rbuffer->b.b = *templ; |
pipe_reference_init(&rbuffer->b.b.reference, 1); |
rbuffer->b.b.screen = screen; |
rbuffer->b.vtbl = &r600_buffer_vtbl; |
rbuffer->buf = NULL; |
rbuffer->TC_L2_dirty = false; |
util_range_init(&rbuffer->valid_buffer_range); |
return rbuffer; |
} |
/* Create a buffer resource: allocate the wrapper and its GPU backing store
 * (from the reusable pool).  Returns NULL on allocation failure. */
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, |
const struct pipe_resource *templ, |
unsigned alignment) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); |
if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE)) { |
FREE(rbuffer); |
return NULL; |
} |
return &rbuffer->b.b; |
} |
/* Wrap application-owned memory as a GTT buffer resource.  The entire
 * width is marked valid up front; returns NULL if the winsys cannot wrap
 * the pointer. */
struct pipe_resource * |
r600_buffer_from_user_memory(struct pipe_screen *screen, |
const struct pipe_resource *templ, |
void *user_memory) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
struct radeon_winsys *ws = rscreen->ws; |
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); |
rbuffer->domains = RADEON_DOMAIN_GTT; |
util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); |
/* Convert a user pointer to a buffer. */ |
rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); |
if (!rbuffer->buf) { |
FREE(rbuffer); |
return NULL; |
} |
rbuffer->cs_buf = ws->buffer_get_cs_handle(rbuffer->buf); |
if (rscreen->info.r600_virtual_address) |
rbuffer->gpu_address = |
ws->buffer_get_virtual_address(rbuffer->cs_buf); |
else |
rbuffer->gpu_address = 0; |
return &rbuffer->b.b; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_cs.h |
---|
0,0 → 1,133 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
*/ |
/** |
* This file contains helpers for writing commands to commands streams. |
*/ |
#ifndef R600_CS_H |
#define R600_CS_H |
#include "r600_pipe_common.h" |
#include "r600d_common.h" |
/* Add a buffer relocation to the ring's CS and return the reloc offset in
 * bytes (reloc index * 4).  Before adding, the *other* ring is flushed
 * asynchronously (skipped while this ring is itself flushing) so execution
 * appears serialized between gfx and DMA from the driver's point of view. */
static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx, |
struct r600_ring *ring, |
struct r600_resource *rbo, |
enum radeon_bo_usage usage, |
enum radeon_bo_priority priority) |
{ |
assert(usage); |
/* Make sure that all previous rings are flushed so that everything |
* looks serialized from the driver point of view. |
*/ |
if (!ring->flushing) { |
if (ring == &rctx->rings.gfx) { |
if (rctx->rings.dma.cs) { |
/* flush dma ring */ |
rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL); |
} |
} else { |
/* flush gfx ring */ |
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); |
} |
} |
return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, |
rbo->domains, priority) * 4; |
} |
/* Register a relocation for 'rbo' and, when the GPU has no virtual
 * addressing, emit the NOP packet carrying the reloc index into the CS
 * (with virtual addressing the packet is unnecessary). */
static INLINE void r600_emit_reloc(struct r600_common_context *rctx, |
struct r600_ring *ring, struct r600_resource *rbo, |
enum radeon_bo_usage usage, |
enum radeon_bo_priority priority) |
{ |
struct radeon_winsys_cs *cs = ring->cs; |
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address; |
unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority); |
if (!has_vm) { |
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); |
radeon_emit(cs, reloc); |
} |
} |
/* Helpers emitting SET_*_REG packets.  Each *_seq() variant starts a write
 * of 'num' consecutive registers beginning at 'reg' (the caller must follow
 * with exactly 'num' radeon_emit() calls); the non-seq forms write a single
 * register value. */
static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) |
{ |
/* NOTE(review): unlike the variants below, only the upper bound is
 * asserted here; reg >= R600_CONFIG_REG_OFFSET is assumed but unchecked. */
assert(reg < R600_CONTEXT_REG_OFFSET); |
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS); |
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); |
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2); |
} |
static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) |
{ |
r600_write_config_reg_seq(cs, reg, 1); |
radeon_emit(cs, value); |
} |
static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) |
{ |
assert(reg >= R600_CONTEXT_REG_OFFSET); |
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS); |
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); |
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2); |
} |
static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) |
{ |
r600_write_context_reg_seq(cs, reg, 1); |
radeon_emit(cs, value); |
} |
/* SI shader register writes. */
static INLINE void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) |
{ |
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); |
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS); |
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); |
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); |
} |
static INLINE void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) |
{ |
si_write_sh_reg_seq(cs, reg, 1); |
radeon_emit(cs, value); |
} |
/* CIK user-config register writes. */
static INLINE void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) |
{ |
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); |
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS); |
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); |
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); |
} |
static INLINE void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) |
{ |
cik_write_uconfig_reg_seq(cs, reg, 1); |
radeon_emit(cs, value); |
} |
#endif |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_gpu_load.c |
---|
0,0 → 1,141 |
/* |
* Copyright 2015 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
* |
*/ |
/* The GPU load is measured as follows. |
* |
* There is a thread which samples the GRBM_STATUS register at a certain |
* frequency and the "busy" or "idle" counter is incremented based on |
* whether the GUI_ACTIVE bit is set or not. |
* |
* Then, the user can sample the counters twice and calculate the average |
* GPU load between the two samples. |
*/ |
#include "r600_pipe_common.h" |
#include "os/os_time.h" |
/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher |
* fps (there are too few samples per frame). */ |
#define SAMPLES_PER_SEC 10000 |
#define GRBM_STATUS 0x8010 |
#define GUI_ACTIVE(x) (((x) >> 31) & 0x1) |
/* Read GRBM_STATUS through the winsys and report the GUI_ACTIVE bit. */
static bool r600_is_gpu_busy(struct r600_common_screen *rscreen) |
{ |
uint32_t value = 0; |
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value); |
return GUI_ACTIVE(value); |
} |
/* Sampling thread: polls GRBM_STATUS roughly SAMPLES_PER_SEC times per
 * second and atomically bumps the busy or idle counter on each sample.
 * The sleep time is nudged down/up by 1 us per iteration to converge on
 * the target period.  Exits when gpu_load_stop_thread is raised, and
 * decrements it as an acknowledgement. */
static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)param; |
const int period_us = 1000000 / SAMPLES_PER_SEC; |
int sleep_us = period_us; |
int64_t cur_time, last_time = os_time_get(); |
while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) { |
if (sleep_us) |
os_time_sleep(sleep_us); |
/* Make sure we sleep the ideal amount of time to match |
* the expected frequency. */ |
cur_time = os_time_get(); |
if (os_time_timeout(last_time, last_time + period_us, |
cur_time)) |
sleep_us = MAX2(sleep_us - 1, 1); |
else |
sleep_us += 1; |
/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/ |
last_time = cur_time; |
/* Update the counters. */ |
if (r600_is_gpu_busy(rscreen)) |
p_atomic_inc(&rscreen->gpu_load_counter_busy); |
else |
p_atomic_inc(&rscreen->gpu_load_counter_idle); |
} |
p_atomic_dec(&rscreen->gpu_load_stop_thread); |
return 0; |
} |
/* Signal the sampling thread to stop (if it is running) and join it. */
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen) |
{ |
if (!rscreen->gpu_load_thread) |
return; |
p_atomic_inc(&rscreen->gpu_load_stop_thread); |
pipe_thread_wait(rscreen->gpu_load_thread); |
rscreen->gpu_load_thread = 0; |
} |
/* Read both load counters as one 64-bit value (busy in the low 32 bits,
 * idle in the high 32 bits), lazily starting the sampling thread the first
 * time; the start is guarded by the mutex with a re-check inside it. */
static uint64_t r600_gpu_load_read_counter(struct r600_common_screen *rscreen) |
{ |
/* Start the thread if needed. */ |
if (!rscreen->gpu_load_thread) { |
pipe_mutex_lock(rscreen->gpu_load_mutex); |
/* Check again inside the mutex. */ |
if (!rscreen->gpu_load_thread) |
rscreen->gpu_load_thread = |
pipe_thread_create(r600_gpu_load_thread, rscreen); |
pipe_mutex_unlock(rscreen->gpu_load_mutex); |
} |
/* The busy counter is in the lower 32 bits. |
* The idle counter is in the upper 32 bits. */ |
return p_atomic_read(&rscreen->gpu_load_counter_busy) | |
((uint64_t)p_atomic_read(&rscreen->gpu_load_counter_idle) << 32); |
} |
/** |
* Just return the counters. |
*/ |
/* Snapshot the packed busy/idle counters at the start of a measurement
 * interval; pass the result to r600_gpu_load_end() to obtain the load. */
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen) |
{ |
return r600_gpu_load_read_counter(rscreen); |
} |
/* Compute the GPU load percentage (0-100) over the interval since 'begin'
 * as busy / (busy + idle) counter deltas.  If no samples elapsed, fall
 * back to an instantaneous busy check. */
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin) |
{ |
uint64_t end = r600_gpu_load_read_counter(rscreen); |
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff); |
unsigned idle = (end >> 32) - (begin >> 32); |
/* Calculate the GPU load. |
* |
* If no counters have been incremented, return the current load. |
* It's for the case when the load is queried faster than |
* the counters are updated. |
*/ |
if (idle || busy) |
return busy*100 / (busy + idle); |
else |
return r600_is_gpu_busy(rscreen) ? 100 : 0; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_pipe_common.c |
---|
0,0 → 1,966 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
* |
*/ |
#include "r600_pipe_common.h" |
#include "r600_cs.h" |
#include "tgsi/tgsi_parse.h" |
#include "util/u_draw_quad.h" |
#include "util/u_memory.h" |
#include "util/u_format_s3tc.h" |
#include "util/u_upload_mgr.h" |
#include "vl/vl_decoder.h" |
#include "vl/vl_video_buffer.h" |
#include "radeon/radeon_video.h" |
#include <inttypes.h> |
#ifndef HAVE_LLVM |
#define HAVE_LLVM 0 |
#endif |
/* |
* pipe_context |
*/ |
void r600_draw_rectangle(struct blitter_context *blitter, |
int x1, int y1, int x2, int y2, float depth, |
enum blitter_attrib_type type, |
const union pipe_color_union *attrib) |
{ |
struct r600_common_context *rctx = |
(struct r600_common_context*)util_blitter_get_pipe(blitter); |
struct pipe_viewport_state viewport; |
struct pipe_resource *buf = NULL; |
unsigned offset = 0; |
float *vb; |
if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { |
util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib); |
return; |
} |
/* Some operations (like color resolve on r6xx) don't work |
* with the conventional primitive types. |
* One that works is PT_RECTLIST, which we use here. */ |
/* setup viewport */ |
viewport.scale[0] = 1.0f; |
viewport.scale[1] = 1.0f; |
viewport.scale[2] = 1.0f; |
viewport.translate[0] = 0.0f; |
viewport.translate[1] = 0.0f; |
viewport.translate[2] = 0.0f; |
rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport); |
/* Upload vertices. The hw rectangle has only 3 vertices, |
* I guess the 4th one is derived from the first 3. |
* The vertex specification should match u_blitter's vertex element state. */ |
u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb); |
vb[0] = x1; |
vb[1] = y1; |
vb[2] = depth; |
vb[3] = 1; |
vb[8] = x1; |
vb[9] = y2; |
vb[10] = depth; |
vb[11] = 1; |
vb[16] = x2; |
vb[17] = y1; |
vb[18] = depth; |
vb[19] = 1; |
if (attrib) { |
memcpy(vb+4, attrib->f, sizeof(float)*4); |
memcpy(vb+12, attrib->f, sizeof(float)*4); |
memcpy(vb+20, attrib->f, sizeof(float)*4); |
} |
/* draw */ |
util_draw_vertex_buffer(&rctx->b, NULL, buf, blitter->vb_slot, offset, |
R600_PRIM_RECTANGLE_LIST, 3, 2); |
pipe_resource_reference(&buf, NULL); |
} |
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw) |
{ |
/* The number of dwords we already used in the DMA so far. */ |
num_dw += ctx->rings.dma.cs->cdw; |
/* Flush if there's not enough space. */ |
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { |
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); |
} |
} |
static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) |
{ |
} |
void r600_preflush_suspend_features(struct r600_common_context *ctx) |
{ |
/* Disable render condition. */ |
ctx->saved_render_cond = NULL; |
ctx->saved_render_cond_cond = FALSE; |
ctx->saved_render_cond_mode = 0; |
if (ctx->current_render_cond) { |
ctx->saved_render_cond = ctx->current_render_cond; |
ctx->saved_render_cond_cond = ctx->current_render_cond_cond; |
ctx->saved_render_cond_mode = ctx->current_render_cond_mode; |
ctx->b.render_condition(&ctx->b, NULL, FALSE, 0); |
} |
/* suspend queries */ |
ctx->nontimer_queries_suspended = false; |
if (ctx->num_cs_dw_nontimer_queries_suspend) { |
r600_suspend_nontimer_queries(ctx); |
ctx->nontimer_queries_suspended = true; |
} |
ctx->streamout.suspended = false; |
if (ctx->streamout.begin_emitted) { |
r600_emit_streamout_end(ctx); |
ctx->streamout.suspended = true; |
} |
} |
void r600_postflush_resume_features(struct r600_common_context *ctx) |
{ |
if (ctx->streamout.suspended) { |
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; |
r600_streamout_buffers_dirty(ctx); |
} |
/* resume queries */ |
if (ctx->nontimer_queries_suspended) { |
r600_resume_nontimer_queries(ctx); |
} |
/* Re-enable render condition. */ |
if (ctx->saved_render_cond) { |
ctx->b.render_condition(&ctx->b, ctx->saved_render_cond, |
ctx->saved_render_cond_cond, |
ctx->saved_render_cond_mode); |
} |
} |
static void r600_flush_from_st(struct pipe_context *ctx, |
struct pipe_fence_handle **fence, |
unsigned flags) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
unsigned rflags = 0; |
if (flags & PIPE_FLUSH_END_OF_FRAME) |
rflags |= RADEON_FLUSH_END_OF_FRAME; |
if (rctx->rings.dma.cs) { |
rctx->rings.dma.flush(rctx, rflags, NULL); |
} |
rctx->rings.gfx.flush(rctx, rflags, fence); |
} |
static void r600_flush_dma_ring(void *ctx, unsigned flags, |
struct pipe_fence_handle **fence) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
struct radeon_winsys_cs *cs = rctx->rings.dma.cs; |
if (!cs->cdw) { |
return; |
} |
rctx->rings.dma.flushing = true; |
rctx->ws->cs_flush(cs, flags, fence, 0); |
rctx->rings.dma.flushing = false; |
} |
bool r600_common_context_init(struct r600_common_context *rctx, |
struct r600_common_screen *rscreen) |
{ |
util_slab_create(&rctx->pool_transfers, |
sizeof(struct r600_transfer), 64, |
UTIL_SLAB_SINGLETHREADED); |
rctx->screen = rscreen; |
rctx->ws = rscreen->ws; |
rctx->family = rscreen->family; |
rctx->chip_class = rscreen->chip_class; |
if (rscreen->family == CHIP_HAWAII) |
rctx->max_db = 16; |
else if (rscreen->chip_class >= EVERGREEN) |
rctx->max_db = 8; |
else |
rctx->max_db = 4; |
rctx->b.transfer_map = u_transfer_map_vtbl; |
rctx->b.transfer_flush_region = u_default_transfer_flush_region; |
rctx->b.transfer_unmap = u_transfer_unmap_vtbl; |
rctx->b.transfer_inline_write = u_default_transfer_inline_write; |
rctx->b.memory_barrier = r600_memory_barrier; |
rctx->b.flush = r600_flush_from_st; |
LIST_INITHEAD(&rctx->texture_buffers); |
r600_init_context_texture_functions(rctx); |
r600_streamout_init(rctx); |
r600_query_init(rctx); |
cayman_init_msaa(&rctx->b); |
rctx->allocator_so_filled_size = u_suballocator_create(&rctx->b, 4096, 4, |
0, PIPE_USAGE_DEFAULT, TRUE); |
if (!rctx->allocator_so_filled_size) |
return false; |
rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 256, |
PIPE_BIND_INDEX_BUFFER | |
PIPE_BIND_CONSTANT_BUFFER); |
if (!rctx->uploader) |
return false; |
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { |
rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA, |
r600_flush_dma_ring, |
rctx, NULL); |
rctx->rings.dma.flush = r600_flush_dma_ring; |
} |
return true; |
} |
void r600_common_context_cleanup(struct r600_common_context *rctx) |
{ |
if (rctx->rings.gfx.cs) { |
rctx->ws->cs_destroy(rctx->rings.gfx.cs); |
} |
if (rctx->rings.dma.cs) { |
rctx->ws->cs_destroy(rctx->rings.dma.cs); |
} |
if (rctx->uploader) { |
u_upload_destroy(rctx->uploader); |
} |
util_slab_destroy(&rctx->pool_transfers); |
if (rctx->allocator_so_filled_size) { |
u_suballocator_destroy(rctx->allocator_so_filled_size); |
} |
} |
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
struct r600_resource *rr = (struct r600_resource *)r; |
if (r == NULL) { |
return; |
} |
/* |
* The idea is to compute a gross estimate of memory requirement of |
* each draw call. After each draw call, memory will be precisely |
* accounted. So the uncertainty is only on the current draw call. |
* In practice this gave very good estimate (+/- 10% of the target |
* memory limit). |
*/ |
if (rr->domains & RADEON_DOMAIN_GTT) { |
rctx->gtt += rr->buf->size; |
} |
if (rr->domains & RADEON_DOMAIN_VRAM) { |
rctx->vram += rr->buf->size; |
} |
} |
/* |
* pipe_screen |
*/ |
static const struct debug_named_value common_debug_options[] = { |
/* logging */ |
{ "tex", DBG_TEX, "Print texture info" }, |
{ "texmip", DBG_TEXMIP, "Print texture info (mipmapped only)" }, |
{ "compute", DBG_COMPUTE, "Print compute info" }, |
{ "vm", DBG_VM, "Print virtual addresses when creating resources" }, |
{ "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" }, |
{ "info", DBG_INFO, "Print driver information" }, |
/* shaders */ |
{ "fs", DBG_FS, "Print fetch shaders" }, |
{ "vs", DBG_VS, "Print vertex shaders" }, |
{ "gs", DBG_GS, "Print geometry shaders" }, |
{ "ps", DBG_PS, "Print pixel shaders" }, |
{ "cs", DBG_CS, "Print compute shaders" }, |
/* features */ |
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" }, |
{ "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" }, |
/* GL uses the word INVALIDATE, gallium uses the word DISCARD */ |
{ "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, |
{ "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, |
{ "notiling", DBG_NO_TILING, "Disable tiling" }, |
{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, |
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, |
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, |
DEBUG_NAMED_VALUE_END /* must be last */ |
}; |
static const char* r600_get_vendor(struct pipe_screen* pscreen) |
{ |
return "X.Org"; |
} |
static const char* r600_get_device_vendor(struct pipe_screen* pscreen) |
{ |
return "AMD"; |
} |
static const char* r600_get_name(struct pipe_screen* pscreen) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; |
switch (rscreen->family) { |
case CHIP_R600: return "AMD R600"; |
case CHIP_RV610: return "AMD RV610"; |
case CHIP_RV630: return "AMD RV630"; |
case CHIP_RV670: return "AMD RV670"; |
case CHIP_RV620: return "AMD RV620"; |
case CHIP_RV635: return "AMD RV635"; |
case CHIP_RS780: return "AMD RS780"; |
case CHIP_RS880: return "AMD RS880"; |
case CHIP_RV770: return "AMD RV770"; |
case CHIP_RV730: return "AMD RV730"; |
case CHIP_RV710: return "AMD RV710"; |
case CHIP_RV740: return "AMD RV740"; |
case CHIP_CEDAR: return "AMD CEDAR"; |
case CHIP_REDWOOD: return "AMD REDWOOD"; |
case CHIP_JUNIPER: return "AMD JUNIPER"; |
case CHIP_CYPRESS: return "AMD CYPRESS"; |
case CHIP_HEMLOCK: return "AMD HEMLOCK"; |
case CHIP_PALM: return "AMD PALM"; |
case CHIP_SUMO: return "AMD SUMO"; |
case CHIP_SUMO2: return "AMD SUMO2"; |
case CHIP_BARTS: return "AMD BARTS"; |
case CHIP_TURKS: return "AMD TURKS"; |
case CHIP_CAICOS: return "AMD CAICOS"; |
case CHIP_CAYMAN: return "AMD CAYMAN"; |
case CHIP_ARUBA: return "AMD ARUBA"; |
case CHIP_TAHITI: return "AMD TAHITI"; |
case CHIP_PITCAIRN: return "AMD PITCAIRN"; |
case CHIP_VERDE: return "AMD CAPE VERDE"; |
case CHIP_OLAND: return "AMD OLAND"; |
case CHIP_HAINAN: return "AMD HAINAN"; |
case CHIP_BONAIRE: return "AMD BONAIRE"; |
case CHIP_KAVERI: return "AMD KAVERI"; |
case CHIP_KABINI: return "AMD KABINI"; |
case CHIP_HAWAII: return "AMD HAWAII"; |
case CHIP_MULLINS: return "AMD MULLINS"; |
default: return "AMD unknown"; |
} |
} |
static float r600_get_paramf(struct pipe_screen* pscreen, |
enum pipe_capf param) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen; |
switch (param) { |
case PIPE_CAPF_MAX_LINE_WIDTH: |
case PIPE_CAPF_MAX_LINE_WIDTH_AA: |
case PIPE_CAPF_MAX_POINT_WIDTH: |
case PIPE_CAPF_MAX_POINT_WIDTH_AA: |
if (rscreen->family >= CHIP_CEDAR) |
return 16384.0f; |
else |
return 8192.0f; |
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: |
return 16.0f; |
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: |
return 16.0f; |
case PIPE_CAPF_GUARD_BAND_LEFT: |
case PIPE_CAPF_GUARD_BAND_TOP: |
case PIPE_CAPF_GUARD_BAND_RIGHT: |
case PIPE_CAPF_GUARD_BAND_BOTTOM: |
return 0.0f; |
} |
return 0.0f; |
} |
static int r600_get_video_param(struct pipe_screen *screen, |
enum pipe_video_profile profile, |
enum pipe_video_entrypoint entrypoint, |
enum pipe_video_cap param) |
{ |
switch (param) { |
case PIPE_VIDEO_CAP_SUPPORTED: |
return vl_profile_supported(screen, profile, entrypoint); |
case PIPE_VIDEO_CAP_NPOT_TEXTURES: |
return 1; |
case PIPE_VIDEO_CAP_MAX_WIDTH: |
case PIPE_VIDEO_CAP_MAX_HEIGHT: |
return vl_video_buffer_max_size(screen); |
case PIPE_VIDEO_CAP_PREFERED_FORMAT: |
return PIPE_FORMAT_NV12; |
case PIPE_VIDEO_CAP_PREFERS_INTERLACED: |
return false; |
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: |
return false; |
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: |
return true; |
case PIPE_VIDEO_CAP_MAX_LEVEL: |
return vl_level_supported(screen, profile); |
default: |
return 0; |
} |
} |
const char *r600_get_llvm_processor_name(enum radeon_family family) |
{ |
switch (family) { |
case CHIP_R600: |
case CHIP_RV630: |
case CHIP_RV635: |
case CHIP_RV670: |
return "r600"; |
case CHIP_RV610: |
case CHIP_RV620: |
case CHIP_RS780: |
case CHIP_RS880: |
return "rs880"; |
case CHIP_RV710: |
return "rv710"; |
case CHIP_RV730: |
return "rv730"; |
case CHIP_RV740: |
case CHIP_RV770: |
return "rv770"; |
case CHIP_PALM: |
case CHIP_CEDAR: |
return "cedar"; |
case CHIP_SUMO: |
case CHIP_SUMO2: |
return "sumo"; |
case CHIP_REDWOOD: |
return "redwood"; |
case CHIP_JUNIPER: |
return "juniper"; |
case CHIP_HEMLOCK: |
case CHIP_CYPRESS: |
return "cypress"; |
case CHIP_BARTS: |
return "barts"; |
case CHIP_TURKS: |
return "turks"; |
case CHIP_CAICOS: |
return "caicos"; |
case CHIP_CAYMAN: |
case CHIP_ARUBA: |
return "cayman"; |
case CHIP_TAHITI: return "tahiti"; |
case CHIP_PITCAIRN: return "pitcairn"; |
case CHIP_VERDE: return "verde"; |
case CHIP_OLAND: return "oland"; |
case CHIP_HAINAN: return "hainan"; |
case CHIP_BONAIRE: return "bonaire"; |
case CHIP_KABINI: return "kabini"; |
case CHIP_KAVERI: return "kaveri"; |
case CHIP_HAWAII: return "hawaii"; |
case CHIP_MULLINS: |
#if HAVE_LLVM >= 0x0305 |
return "mullins"; |
#else |
return "kabini"; |
#endif |
default: return ""; |
} |
} |
static int r600_get_compute_param(struct pipe_screen *screen, |
enum pipe_compute_cap param, |
void *ret) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; |
//TODO: select these params by asic |
switch (param) { |
case PIPE_COMPUTE_CAP_IR_TARGET: { |
const char *gpu; |
const char *triple; |
if (rscreen->family <= CHIP_ARUBA || HAVE_LLVM < 0x0306) { |
triple = "r600--"; |
} else { |
triple = "amdgcn--"; |
} |
switch(rscreen->family) { |
/* Clang < 3.6 is missing Hainan in its list of |
* GPUs, so we need to use the name of a similar GPU. |
*/ |
#if HAVE_LLVM < 0x0306 |
case CHIP_HAINAN: |
gpu = "oland"; |
break; |
#endif |
default: |
gpu = r600_get_llvm_processor_name(rscreen->family); |
break; |
} |
if (ret) { |
sprintf(ret, "%s-%s", gpu, triple); |
} |
/* +2 for dash and terminating NIL byte */ |
return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); |
} |
case PIPE_COMPUTE_CAP_GRID_DIMENSION: |
if (ret) { |
uint64_t *grid_dimension = ret; |
grid_dimension[0] = 3; |
} |
return 1 * sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: |
if (ret) { |
uint64_t *grid_size = ret; |
grid_size[0] = 65535; |
grid_size[1] = 65535; |
grid_size[2] = 1; |
} |
return 3 * sizeof(uint64_t) ; |
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: |
if (ret) { |
uint64_t *block_size = ret; |
block_size[0] = 256; |
block_size[1] = 256; |
block_size[2] = 256; |
} |
return 3 * sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: |
if (ret) { |
uint64_t *max_threads_per_block = ret; |
*max_threads_per_block = 256; |
} |
return sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: |
if (ret) { |
uint64_t *max_global_size = ret; |
uint64_t max_mem_alloc_size; |
r600_get_compute_param(screen, |
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, |
&max_mem_alloc_size); |
/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least |
* 1/4 of the MAX_GLOBAL_SIZE. Since the |
* MAX_MEM_ALLOC_SIZE is fixed for older kernels, |
* make sure we never report more than |
* 4 * MAX_MEM_ALLOC_SIZE. |
*/ |
*max_global_size = MIN2(4 * max_mem_alloc_size, |
rscreen->info.gart_size + |
rscreen->info.vram_size); |
} |
return sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: |
if (ret) { |
uint64_t *max_local_size = ret; |
/* Value reported by the closed source driver. */ |
*max_local_size = 32768; |
} |
return sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: |
if (ret) { |
uint64_t *max_input_size = ret; |
/* Value reported by the closed source driver. */ |
*max_input_size = 1024; |
} |
return sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: |
if (ret) { |
uint64_t *max_mem_alloc_size = ret; |
/* XXX: The limit in older kernels is 256 MB. We |
* should add a query here for newer kernels. |
*/ |
*max_mem_alloc_size = 256 * 1024 * 1024; |
} |
return sizeof(uint64_t); |
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: |
if (ret) { |
uint32_t *max_clock_frequency = ret; |
*max_clock_frequency = rscreen->info.max_sclk; |
} |
return sizeof(uint32_t); |
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: |
if (ret) { |
uint32_t *max_compute_units = ret; |
*max_compute_units = rscreen->info.max_compute_units; |
} |
return sizeof(uint32_t); |
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: |
if (ret) { |
uint32_t *images_supported = ret; |
*images_supported = 0; |
} |
return sizeof(uint32_t); |
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: |
break; /* unused */ |
} |
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); |
return 0; |
} |
static uint64_t r600_get_timestamp(struct pipe_screen *screen) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / |
rscreen->info.r600_clock_crystal_freq; |
} |
static int r600_get_driver_query_info(struct pipe_screen *screen, |
unsigned index, |
struct pipe_driver_query_info *info) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
struct pipe_driver_query_info list[] = { |
{"draw-calls", R600_QUERY_DRAW_CALLS, {0}}, |
{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, |
{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, |
{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}}, |
{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}}, |
{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES}, |
{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, |
{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES}, |
{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}}, |
{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}}, |
{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}}, |
{"GPU-load", R600_QUERY_GPU_LOAD, {100}} |
}; |
unsigned num_queries; |
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) |
num_queries = Elements(list); |
else |
num_queries = 8; |
if (!info) |
return num_queries; |
if (index >= num_queries) |
return 0; |
*info = list[index]; |
return 1; |
} |
static void r600_fence_reference(struct pipe_screen *screen, |
struct pipe_fence_handle **ptr, |
struct pipe_fence_handle *fence) |
{ |
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; |
rws->fence_reference(ptr, fence); |
} |
static boolean r600_fence_signalled(struct pipe_screen *screen, |
struct pipe_fence_handle *fence) |
{ |
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; |
return rws->fence_wait(rws, fence, 0); |
} |
static boolean r600_fence_finish(struct pipe_screen *screen, |
struct pipe_fence_handle *fence, |
uint64_t timeout) |
{ |
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; |
return rws->fence_wait(rws, fence, timeout); |
} |
static bool r600_interpret_tiling(struct r600_common_screen *rscreen, |
uint32_t tiling_config) |
{ |
switch ((tiling_config & 0xe) >> 1) { |
case 0: |
rscreen->tiling_info.num_channels = 1; |
break; |
case 1: |
rscreen->tiling_info.num_channels = 2; |
break; |
case 2: |
rscreen->tiling_info.num_channels = 4; |
break; |
case 3: |
rscreen->tiling_info.num_channels = 8; |
break; |
default: |
return false; |
} |
switch ((tiling_config & 0x30) >> 4) { |
case 0: |
rscreen->tiling_info.num_banks = 4; |
break; |
case 1: |
rscreen->tiling_info.num_banks = 8; |
break; |
default: |
return false; |
} |
switch ((tiling_config & 0xc0) >> 6) { |
case 0: |
rscreen->tiling_info.group_bytes = 256; |
break; |
case 1: |
rscreen->tiling_info.group_bytes = 512; |
break; |
default: |
return false; |
} |
return true; |
} |
static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen, |
uint32_t tiling_config) |
{ |
switch (tiling_config & 0xf) { |
case 0: |
rscreen->tiling_info.num_channels = 1; |
break; |
case 1: |
rscreen->tiling_info.num_channels = 2; |
break; |
case 2: |
rscreen->tiling_info.num_channels = 4; |
break; |
case 3: |
rscreen->tiling_info.num_channels = 8; |
break; |
default: |
return false; |
} |
switch ((tiling_config & 0xf0) >> 4) { |
case 0: |
rscreen->tiling_info.num_banks = 4; |
break; |
case 1: |
rscreen->tiling_info.num_banks = 8; |
break; |
case 2: |
rscreen->tiling_info.num_banks = 16; |
break; |
default: |
return false; |
} |
switch ((tiling_config & 0xf00) >> 8) { |
case 0: |
rscreen->tiling_info.group_bytes = 256; |
break; |
case 1: |
rscreen->tiling_info.group_bytes = 512; |
break; |
default: |
return false; |
} |
return true; |
} |
static bool r600_init_tiling(struct r600_common_screen *rscreen) |
{ |
uint32_t tiling_config = rscreen->info.r600_tiling_config; |
/* set default group bytes, overridden by tiling info ioctl */ |
if (rscreen->chip_class <= R700) { |
rscreen->tiling_info.group_bytes = 256; |
} else { |
rscreen->tiling_info.group_bytes = 512; |
} |
if (!tiling_config) |
return true; |
if (rscreen->chip_class <= R700) { |
return r600_interpret_tiling(rscreen, tiling_config); |
} else { |
return evergreen_interpret_tiling(rscreen, tiling_config); |
} |
} |
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
if (templ->target == PIPE_BUFFER) { |
return r600_buffer_create(screen, templ, 4096); |
} else { |
return r600_texture_create(screen, templ); |
} |
} |
bool r600_common_screen_init(struct r600_common_screen *rscreen, |
struct radeon_winsys *ws) |
{ |
ws->query_info(ws, &rscreen->info); |
rscreen->b.get_name = r600_get_name; |
rscreen->b.get_vendor = r600_get_vendor; |
rscreen->b.get_device_vendor = r600_get_device_vendor; |
rscreen->b.get_compute_param = r600_get_compute_param; |
rscreen->b.get_paramf = r600_get_paramf; |
rscreen->b.get_driver_query_info = r600_get_driver_query_info; |
rscreen->b.get_timestamp = r600_get_timestamp; |
rscreen->b.fence_finish = r600_fence_finish; |
rscreen->b.fence_reference = r600_fence_reference; |
rscreen->b.fence_signalled = r600_fence_signalled; |
rscreen->b.resource_destroy = u_resource_destroy_vtbl; |
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; |
if (rscreen->info.has_uvd) { |
rscreen->b.get_video_param = rvid_get_video_param; |
rscreen->b.is_video_format_supported = rvid_is_format_supported; |
} else { |
rscreen->b.get_video_param = r600_get_video_param; |
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; |
} |
r600_init_screen_texture_functions(rscreen); |
rscreen->ws = ws; |
rscreen->family = rscreen->info.family; |
rscreen->chip_class = rscreen->info.chip_class; |
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); |
if (!r600_init_tiling(rscreen)) { |
return false; |
} |
util_format_s3tc_init(); |
pipe_mutex_init(rscreen->aux_context_lock); |
pipe_mutex_init(rscreen->gpu_load_mutex); |
if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) { |
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b, |
PIPE_BIND_CUSTOM, |
PIPE_USAGE_STAGING, |
4096); |
if (rscreen->trace_bo) { |
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL, |
PIPE_TRANSFER_UNSYNCHRONIZED); |
} |
} |
if (rscreen->debug_flags & DBG_INFO) { |
printf("pci_id = 0x%x\n", rscreen->info.pci_id); |
printf("family = %i\n", rscreen->info.family); |
printf("chip_class = %i\n", rscreen->info.chip_class); |
printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20)); |
printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20)); |
printf("max_sclk = %i\n", rscreen->info.max_sclk); |
printf("max_compute_units = %i\n", rscreen->info.max_compute_units); |
printf("max_se = %i\n", rscreen->info.max_se); |
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); |
printf("drm = %i.%i.%i\n", rscreen->info.drm_major, |
rscreen->info.drm_minor, rscreen->info.drm_patchlevel); |
printf("has_uvd = %i\n", rscreen->info.has_uvd); |
printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); |
printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends); |
printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq); |
printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config); |
printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes); |
printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes); |
printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address); |
printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma); |
printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map); |
printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid); |
printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid); |
printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid); |
} |
return true; |
} |
void r600_destroy_common_screen(struct r600_common_screen *rscreen) |
{ |
r600_gpu_load_kill_thread(rscreen); |
pipe_mutex_destroy(rscreen->gpu_load_mutex); |
pipe_mutex_destroy(rscreen->aux_context_lock); |
rscreen->aux_context->destroy(rscreen->aux_context); |
if (rscreen->trace_bo) { |
rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf); |
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL); |
} |
rscreen->ws->destroy(rscreen->ws); |
FREE(rscreen); |
} |
bool r600_can_dump_shader(struct r600_common_screen *rscreen, |
const struct tgsi_token *tokens) |
{ |
/* Compute shader don't have tgsi_tokens */ |
if (!tokens) |
return (rscreen->debug_flags & DBG_CS) != 0; |
switch (tgsi_get_processor_type(tokens)) { |
case TGSI_PROCESSOR_VERTEX: |
return (rscreen->debug_flags & DBG_VS) != 0; |
case TGSI_PROCESSOR_GEOMETRY: |
return (rscreen->debug_flags & DBG_GS) != 0; |
case TGSI_PROCESSOR_FRAGMENT: |
return (rscreen->debug_flags & DBG_PS) != 0; |
case TGSI_PROCESSOR_COMPUTE: |
return (rscreen->debug_flags & DBG_CS) != 0; |
default: |
return false; |
} |
} |
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, |
unsigned offset, unsigned size, unsigned value, |
bool is_framebuffer) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; |
pipe_mutex_lock(rscreen->aux_context_lock); |
rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer); |
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); |
pipe_mutex_unlock(rscreen->aux_context_lock); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_pipe_common.h |
---|
0,0 → 1,588 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
* |
*/ |
/** |
* This file contains common screen and context structures and functions |
* for r600g and radeonsi. |
*/ |
#ifndef R600_PIPE_COMMON_H |
#define R600_PIPE_COMMON_H |
#include <stdio.h> |
#include "radeon/radeon_winsys.h" |
#include "util/u_blitter.h" |
#include "util/list.h" |
#include "util/u_range.h" |
#include "util/u_slab.h" |
#include "util/u_suballoc.h" |
#include "util/u_transfer.h" |
#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) |
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) |
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) |
#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) |
#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1) |
#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2) |
#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3) |
#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4) |
#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5) |
#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6) |
#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7) |
#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) |
#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) |
#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) |
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) |
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) |
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1) |
/* special primitive types */ |
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX |
/* Debug flags. */ |
/* logging */ |
#define DBG_TEX (1 << 0) |
#define DBG_TEXMIP (1 << 1) |
#define DBG_COMPUTE (1 << 2) |
#define DBG_VM (1 << 3) |
#define DBG_TRACE_CS (1 << 4) |
/* shader logging */ |
#define DBG_FS (1 << 5) |
#define DBG_VS (1 << 6) |
#define DBG_GS (1 << 7) |
#define DBG_PS (1 << 8) |
#define DBG_CS (1 << 9) |
/* features */ |
#define DBG_NO_ASYNC_DMA (1 << 10) |
#define DBG_NO_HYPERZ (1 << 11) |
#define DBG_NO_DISCARD_RANGE (1 << 12) |
#define DBG_NO_2D_TILING (1 << 13) |
#define DBG_NO_TILING (1 << 14) |
#define DBG_SWITCH_ON_EOP (1 << 15) |
#define DBG_FORCE_DMA (1 << 16) |
#define DBG_PRECOMPILE (1 << 17) |
#define DBG_INFO (1 << 18) |
/* The maximum allowed bit is 20. */ |
#define R600_MAP_BUFFER_ALIGNMENT 64 |
struct r600_common_context; |
/* A named relocation inside compiled shader code; 'offset' is the byte
 * position in the code where the symbol's value must be patched in. */
struct radeon_shader_reloc {
	char *name;
	uint64_t offset;
};
/* A compiled shader binary: machine code plus the register config,
 * read-only data and relocation info needed to upload and run it. */
struct radeon_shader_binary {
	/** Shader code */
	unsigned char *code;
	unsigned code_size;
	/** Config/Context register state that accompanies this shader.
	 * This is a stream of dword pairs. First dword contains the
	 * register address, the second dword contains the value.*/
	unsigned char *config;
	unsigned config_size;
	/** The number of bytes of config information for each global symbol.
	 */
	unsigned config_size_per_symbol;
	/** Constant data accessed by the shader. This will be uploaded
	 * into a constant buffer. */
	unsigned char *rodata;
	unsigned rodata_size;
	/** List of symbol offsets for the shader */
	uint64_t *global_symbol_offsets;
	unsigned global_symbol_count;
	/* Relocations to apply to 'code' (see radeon_shader_reloc). */
	struct radeon_shader_reloc *relocs;
	unsigned reloc_count;
	/** Set to 1 if the disassembly for this binary has been dumped to
	 * stderr. */
	int disassembled;
};
/* Base class of all winsys-backed resources (buffers and textures). */
struct r600_resource {
	struct u_resource b;
	/* Winsys objects. */
	struct pb_buffer *buf;
	struct radeon_winsys_cs_handle *cs_buf;
	/* GPU virtual address of the buffer. */
	uint64_t gpu_address;
	/* Resource state. */
	enum radeon_bo_domain domains;
	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsychronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range valid_buffer_range;
	/* For buffers only. This indicates that a write operation has been
	 * performed by TC L2, but the cache hasn't been flushed.
	 * Any hw block which doesn't use or bypasses TC L2 should check this
	 * flag and flush the cache before using the buffer.
	 *
	 * For example, TC L2 must be flushed if a buffer which has been
	 * modified by a shader store instruction is about to be used as
	 * an index buffer. The reason is that VGT DMA index fetching doesn't
	 * use TC L2.
	 */
	bool TC_L2_dirty;
};
/* Driver-private part of a pipe_transfer (map operation). */
struct r600_transfer {
	struct pipe_transfer transfer;
	/* Staging resource used when the map can't be done in place;
	 * NULL when mapping directly. */
	struct r600_resource *staging;
	/* NOTE(review): appears to be the byte offset of the mapped data
	 * within the (staging) buffer — confirm against the .c users. */
	unsigned offset;
};
/* Layout parameters of the FMASK (MSAA color compression) metadata
 * surface, as computed by r600_texture_get_fmask_info(). */
struct r600_fmask_info {
	unsigned offset;
	unsigned size;
	unsigned alignment;
	unsigned pitch;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
};
/* Layout parameters of the CMASK (fast color clear) metadata surface,
 * as computed by r600_texture_get_cmask_info(). */
struct r600_cmask_info {
	unsigned offset;
	unsigned size;
	unsigned alignment;
	unsigned slice_tile_max;
	unsigned base_address_reg;
};
/* Driver-private texture: base resource plus surface layout, depth-flush
 * state and color/depth compression & fast-clear metadata. */
struct r600_texture {
	struct r600_resource resource;
	unsigned size;
	unsigned pitch_override;
	bool is_depth;
	unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
	/* Decompressed copy of a depth texture, for sampling. */
	struct r600_texture *flushed_depth_texture;
	boolean is_flushing_texture;
	struct radeon_surf surface;
	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info fmask;
	struct r600_cmask_info cmask;
	struct r600_resource *cmask_buffer;
	unsigned cb_color_info; /* fast clear enable bit */
	unsigned color_clear_value[2];
	/* Depth buffer compression and fast clear. */
	struct r600_resource *htile_buffer;
	bool depth_cleared; /* if it was cleared at least once */
	float depth_clear_value;
	bool non_disp_tiling; /* R600-Cayman only */
	unsigned mipmap_shift;
};
/* Driver-private pipe_surface holding precomputed CB/DB register values
 * for use as a color or depth-stencil render target. */
struct r600_surface {
	struct pipe_surface base;
	bool color_initialized;
	bool depth_initialized;
	/* Misc. color flags. */
	bool alphatest_bypass;
	bool export_16bpc;
	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_base;
	unsigned cb_color_view;
	unsigned cb_color_size;		/* R600 only */
	unsigned cb_color_dim;		/* EG only */
	unsigned cb_color_pitch;	/* EG and later */
	unsigned cb_color_slice;	/* EG and later */
	unsigned cb_color_attrib;	/* EG and later */
	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
	unsigned cb_color_fmask_slice;	/* EG and later */
	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
	unsigned cb_color_mask;		/* R600 only */
	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
	/* DB registers. */
	unsigned db_depth_info;		/* R600 only, then SI and later */
	unsigned db_z_info;		/* EG and later */
	unsigned db_depth_base;		/* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;	/* EG and later */
	unsigned db_stencil_base;	/* EG and later */
	unsigned db_stencil_info;	/* EG and later */
	unsigned db_prefetch_limit;	/* R600 only */
	unsigned db_htile_surface;
	unsigned db_htile_data_base;
	unsigned db_preload_control;	/* EG and later */
	unsigned pa_su_poly_offset_db_fmt_cntl;
};
/* Memory tiling configuration (channel/bank counts and group size).
 * NOTE(review): presumably filled from winsys/kernel info — confirm. */
struct r600_tiling_info {
	unsigned num_channels;
	unsigned num_banks;
	unsigned group_bytes;
};
/* Screen state shared by the r600g/radeonsi drivers: winsys handle,
 * chip identification, debug flags, and the GPU-load sampling thread. */
struct r600_common_screen {
	struct pipe_screen b;
	struct radeon_winsys *ws;
	enum radeon_family family;
	enum chip_class chip_class;
	struct radeon_info info;
	struct r600_tiling_info tiling_info;
	unsigned debug_flags;	/* mask of DBG_* flags above */
	bool has_cp_dma;
	bool has_streamout;
	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to using and flushed before unlocking. */
	struct pipe_context *aux_context;
	pipe_mutex aux_context_lock;
	/* CS tracing (see DBG_TRACE_CS). */
	struct r600_resource *trace_bo;
	uint32_t *trace_ptr;
	unsigned cs_count;
	/* GPU load thread. */
	pipe_mutex gpu_load_mutex;
	pipe_thread gpu_load_thread;
	unsigned gpu_load_counter_busy;
	unsigned gpu_load_counter_idle;
	unsigned gpu_load_stop_thread; /* bool */
};
/* This encapsulates a state or an operation which can emitted into the GPU
 * command stream. */
struct r600_atom {
	/* Writes the state into the command stream. */
	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
	/* Number of CS dwords emit() writes (for space accounting). */
	unsigned num_dw;
	/* True if the state changed and must be re-emitted. */
	bool dirty;
};
/* Driver-private stream-output (transform feedback) target. */
struct r600_so_target {
	struct pipe_stream_output_target b;
	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct r600_resource *buf_filled_size;
	unsigned buf_filled_size_offset;
	bool buf_filled_size_valid;
	unsigned stride_in_dw;
};
/* Per-context streamout state: bound targets, the begin/enable atoms,
 * and bookkeeping for suspending streamout across flushes. */
struct r600_streamout {
	struct r600_atom begin_atom;
	bool begin_emitted;
	unsigned num_dw_for_end;
	unsigned enabled_mask;		/* bitmask of bound targets */
	unsigned num_targets;
	struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];
	unsigned append_bitmask;
	bool suspended;
	/* External state which comes from the vertex shader,
	 * it must be set explicitly when binding a shader. */
	unsigned *stride_in_dw;
	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	struct r600_atom enable_atom;
	bool streamout_enabled;
	bool prims_gen_query_enabled;
	int num_prims_gen_queries;
};
/* One hardware ring: its command stream and flush callback. */
struct r600_ring {
	struct radeon_winsys_cs *cs;
	/* NOTE(review): appears to guard against re-entrant flushing —
	 * confirm against the flush implementations. */
	bool flushing;
	void (*flush)(void *ctx, unsigned flags,
		      struct pipe_fence_handle **fence);
};
/* The rings a context may submit to: graphics and (optional) async DMA. */
struct r600_rings {
	struct r600_ring gfx;
	struct r600_ring dma;
};
/* Base class of the r600g/radeonsi pipe_context: shared rings,
 * allocators, streamout/query state, and hooks that the chip-specific
 * code fills in. */
struct r600_common_context {
	struct pipe_context b; /* base class */
	struct r600_common_screen	*screen;
	struct radeon_winsys		*ws;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct r600_rings		rings;
	unsigned			initial_gfx_cs_size;
	struct u_upload_mgr		*uploader;
	struct u_suballocator		*allocator_so_filled_size;
	struct util_slab_mempool	pool_transfers;
	/* Current unaccounted memory usage. */
	uint64_t			vram;
	uint64_t			gtt;
	/* States. */
	struct r600_streamout		streamout;
	/* Additional context states. */
	unsigned flags; /* flush flags */
	/* Queries. */
	/* The list of active queries. Only one query of each type can be active. */
	int				num_occlusion_queries;
	/* Keep track of non-timer queries, because they should be suspended
	 * during context flushing.
	 * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
	struct list_head		active_nontimer_queries;
	unsigned			num_cs_dw_nontimer_queries_suspend;
	/* If queries have been suspended. */
	bool				nontimer_queries_suspended;
	/* Additional hardware info. */
	unsigned backend_mask;
	unsigned max_db; /* for OQ */
	/* Misc stats. */
	unsigned			num_draw_calls;
	/* Render condition. */
	struct pipe_query		*current_render_cond;
	unsigned			current_render_cond_mode;
	boolean				current_render_cond_cond;
	boolean				predicate_drawing;
	/* For context flushing. */
	struct pipe_query		*saved_render_cond;
	boolean				saved_render_cond_cond;
	unsigned			saved_render_cond_mode;
	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float				sample_locations_1x[1][2];
	float				sample_locations_2x[2][2];
	float				sample_locations_4x[4][2];
	float				sample_locations_8x[8][2];
	float				sample_locations_16x[16][2];
	/* The list of all texture buffer objects in this context.
	 * This list is walked when a buffer is invalidated/reallocated and
	 * the GPU addresses are updated. */
	struct list_head		texture_buffers;
	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);
	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
			     unsigned offset, unsigned size, unsigned value,
			     bool is_framebuffer);
	void (*blit_decompress_depth)(struct pipe_context *ctx,
				      struct r600_texture *texture,
				      struct r600_texture *staging,
				      unsigned first_level, unsigned last_level,
				      unsigned first_layer, unsigned last_layer,
				      unsigned first_sample, unsigned last_sample);
	/* Reallocate the buffer and update all resource bindings where
	 * the buffer is bound, including all resource descriptors. */
	void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
	/* Enable or disable occlusion queries. */
	void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);
	/* This ensures there is enough space in the command stream. */
	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
				  bool include_draw_vbo);
};
/* r600_buffer.c */ |
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx, |
struct radeon_winsys_cs_handle *buf, |
enum radeon_bo_usage usage); |
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, |
struct r600_resource *resource, |
unsigned usage); |
bool r600_init_resource(struct r600_common_screen *rscreen, |
struct r600_resource *res, |
unsigned size, unsigned alignment, |
bool use_reusable_pool); |
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, |
const struct pipe_resource *templ, |
unsigned alignment); |
struct pipe_resource * |
r600_buffer_from_user_memory(struct pipe_screen *screen, |
const struct pipe_resource *templ, |
void *user_memory); |
/* r600_common_pipe.c */ |
void r600_draw_rectangle(struct blitter_context *blitter, |
int x1, int y1, int x2, int y2, float depth, |
enum blitter_attrib_type type, |
const union pipe_color_union *attrib); |
bool r600_common_screen_init(struct r600_common_screen *rscreen, |
struct radeon_winsys *ws); |
void r600_destroy_common_screen(struct r600_common_screen *rscreen); |
void r600_preflush_suspend_features(struct r600_common_context *ctx); |
void r600_postflush_resume_features(struct r600_common_context *ctx); |
bool r600_common_context_init(struct r600_common_context *rctx, |
struct r600_common_screen *rscreen); |
void r600_common_context_cleanup(struct r600_common_context *rctx); |
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r); |
bool r600_can_dump_shader(struct r600_common_screen *rscreen, |
const struct tgsi_token *tokens); |
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, |
unsigned offset, unsigned size, unsigned value, |
bool is_framebuffer); |
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, |
const struct pipe_resource *templ); |
const char *r600_get_llvm_processor_name(enum radeon_family family); |
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw); |
/* r600_gpu_load.c */ |
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); |
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen); |
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin); |
/* r600_query.c */ |
void r600_query_init(struct r600_common_context *rctx); |
void r600_suspend_nontimer_queries(struct r600_common_context *ctx); |
void r600_resume_nontimer_queries(struct r600_common_context *ctx); |
void r600_query_init_backend_mask(struct r600_common_context *ctx); |
/* r600_streamout.c */ |
void r600_streamout_buffers_dirty(struct r600_common_context *rctx); |
void r600_set_streamout_targets(struct pipe_context *ctx, |
unsigned num_targets, |
struct pipe_stream_output_target **targets, |
const unsigned *offset); |
void r600_emit_streamout_end(struct r600_common_context *rctx); |
void r600_update_prims_generated_query_state(struct r600_common_context *rctx, |
unsigned type, int diff); |
void r600_streamout_init(struct r600_common_context *rctx); |
/* r600_texture.c */ |
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, |
struct r600_texture *rtex, |
unsigned nr_samples, |
struct r600_fmask_info *out); |
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, |
struct r600_texture *rtex, |
struct r600_cmask_info *out); |
bool r600_init_flushed_depth_texture(struct pipe_context *ctx, |
struct pipe_resource *texture, |
struct r600_texture **staging); |
struct pipe_resource *r600_texture_create(struct pipe_screen *screen, |
const struct pipe_resource *templ); |
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, |
struct pipe_resource *texture, |
const struct pipe_surface *templ, |
unsigned width, unsigned height); |
unsigned r600_translate_colorswap(enum pipe_format format); |
void evergreen_do_fast_color_clear(struct r600_common_context *rctx, |
struct pipe_framebuffer_state *fb, |
struct r600_atom *fb_state, |
unsigned *buffers, |
const union pipe_color_union *color); |
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); |
void r600_init_context_texture_functions(struct r600_common_context *rctx); |
/* cayman_msaa.c */ |
extern const uint32_t eg_sample_locs_2x[4]; |
extern const unsigned eg_max_dist_2x; |
extern const uint32_t eg_sample_locs_4x[4]; |
extern const unsigned eg_max_dist_4x; |
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, |
unsigned sample_index, float *out_value); |
void cayman_init_msaa(struct pipe_context *ctx); |
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples); |
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, |
int ps_iter_samples, int overrast_samples); |
/* Inline helpers. */ |
/* Downcast a pipe_resource to the r600_resource that wraps it. */
static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
{
	struct r600_resource *res = (struct r600_resource *)r;
	return res;
}
/* Make *ptr reference res, updating the underlying pipe_resource
 * reference counts (pass res == NULL to just release *ptr). */
static INLINE void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	struct pipe_resource **dst = (struct pipe_resource **)ptr;
	struct pipe_resource *src = (struct pipe_resource *)res;
	pipe_resource_reference(dst, src);
}
/* Map a max_anisotropy value onto the hardware setting:
 * <=1 -> 0, 2 -> 1, 3-4 -> 2, 5-8 -> 3, >8 -> 4 (i.e. log2, clamped). */
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
	unsigned level;

	if (filter <= 1)
		return 0;
	for (level = 1; level < 4; level++) {
		if (filter <= (1u << level))
			return level;
	}
	return 4;
}
/* Print compute debug output to stderr when DBG_COMPUTE is set in the
 * screen's debug_flags.
 *
 * Fix: drop the trailing semicolon after "while (0)" so the macro
 * expands to a single statement and is safe inside if/else bodies;
 * callers supply the terminating semicolon as usual. */
#define COMPUTE_DBG(rscreen, fmt, args...) \
	do { \
		if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
#define R600_ERR(fmt, args...) \ |
fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) |
/* For MSAA sample positions. */ |
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ |
(((s0x) & 0xf) | (((s0y) & 0xf) << 4) | \ |
(((s1x) & 0xf) << 8) | (((s1y) & 0xf) << 12) | \ |
(((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) | \ |
(((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28)) |
#endif |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_query.c |
---|
0,0 → 1,969 |
/* |
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org> |
* Copyright 2014 Marek Olšák <marek.olsak@amd.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "r600_cs.h" |
#include "util/u_memory.h" |
/* One link in a query's chain of result buffers; a new buffer is
 * prepended when the current one fills up. */
struct r600_query_buffer {
	/* The buffer where query results are stored. */
	struct r600_resource		*buf;
	/* Offset of the next free result after current query data */
	unsigned			results_end;
	/* If a query buffer is full, a new buffer is created and the old one
	 * is put in here. When we calculate the result, we sum up the samples
	 * from all buffers. */
	struct r600_query_buffer	*previous;
};
/* Driver-private pipe_query object. GPU queries use 'buffer'; the
 * software-only R600_QUERY_* types use begin_result/end_result instead. */
struct r600_query {
	/* The query buffer and how many results are in it. */
	struct r600_query_buffer	buffer;
	/* The type of query */
	unsigned			type;
	/* Size of the result in memory for both begin_query and end_query,
	 * this can be one or two numbers, or it could even be a size of a structure. */
	unsigned			result_size;
	/* The number of dwords for begin_query or end_query. */
	unsigned			num_cs_dw;
	/* linked list of queries */
	struct list_head		list;
	/* for custom non-GPU queries */
	uint64_t			begin_result;
	uint64_t			end_result;
	/* Fence for GPU_FINISHED. */
	struct pipe_fence_handle	*fence;
};
static bool r600_is_timer_query(unsigned type) |
{ |
return type == PIPE_QUERY_TIME_ELAPSED || |
type == PIPE_QUERY_TIMESTAMP; |
} |
static bool r600_query_needs_begin(unsigned type) |
{ |
return type != PIPE_QUERY_GPU_FINISHED && |
type != PIPE_QUERY_TIMESTAMP; |
} |
static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type) |
{ |
unsigned j, i, num_results, buf_size = 4096; |
uint32_t *results; |
/* Non-GPU queries. */ |
switch (type) { |
case PIPE_QUERY_TIMESTAMP_DISJOINT: |
case PIPE_QUERY_GPU_FINISHED: |
case R600_QUERY_DRAW_CALLS: |
case R600_QUERY_REQUESTED_VRAM: |
case R600_QUERY_REQUESTED_GTT: |
case R600_QUERY_BUFFER_WAIT_TIME: |
case R600_QUERY_NUM_CS_FLUSHES: |
case R600_QUERY_NUM_BYTES_MOVED: |
case R600_QUERY_VRAM_USAGE: |
case R600_QUERY_GTT_USAGE: |
case R600_QUERY_GPU_TEMPERATURE: |
case R600_QUERY_CURRENT_GPU_SCLK: |
case R600_QUERY_CURRENT_GPU_MCLK: |
case R600_QUERY_GPU_LOAD: |
return NULL; |
} |
/* Queries are normally read by the CPU after |
* being written by the gpu, hence staging is probably a good |
* usage pattern. |
*/ |
struct r600_resource *buf = (struct r600_resource*) |
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM, |
PIPE_USAGE_STAGING, buf_size); |
switch (type) { |
case PIPE_QUERY_OCCLUSION_COUNTER: |
case PIPE_QUERY_OCCLUSION_PREDICATE: |
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); |
memset(results, 0, buf_size); |
/* Set top bits for unused backends. */ |
num_results = buf_size / (16 * ctx->max_db); |
for (j = 0; j < num_results; j++) { |
for (i = 0; i < ctx->max_db; i++) { |
if (!(ctx->backend_mask & (1<<i))) { |
results[(i * 4)+1] = 0x80000000; |
results[(i * 4)+3] = 0x80000000; |
} |
} |
results += 4 * ctx->max_db; |
} |
ctx->ws->buffer_unmap(buf->cs_buf); |
break; |
case PIPE_QUERY_TIME_ELAPSED: |
case PIPE_QUERY_TIMESTAMP: |
break; |
case PIPE_QUERY_PRIMITIVES_EMITTED: |
case PIPE_QUERY_PRIMITIVES_GENERATED: |
case PIPE_QUERY_SO_STATISTICS: |
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
case PIPE_QUERY_PIPELINE_STATISTICS: |
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); |
memset(results, 0, buf_size); |
ctx->ws->buffer_unmap(buf->cs_buf); |
break; |
default: |
assert(0); |
} |
return buf; |
} |
static void r600_update_occlusion_query_state(struct r600_common_context *rctx, |
unsigned type, int diff) |
{ |
if (type == PIPE_QUERY_OCCLUSION_COUNTER || |
type == PIPE_QUERY_OCCLUSION_PREDICATE) { |
bool old_enable = rctx->num_occlusion_queries != 0; |
bool enable; |
rctx->num_occlusion_queries += diff; |
assert(rctx->num_occlusion_queries >= 0); |
enable = rctx->num_occlusion_queries != 0; |
if (enable != old_enable) { |
rctx->set_occlusion_query_state(&rctx->b, enable); |
} |
} |
} |
/* Emit the "begin" half of a GPU query into the gfx command stream:
 * reserve CS space, make sure there is room in the result buffer
 * (chaining a fresh buffer if not), then emit the event packet that
 * makes the GPU write the begin counters at the chosen offset. */
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;
	r600_update_occlusion_query_state(ctx, query->type, 1);
	r600_update_prims_generated_query_state(ctx, query->type, 1);
	/* Reserve room for both the begin and the matching end packets. */
	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
		/* The full buffer is chained into 'previous' so its results
		 * still count when the query is read back. */
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}
	/* emit begin query */
	va = query->buffer.buf->gpu_address + query->buffer.results_end;
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		/* Only the low 8 bits of the upper address dword are used. */
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		/* EVENT_WRITE_EOP writes a 64-bit timestamp after the event. */
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
		radeon_emit(cs, va);
		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	default:
		assert(0);
	}
	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
			RADEON_PRIO_MIN);
	/* Track CS space that suspending this query later will need. */
	if (!r600_is_timer_query(query->type)) {
		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
	}
}
/* Emit the "end" half of a GPU query: write the end counters at the
 * second half of the result slot and advance results_end past the
 * completed begin/end pair. */
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;
	/* The queries which need begin already called this in begin_query. */
	if (!r600_query_needs_begin(query->type)) {
		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
	}
	va = query->buffer.buf->gpu_address;
	/* emit end query */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		/* End counters land 8 bytes after the begin counters. */
		va += query->buffer.results_end + 8;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* End data occupies the second half of the result slot. */
		va += query->buffer.results_end + query->result_size/2;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		/* fall through */
	case PIPE_QUERY_TIMESTAMP:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
		radeon_emit(cs, va);
		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		va += query->buffer.results_end + query->result_size/2;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	default:
		assert(0);
	}
	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
			RADEON_PRIO_MIN);
	/* This begin/end pair is complete; move past it. */
	query->buffer.results_end += query->result_size;
	if (r600_query_needs_begin(query->type)) {
		if (!r600_is_timer_query(query->type)) {
			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
		}
	}
	r600_update_occlusion_query_state(ctx, query->type, -1);
	r600_update_prims_generated_query_state(ctx, query->type, -1);
}
/* Emit SET_PREDICATION packets for render-condition support: either
 * clear predication, or point the predication hardware at every result
 * block of the query's buffer chain. */
static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
					int operation, bool flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	if (operation == PREDICATION_OP_CLEAR) {
		ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
		radeon_emit(cs, 0);
		radeon_emit(cs, PRED_OP(PREDICATION_OP_CLEAR));
	} else {
		struct r600_query_buffer *qbuf;
		unsigned count;
		uint32_t op;
		/* Find how many results there are. */
		count = 0;
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			count += qbuf->results_end / query->result_size;
		}
		/* 5 dwords per result: 3 packet dwords + relocation. */
		ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
		/* emit predicate packets for all data blocks */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			unsigned results_base = 0;
			uint64_t va = qbuf->buf->gpu_address;
			while (results_base < qbuf->results_end) {
				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
				radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
				radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF));
				r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
						RADEON_PRIO_MIN);
				results_base += query->result_size;
				/* set CONTINUE bit for all packets except the first */
				op |= PREDICATION_CONTINUE;
			}
		}
	}
}
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
struct r600_query *query; |
bool skip_allocation = false; |
query = CALLOC_STRUCT(r600_query); |
if (query == NULL) |
return NULL; |
query->type = query_type; |
switch (query_type) { |
case PIPE_QUERY_OCCLUSION_COUNTER: |
case PIPE_QUERY_OCCLUSION_PREDICATE: |
query->result_size = 16 * rctx->max_db; |
query->num_cs_dw = 6; |
break; |
break; |
case PIPE_QUERY_TIME_ELAPSED: |
query->result_size = 16; |
query->num_cs_dw = 8; |
break; |
case PIPE_QUERY_TIMESTAMP: |
query->result_size = 8; |
query->num_cs_dw = 8; |
break; |
case PIPE_QUERY_PRIMITIVES_EMITTED: |
case PIPE_QUERY_PRIMITIVES_GENERATED: |
case PIPE_QUERY_SO_STATISTICS: |
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */ |
query->result_size = 32; |
query->num_cs_dw = 6; |
break; |
case PIPE_QUERY_PIPELINE_STATISTICS: |
/* 11 values on EG, 8 on R600. */ |
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16; |
query->num_cs_dw = 6; |
break; |
/* Non-GPU queries and queries not requiring a buffer. */ |
case PIPE_QUERY_TIMESTAMP_DISJOINT: |
case PIPE_QUERY_GPU_FINISHED: |
case R600_QUERY_DRAW_CALLS: |
case R600_QUERY_REQUESTED_VRAM: |
case R600_QUERY_REQUESTED_GTT: |
case R600_QUERY_BUFFER_WAIT_TIME: |
case R600_QUERY_NUM_CS_FLUSHES: |
case R600_QUERY_NUM_BYTES_MOVED: |
case R600_QUERY_VRAM_USAGE: |
case R600_QUERY_GTT_USAGE: |
case R600_QUERY_GPU_TEMPERATURE: |
case R600_QUERY_CURRENT_GPU_SCLK: |
case R600_QUERY_CURRENT_GPU_MCLK: |
case R600_QUERY_GPU_LOAD: |
skip_allocation = true; |
break; |
default: |
assert(0); |
FREE(query); |
return NULL; |
} |
if (!skip_allocation) { |
query->buffer.buf = r600_new_query_buffer(rctx, query_type); |
if (!query->buffer.buf) { |
FREE(query); |
return NULL; |
} |
} |
return (struct pipe_query*)query; |
} |
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) |
{ |
struct r600_query *rquery = (struct r600_query*)query; |
struct r600_query_buffer *prev = rquery->buffer.previous; |
/* Release all query buffers. */ |
while (prev) { |
struct r600_query_buffer *qbuf = prev; |
prev = prev->previous; |
pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); |
FREE(qbuf); |
} |
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); |
FREE(query); |
} |
/* pipe_context::begin_query — start a query. Software queries just
 * snapshot a counter; GPU queries reset the result-buffer chain and
 * emit the begin packet. */
static boolean r600_begin_query(struct pipe_context *ctx,
				struct pipe_query *query)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;
	if (!r600_query_needs_begin(rquery->type)) {
		assert(0);
		return false;
	}
	/* Non-GPU queries. */
	switch (rquery->type) {
	case PIPE_QUERY_TIMESTAMP_DISJOINT:
		return true;
	case R600_QUERY_DRAW_CALLS:
		rquery->begin_result = rctx->num_draw_calls;
		return true;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_VRAM_USAGE:
	case R600_QUERY_GTT_USAGE:
	case R600_QUERY_GPU_TEMPERATURE:
	case R600_QUERY_CURRENT_GPU_SCLK:
	case R600_QUERY_CURRENT_GPU_MCLK:
		/* These report only the end_query value. */
		rquery->begin_result = 0;
		return true;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
		return true;
	case R600_QUERY_NUM_CS_FLUSHES:
		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
		return true;
	case R600_QUERY_NUM_BYTES_MOVED:
		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
		return true;
	case R600_QUERY_GPU_LOAD:
		rquery->begin_result = r600_gpu_load_begin(rctx->screen);
		return true;
	}
	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}
	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}
	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;
	r600_emit_query_begin(rctx, rquery);
	/* Non-timer queries must be suspended/resumed around flushes. */
	if (!r600_is_timer_query(rquery->type)) {
		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
	}
	return true;
}
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
struct r600_query *rquery = (struct r600_query *)query; |
/* Non-GPU queries. */ |
switch (rquery->type) { |
case PIPE_QUERY_TIMESTAMP_DISJOINT: |
return; |
case PIPE_QUERY_GPU_FINISHED: |
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, &rquery->fence); |
return; |
case R600_QUERY_DRAW_CALLS: |
rquery->end_result = rctx->num_draw_calls; |
return; |
case R600_QUERY_REQUESTED_VRAM: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY); |
return; |
case R600_QUERY_REQUESTED_GTT: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY); |
return; |
case R600_QUERY_BUFFER_WAIT_TIME: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS); |
return; |
case R600_QUERY_NUM_CS_FLUSHES: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES); |
return; |
case R600_QUERY_NUM_BYTES_MOVED: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED); |
return; |
case R600_QUERY_VRAM_USAGE: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE); |
return; |
case R600_QUERY_GTT_USAGE: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE); |
return; |
case R600_QUERY_GPU_TEMPERATURE: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000; |
return; |
case R600_QUERY_CURRENT_GPU_SCLK: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000; |
return; |
case R600_QUERY_CURRENT_GPU_MCLK: |
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000; |
return; |
case R600_QUERY_GPU_LOAD: |
rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result); |
return; |
} |
r600_emit_query_end(rctx, rquery); |
if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) { |
LIST_DELINIT(&rquery->list); |
} |
} |
/* Read one (start, end) pair of 64-bit counters from a mapped query buffer
 * and return end - start.
 *
 * map             - mapped query buffer (little-endian array of uint32)
 * start_index     - dword index of the low half of the "start" counter
 * end_index       - dword index of the low half of the "end" counter
 * test_status_bit - if true, return 0 unless bit 63 of BOTH counters is
 *                   set (the GPU sets this bit when the value is valid)
 *
 * Bug fix: this used to return `unsigned`, silently truncating 64-bit
 * deltas (e.g. TIME_ELAPSED ticks, byte counters) to 32 bits before the
 * callers accumulated them into uint64_t result fields. Return the full
 * 64-bit difference instead.
 */
static uint64_t r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	/* Assemble the two 64-bit values from their 32-bit halves. */
	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}
/* Decode the results stored in one query buffer and accumulate them into
 * *result.
 *
 * For non-GPU queries the buffer is ignored and the CPU-side begin/end
 * snapshots are used. For GPU queries the buffer is mapped (blocking only
 * if `wait` is set) and every result block written so far
 * (0 .. qbuf->results_end) is summed/merged according to the query type.
 *
 * Returns FALSE only when a non-blocking map would have stalled (result
 * not ready yet); TRUE otherwise.
 */
static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
					    struct r600_query *query,
					    struct r600_query_buffer *qbuf,
					    boolean wait,
					    union pipe_query_result *result)
{
	struct pipe_screen *screen = ctx->b.screen;
	unsigned results_base = 0;	/* byte offset of the next result block */
	char *map;

	/* Non-GPU queries. */
	switch (query->type) {
	case PIPE_QUERY_TIMESTAMP_DISJOINT:
		/* Convert from cycles per millisecond to cycles per second (Hz). */
		result->timestamp_disjoint.frequency =
			(uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
		result->timestamp_disjoint.disjoint = FALSE;
		return TRUE;
	case PIPE_QUERY_GPU_FINISHED:
		/* The fence was submitted in r600_end_query; wait on it (or poll). */
		result->b = screen->fence_finish(screen, query->fence,
						 wait ? PIPE_TIMEOUT_INFINITE : 0);
		return result->b;
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
	case R600_QUERY_NUM_CS_FLUSHES:
	case R600_QUERY_NUM_BYTES_MOVED:
	case R600_QUERY_VRAM_USAGE:
	case R600_QUERY_GTT_USAGE:
	case R600_QUERY_GPU_TEMPERATURE:
	case R600_QUERY_CURRENT_GPU_SCLK:
	case R600_QUERY_CURRENT_GPU_MCLK:
		/* Difference of the CPU snapshots taken at begin/end. */
		result->u64 = query->end_result - query->begin_result;
		return TRUE;
	case R600_QUERY_GPU_LOAD:
		/* Already computed in r600_end_query via r600_gpu_load_end. */
		result->u64 = query->end_result;
		return TRUE;
	}

	/* GPU queries: map the result buffer, non-blocking unless `wait`. */
	map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
					      PIPE_TRANSFER_READ |
					      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		/* Pairs of 64-bit ZPASS counters, 16 bytes per block;
		 * bit 63 is the "result valid" flag. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		/* Predicate passes if any block has a non-zero sample count. */
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		/* Timestamps carry no status bit; summed in raw clock ticks,
		 * converted to ns by the caller (r600_get_query_result). */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_TIMESTAMP:
	{
		/* Single 64-bit value, no begin/end pair. */
		uint32_t *current_result = (uint32_t*)map;
		result->u64 = (uint64_t)current_result[0] |
			      (uint64_t)current_result[1] << 32;
		break;
	}
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		/* Both streamout counters from the structure above. */
		while (results_base != qbuf->results_end) {
			result->so_statistics.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so_statistics.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* Overflow happened if fewer primitives were written than
		 * needed storage in any block. */
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* Counter layout differs between Evergreen+ (11 counters,
		 * includes HS/DS/CS) and older chips (8 counters); the dword
		 * indices below follow the respective hardware layouts. */
		if (ctx->chip_class >= EVERGREEN) {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 22, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 24, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 26, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 28, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 30, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 32, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 34, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 36, false);
				result->pipeline_statistics.hs_invocations +=
					r600_query_read_result(map + results_base, 16, 38, false);
				result->pipeline_statistics.ds_invocations +=
					r600_query_read_result(map + results_base, 18, 40, false);
				result->pipeline_statistics.cs_invocations +=
					r600_query_read_result(map + results_base, 20, 42, false);
				results_base += query->result_size;
			}
		} else {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 16, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 18, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 20, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 22, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 24, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 26, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 28, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 30, false);
				results_base += query->result_size;
			}
		}
#if 0 /* for testing */
		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
		       result->pipeline_statistics.ia_vertices,
		       result->pipeline_statistics.ia_primitives,
		       result->pipeline_statistics.vs_invocations,
		       result->pipeline_statistics.hs_invocations,
		       result->pipeline_statistics.ds_invocations,
		       result->pipeline_statistics.gs_invocations,
		       result->pipeline_statistics.gs_primitives,
		       result->pipeline_statistics.c_invocations,
		       result->pipeline_statistics.c_primitives,
		       result->pipeline_statistics.ps_invocations,
		       result->pipeline_statistics.cs_invocations);
#endif
		break;
	default:
		assert(0);
	}

	ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
	return TRUE;
}
/* pipe_context::get_query_result: walk the whole chain of query buffers
 * and accumulate their results, then convert time-based results to the
 * units expected by the gallium interface. Returns FALSE if a
 * non-blocking read found the result not yet available. */
static boolean r600_get_query_result(struct pipe_context *ctx,
				     struct pipe_query *query,
				     boolean wait, union pipe_query_result *result)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *qbuf = &rquery->buffer;

	util_query_clear_result(result, rquery->type);

	/* Newest buffer first, then back through the ->previous chain. */
	while (qbuf) {
		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result))
			return FALSE;
		qbuf = qbuf->previous;
	}

	/* Convert the time to expected units. */
	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
	    rquery->type == PIPE_QUERY_TIMESTAMP) {
		result->u64 = (1000000 * result->u64) /
			      rctx->screen->info.r600_clock_crystal_freq;
	}
	return TRUE;
}
static void r600_render_condition(struct pipe_context *ctx, |
struct pipe_query *query, |
boolean condition, |
uint mode) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
struct r600_query *rquery = (struct r600_query *)query; |
bool wait_flag = false; |
rctx->current_render_cond = query; |
rctx->current_render_cond_cond = condition; |
rctx->current_render_cond_mode = mode; |
if (query == NULL) { |
if (rctx->predicate_drawing) { |
rctx->predicate_drawing = false; |
r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false); |
} |
return; |
} |
if (mode == PIPE_RENDER_COND_WAIT || |
mode == PIPE_RENDER_COND_BY_REGION_WAIT) { |
wait_flag = true; |
} |
rctx->predicate_drawing = true; |
switch (rquery->type) { |
case PIPE_QUERY_OCCLUSION_COUNTER: |
case PIPE_QUERY_OCCLUSION_PREDICATE: |
r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag); |
break; |
case PIPE_QUERY_PRIMITIVES_EMITTED: |
case PIPE_QUERY_PRIMITIVES_GENERATED: |
case PIPE_QUERY_SO_STATISTICS: |
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag); |
break; |
default: |
assert(0); |
} |
} |
void r600_suspend_nontimer_queries(struct r600_common_context *ctx) |
{ |
struct r600_query *query; |
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) { |
r600_emit_query_end(ctx, query); |
} |
assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); |
} |
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx) |
{ |
struct r600_query *query; |
unsigned num_dw = 0; |
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) { |
/* begin + end */ |
num_dw += query->num_cs_dw * 2; |
/* Workaround for the fact that |
* num_cs_dw_nontimer_queries_suspend is incremented for every |
* resumed query, which raises the bar in need_cs_space for |
* queries about to be resumed. |
*/ |
num_dw += query->num_cs_dw; |
} |
/* primitives generated query */ |
num_dw += ctx->streamout.enable_atom.num_dw; |
/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */ |
num_dw += 13; |
return num_dw; |
} |
void r600_resume_nontimer_queries(struct r600_common_context *ctx) |
{ |
struct r600_query *query; |
assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); |
/* Check CS space here. Resuming must not be interrupted by flushes. */ |
ctx->need_gfx_cs_space(&ctx->b, |
r600_queries_num_cs_dw_for_resuming(ctx), TRUE); |
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) { |
r600_emit_query_begin(ctx, query); |
} |
} |
/* Get backends mask.
 *
 * Determine which render backends (DBs) are physically enabled and store
 * the bitmask in ctx->backend_mask. Three strategies, in order:
 *   1. decode the kernel-provided backend map, if valid;
 *   2. probe via a ZPASS_DONE event that makes each active backend write
 *      a non-zero counter into a scratch buffer;
 *   3. fall back to assuming the first num_backends bits.
 */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	struct r600_resource *buffer;
	uint32_t *results;
	unsigned num_backends = ctx->screen->info.r600_num_backends;
	unsigned i, mask = 0;

	/* if backend_map query is supported by the kernel */
	if (ctx->screen->info.r600_backend_map_valid) {
		unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
		unsigned backend_map = ctx->screen->info.r600_backend_map;
		unsigned item_width, item_mask;

		/* The map packs one backend index per tile pipe; field width
		 * differs between Evergreen+ (4 bits) and older chips (2 bits). */
		if (ctx->chip_class >= EVERGREEN) {
			item_width = 4;
			item_mask = 0x7;
		} else {
			item_width = 2;
			item_mask = 0x3;
		}

		while(num_tile_pipes--) {
			i = backend_map & item_mask;
			mask |= (1<<i);
			backend_map >>= item_width;
		}
		if (mask != 0) {
			ctx->backend_mask = mask;
			return;
		}
	}

	/* otherwise backup path for older kernels */

	/* create buffer for event data */
	buffer = (struct r600_resource*)
		pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, ctx->max_db*16);
	if (!buffer)
		goto err;

	/* initialize buffer with zeroes */
	results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
	if (results) {
		memset(results, 0, ctx->max_db * 4 * 4);
		ctx->ws->buffer_unmap(buffer->cs_buf);

		/* emit EVENT_WRITE for ZPASS_DONE */
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, buffer->gpu_address);
		radeon_emit(cs, buffer->gpu_address >> 32);
		r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);

		/* analyze results */
		results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
		if (results) {
			for(i = 0; i < ctx->max_db; i++) {
				/* at least highest bit will be set if backend is used */
				if (results[i*4 + 1])
					mask |= (1<<i);
			}
			ctx->ws->buffer_unmap(buffer->cs_buf);
		}
	}

	pipe_resource_reference((struct pipe_resource**)&buffer, NULL);

	if (mask != 0) {
		ctx->backend_mask = mask;
		return;
	}

err:
	/* fallback to old method - set num_backends lower bits to 1 */
	ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
	return;
}
void r600_query_init(struct r600_common_context *rctx) |
{ |
rctx->b.create_query = r600_create_query; |
rctx->b.destroy_query = r600_destroy_query; |
rctx->b.begin_query = r600_begin_query; |
rctx->b.end_query = r600_end_query; |
rctx->b.get_query_result = r600_get_query_result; |
if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0) |
rctx->b.render_condition = r600_render_condition; |
LIST_INITHEAD(&rctx->active_nontimer_queries); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_streamout.c |
---|
0,0 → 1,369 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
* |
*/ |
#include "r600_pipe_common.h" |
#include "r600_cs.h" |
#include "util/u_memory.h" |
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable); |
static struct pipe_stream_output_target * |
r600_create_so_target(struct pipe_context *ctx, |
struct pipe_resource *buffer, |
unsigned buffer_offset, |
unsigned buffer_size) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
struct r600_so_target *t; |
struct r600_resource *rbuffer = (struct r600_resource*)buffer; |
t = CALLOC_STRUCT(r600_so_target); |
if (!t) { |
return NULL; |
} |
u_suballocator_alloc(rctx->allocator_so_filled_size, 4, |
&t->buf_filled_size_offset, |
(struct pipe_resource**)&t->buf_filled_size); |
if (!t->buf_filled_size) { |
FREE(t); |
return NULL; |
} |
t->b.reference.count = 1; |
t->b.context = ctx; |
pipe_resource_reference(&t->b.buffer, buffer); |
t->b.buffer_offset = buffer_offset; |
t->b.buffer_size = buffer_size; |
util_range_add(&rbuffer->valid_buffer_range, buffer_offset, |
buffer_offset + buffer_size); |
return &t->b; |
} |
static void r600_so_target_destroy(struct pipe_context *ctx, |
struct pipe_stream_output_target *target) |
{ |
struct r600_so_target *t = (struct r600_so_target*)target; |
pipe_resource_reference(&t->b.buffer, NULL); |
pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL); |
FREE(t); |
} |
/* Recompute the command-stream dword budgets for the streamout begin/end
 * sequences based on the currently enabled buffers, mark the begin atom
 * dirty, and enable streamout. The per-packet dword counts below must
 * match exactly what r600_emit_streamout_begin/end emit. */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
	struct r600_atom *begin = &rctx->streamout.begin_atom;
	unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
	/* Buffers that resume from a previously stored filled-size need a
	 * larger STRMOUT_BUFFER_UPDATE (FROM_MEM + reloc). */
	unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
						   rctx->streamout.append_bitmask);

	if (!num_bufs)
		return;

	rctx->streamout.num_dw_for_end =
		12 + /* flush_vgt_streamout */
		num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */

	begin->num_dw = 12 + /* flush_vgt_streamout */
			3; /* VGT_STRMOUT_BUFFER_CONFIG */

	if (rctx->chip_class >= SI) {
		begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
	} else {
		begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */

		/* R7xx additionally needs STRMOUT_BASE_UPDATE per buffer. */
		if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
			begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
	}

	begin->num_dw +=
		num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
		(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
		(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */

	begin->dirty = true;

	r600_set_streamout_enable(rctx, true);
}
void r600_set_streamout_targets(struct pipe_context *ctx, |
unsigned num_targets, |
struct pipe_stream_output_target **targets, |
const unsigned *offsets) |
{ |
struct r600_common_context *rctx = (struct r600_common_context *)ctx; |
unsigned i; |
unsigned append_bitmask = 0; |
/* Stop streamout. */ |
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) { |
r600_emit_streamout_end(rctx); |
} |
/* Set the new targets. */ |
for (i = 0; i < num_targets; i++) { |
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]); |
r600_context_add_resource_size(ctx, targets[i]->buffer); |
if (offsets[i] == ((unsigned)-1)) |
append_bitmask |= 1 << i; |
} |
for (; i < rctx->streamout.num_targets; i++) { |
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL); |
} |
rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) | |
(num_targets >= 2 && targets[1] ? 2 : 0) | |
(num_targets >= 3 && targets[2] ? 4 : 0) | |
(num_targets >= 4 && targets[3] ? 8 : 0); |
rctx->streamout.num_targets = num_targets; |
rctx->streamout.append_bitmask = append_bitmask; |
if (num_targets) { |
r600_streamout_buffers_dirty(rctx); |
} else { |
rctx->streamout.begin_atom.dirty = false; |
r600_set_streamout_enable(rctx, false); |
} |
} |
/* Flush the VGT streamout state: zero CP_STRMOUT_CNTL, fire a
 * VGT_STREAMOUT_FLUSH event, then busy-wait (WAIT_REG_MEM) until the CP
 * sets OFFSET_UPDATE_DONE in that register. Emits 12 dwords total (the
 * count budgeted in r600_streamout_buffers_dirty). */
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
	unsigned reg_strmout_cntl;

	/* The register is at different places on different ASICs. */
	if (rctx->chip_class >= CIK) {
		reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
	} else if (rctx->chip_class >= EVERGREEN) {
		reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
	} else {
		reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
	}

	/* Clear the done bit before triggering the flush event. */
	if (rctx->chip_class >= CIK) {
		cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
	} else {
		r600_write_config_reg(cs, reg_strmout_cntl, 0);
	}

	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));

	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
	radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
	radeon_emit(cs, reg_strmout_cntl >> 2);  /* register */
	radeon_emit(cs, 0);
	radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
	radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
	radeon_emit(cs, 4); /* poll interval */
}
/* Emit the streamout "begin" sequence: flush VGT, program the buffer
 * config, then for each bound target program its size/stride/base and
 * either resume from the stored filled-size (append) or restart at the
 * buffer offset. The packet counts here must match the dword budget in
 * r600_streamout_buffers_dirty. */
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
	struct r600_so_target **t = rctx->streamout.targets;
	unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
	unsigned i, update_flags = 0;

	r600_flush_vgt_streamout(rctx);

	/* Tell the VGT which streamout buffers are enabled. */
	r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
				       R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
				       R_028B20_VGT_STRMOUT_BUFFER_EN,
			       rctx->streamout.enabled_mask);

	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		t[i]->stride_in_dw = stride_in_dw[i];

		if (rctx->chip_class >= SI) {
			/* SI binds streamout buffers as shader resources.
			 * VGT only counts primitives and tells the shader
			 * through SGPRs what to do. */
			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
		} else {
			uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;

			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);

			/* Pre-SI also programs the buffer base address directly. */
			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
			radeon_emit(cs, va >> 8);			/* BUFFER_BASE */

			r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
					RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);

			/* R7xx requires this packet after updating BUFFER_BASE.
			 * Without this, R7xx locks up. */
			if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
				radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
				radeon_emit(cs, i);
				radeon_emit(cs, va >> 8);

				r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
						RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
			}
		}

		if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
			uint64_t va = t[i]->buf_filled_size->gpu_address +
				      t[i]->buf_filled_size_offset;

			/* Append. */
			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, va); /* src address lo */
			radeon_emit(cs, va >> 32); /* src address hi */

			r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
					RADEON_USAGE_READ, RADEON_PRIO_MIN);
		} else {
			/* Start from the beginning. */
			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
			radeon_emit(cs, 0); /* unused */
		}
	}

	/* R600-family chips between R600 and RV770 need a SURFACE_BASE_UPDATE
	 * covering the streamout surfaces programmed above. */
	if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
		radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
		radeon_emit(cs, update_flags);
	}
	rctx->streamout.begin_emitted = true;
}
/* Emit the streamout "end" sequence: flush VGT, then for each bound
 * target store the hardware's filled-size counter into the target's
 * buf_filled_size sub-allocation and zero the buffer size register. */
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
	struct r600_so_target **t = rctx->streamout.targets;
	unsigned i;
	uint64_t va;

	r600_flush_vgt_streamout(rctx);

	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		/* Destination for the stored filled-size counter. */
		va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
		radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
			    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
			    STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
		radeon_emit(cs, va);     /* dst address lo */
		radeon_emit(cs, va >> 32); /* dst address hi */
		radeon_emit(cs, 0); /* unused */
		radeon_emit(cs, 0); /* unused */

		r600_emit_reloc(rctx,  &rctx->rings.gfx, t[i]->buf_filled_size,
				RADEON_USAGE_WRITE, RADEON_PRIO_MIN);

		/* Zero the buffer size. The counters (primitives generated,
		 * primitives emitted) may be enabled even if there is not
		 * buffer bound. This ensures that the primitives-emitted query
		 * won't increment. */
		r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);

		/* Mark the stored counter usable for a later "append" begin. */
		t[i]->buf_filled_size_valid = true;
	}

	rctx->streamout.begin_emitted = false;
	rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
}
/* STREAMOUT CONFIG DERIVED STATE |
* |
* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. |
* The buffer mask is an independent state, so no writes occur if there |
* are no buffers bound. |
*/ |
static bool r600_get_strmout_en(struct r600_common_context *rctx) |
{ |
return rctx->streamout.streamout_enabled || |
rctx->streamout.prims_gen_query_enabled; |
} |
static void r600_emit_streamout_enable(struct r600_common_context *rctx, |
struct r600_atom *atom) |
{ |
r600_write_context_reg(rctx->rings.gfx.cs, |
rctx->chip_class >= EVERGREEN ? |
R_028B94_VGT_STRMOUT_CONFIG : |
R_028AB0_VGT_STRMOUT_EN, |
S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx))); |
} |
/* Update the buffers-bound part of the streamout enable state; dirty the
 * enable atom only when the effective enable actually changes. */
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
	bool was_enabled = r600_get_strmout_en(rctx);

	rctx->streamout.streamout_enabled = enable;

	if (was_enabled != r600_get_strmout_en(rctx))
		rctx->streamout.enable_atom.dirty = true;
}
void r600_update_prims_generated_query_state(struct r600_common_context *rctx, |
unsigned type, int diff) |
{ |
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) { |
bool old_strmout_en = r600_get_strmout_en(rctx); |
rctx->streamout.num_prims_gen_queries += diff; |
assert(rctx->streamout.num_prims_gen_queries >= 0); |
rctx->streamout.prims_gen_query_enabled = |
rctx->streamout.num_prims_gen_queries != 0; |
if (old_strmout_en != r600_get_strmout_en(rctx)) |
rctx->streamout.enable_atom.dirty = true; |
} |
} |
void r600_streamout_init(struct r600_common_context *rctx) |
{ |
rctx->b.create_stream_output_target = r600_create_so_target; |
rctx->b.stream_output_target_destroy = r600_so_target_destroy; |
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin; |
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable; |
rctx->streamout.enable_atom.num_dw = 3; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_texture.c |
---|
0,0 → 1,1296 |
/* |
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Jerome Glisse |
* Corbin Simpson |
*/ |
#include "r600_pipe_common.h" |
#include "r600_cs.h" |
#include "util/u_format.h" |
#include "util/u_memory.h" |
#include "util/u_pack_color.h" |
#include <errno.h> |
#include <inttypes.h> |
/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */ |
static void r600_copy_region_with_blit(struct pipe_context *pipe, |
struct pipe_resource *dst, |
unsigned dst_level, |
unsigned dstx, unsigned dsty, unsigned dstz, |
struct pipe_resource *src, |
unsigned src_level, |
const struct pipe_box *src_box) |
{ |
struct pipe_blit_info blit; |
memset(&blit, 0, sizeof(blit)); |
blit.src.resource = src; |
blit.src.format = src->format; |
blit.src.level = src_level; |
blit.src.box = *src_box; |
blit.dst.resource = dst; |
blit.dst.format = dst->format; |
blit.dst.level = dst_level; |
blit.dst.box.x = dstx; |
blit.dst.box.y = dsty; |
blit.dst.box.z = dstz; |
blit.dst.box.width = src_box->width; |
blit.dst.box.height = src_box->height; |
blit.dst.box.depth = src_box->depth; |
blit.mask = util_format_get_mask(src->format) & |
util_format_get_mask(dst->format); |
blit.filter = PIPE_TEX_FILTER_NEAREST; |
if (blit.mask) { |
pipe->blit(pipe, &blit); |
} |
} |
/* Copy from a full GPU texture to a transfer's staging one. */ |
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; |
struct pipe_resource *dst = &rtransfer->staging->b.b; |
struct pipe_resource *src = transfer->resource; |
if (src->nr_samples > 1) { |
r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, |
src, transfer->level, &transfer->box); |
return; |
} |
rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, |
&transfer->box); |
} |
/* Copy from a transfer's staging texture to a full GPU one. */ |
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) |
{ |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; |
struct pipe_resource *dst = transfer->resource; |
struct pipe_resource *src = &rtransfer->staging->b.b; |
struct pipe_box sbox; |
u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox); |
if (dst->nr_samples > 1) { |
r600_copy_region_with_blit(ctx, dst, transfer->level, |
transfer->box.x, transfer->box.y, transfer->box.z, |
src, 0, &sbox); |
return; |
} |
rctx->dma_copy(ctx, dst, transfer->level, |
transfer->box.x, transfer->box.y, transfer->box.z, |
src, 0, &sbox); |
} |
static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level, |
const struct pipe_box *box) |
{ |
enum pipe_format format = rtex->resource.b.b.format; |
return rtex->surface.level[level].offset + |
box->z * rtex->surface.level[level].slice_size + |
box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes + |
box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); |
} |
/* Translate a gallium pipe_resource template into a radeon_surf
 * description for the winsys.  Only the inputs are filled in here;
 * the actual layout is computed later by ws->surface_init
 * (see r600_setup_surface).
 * Returns 0 on success, -EINVAL for targets with no surface
 * representation (PIPE_BUFFER). */
static int r600_init_surface(struct r600_common_screen *rscreen,
			     struct radeon_surf *surface,
			     const struct pipe_resource *ptex,
			     unsigned array_mode,
			     bool is_flushed_depth)
{
	const struct util_format_description *desc =
		util_format_description(ptex->format);
	bool is_depth, is_stencil;
	is_depth = util_format_has_depth(desc);
	is_stencil = util_format_has_stencil(desc);
	/* Dimensions in pixels and block sizes of the format. */
	surface->npix_x = ptex->width0;
	surface->npix_y = ptex->height0;
	surface->npix_z = ptex->depth0;
	surface->blk_w = util_format_get_blockwidth(ptex->format);
	surface->blk_h = util_format_get_blockheight(ptex->format);
	surface->blk_d = 1;
	surface->array_size = 1;
	surface->last_level = ptex->last_level;
	if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
	    ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		surface->bpe = 4; /* stencil is allocated separately on evergreen */
	} else {
		surface->bpe = util_format_get_blocksize(ptex->format);
		/* align byte per element on dword */
		if (surface->bpe == 3) {
			surface->bpe = 4;
		}
	}
	/* nr_samples == 0 means a non-MSAA resource; normalize to 1. */
	surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
	surface->flags = RADEON_SURF_SET(array_mode, MODE);
	/* Map the gallium texture target onto the winsys surface type. */
	switch (ptex->target) {
	case PIPE_TEXTURE_1D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case PIPE_TEXTURE_RECT:
	case PIPE_TEXTURE_2D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case PIPE_TEXTURE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	case PIPE_TEXTURE_1D_ARRAY:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		surface->array_size = ptex->array_size;
		break;
	case PIPE_TEXTURE_2D_ARRAY:
	case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		surface->array_size = ptex->array_size;
		break;
	case PIPE_TEXTURE_CUBE:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
		break;
	case PIPE_BUFFER:
	default:
		/* Buffers are not laid out as surfaces. */
		return -EINVAL;
	}
	if (ptex->bind & PIPE_BIND_SCANOUT) {
		surface->flags |= RADEON_SURF_SCANOUT;
	}
	if (!is_flushed_depth && is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (is_stencil) {
			surface->flags |= RADEON_SURF_SBUFFER |
					  RADEON_SURF_HAS_SBUFFER_MIPTREE;
		}
	}
	if (rscreen->chip_class >= SI) {
		surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
	}
	return 0;
}
/* Let the winsys compute the surface layout, then apply an optional
 * level-0 pitch override coming from a shared-handle import
 * (r600_texture_from_handle).  Returns the winsys error code or 0. */
static int r600_setup_surface(struct pipe_screen *screen,
			      struct r600_texture *rtex,
			      unsigned pitch_in_bytes_override)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	int r;
	r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
	if (r) {
		return r;
	}
	rtex->size = rtex->surface.bo_size;
	if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
		/* old ddx on evergreen over estimate alignment for 1d, only 1 level
		 * for those
		 */
		/* Recompute the level-0 layout from the imported pitch. */
		rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
		rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
		rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
			/* Stencil data follows immediately after the depth slice. */
			rtex->surface.stencil_offset =
				rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
		}
	}
	return 0;
}
static boolean r600_texture_get_handle(struct pipe_screen* screen, |
struct pipe_resource *ptex, |
struct winsys_handle *whandle) |
{ |
struct r600_texture *rtex = (struct r600_texture*)ptex; |
struct r600_resource *resource = &rtex->resource; |
struct radeon_surf *surface = &rtex->surface; |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
rscreen->ws->buffer_set_tiling(resource->buf, |
NULL, |
surface->level[0].mode >= RADEON_SURF_MODE_1D ? |
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR, |
surface->level[0].mode >= RADEON_SURF_MODE_2D ? |
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR, |
surface->bankw, surface->bankh, |
surface->tile_split, |
surface->stencil_tile_split, |
surface->mtilea, |
surface->level[0].pitch_bytes, |
(surface->flags & RADEON_SURF_SCANOUT) != 0); |
return rscreen->ws->buffer_get_handle(resource->buf, |
surface->level[0].pitch_bytes, whandle); |
} |
static void r600_texture_destroy(struct pipe_screen *screen, |
struct pipe_resource *ptex) |
{ |
struct r600_texture *rtex = (struct r600_texture*)ptex; |
struct r600_resource *resource = &rtex->resource; |
if (rtex->flushed_depth_texture) |
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); |
pipe_resource_reference((struct pipe_resource**)&rtex->htile_buffer, NULL); |
if (rtex->cmask_buffer != &rtex->resource) { |
pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL); |
} |
pb_reference(&resource->buf, NULL); |
FREE(rtex); |
} |
/* Forward declaration; the table itself is defined after the transfer
 * functions below. */
static const struct u_resource_vtbl r600_texture_vtbl;
/* The number of samples can be specified independently of the texture. */
/* Compute size/alignment/layout parameters for the FMASK buffer of an
 * MSAA color texture and store them in *out.  On failure (unsupported
 * sample count or winsys error) *out is left zeroed. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 unsigned nr_samples,
				 struct r600_fmask_info *out)
{
	/* FMASK is allocated like an ordinary texture. */
	struct radeon_surf fmask = rtex->surface;
	memset(out, 0, sizeof(*out));
	/* Reset the outputs of the previous surface_init run. */
	fmask.bo_alignment = 0;
	fmask.bo_size = 0;
	fmask.nsamples = 1;
	fmask.flags |= RADEON_SURF_FMASK;
	/* Force 2D tiling if it wasn't set. This may occur when creating
	 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
	 * destination buffer must have an FMASK too. */
	fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
	fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
	if (rscreen->chip_class >= SI) {
		fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
	}
	/* Bytes per element depend on the sample count. */
	switch (nr_samples) {
	case 2:
	case 4:
		fmask.bpe = 1;
		if (rscreen->chip_class <= CAYMAN) {
			fmask.bankh = 4;
		}
		break;
	case 8:
		fmask.bpe = 4;
		break;
	default:
		R600_ERR("Invalid sample count for FMASK allocation.\n");
		return;
	}
	/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
	 * This can be fixed by writing a separate FMASK allocator specifically
	 * for R600-R700 asics. */
	if (rscreen->chip_class <= R700) {
		fmask.bpe *= 2;
	}
	if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
		R600_ERR("Got error in surface_init while allocating FMASK.\n");
		return;
	}
	assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
	/* slice_tile_max is expressed in 8x8 tiles, minus one (HW convention). */
	out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;
	out->tile_mode_index = fmask.tiling_index[0];
	out->pitch = fmask.level[0].nblk_x;
	out->bank_height = fmask.bankh;
	out->alignment = MAX2(256, fmask.bo_alignment);
	out->size = fmask.bo_size;
}
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen, |
struct r600_texture *rtex) |
{ |
r600_texture_get_fmask_info(rscreen, rtex, |
rtex->resource.b.b.nr_samples, &rtex->fmask); |
rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment); |
rtex->size = rtex->fmask.offset + rtex->fmask.size; |
} |
/* Compute size/alignment parameters of the CMASK (fast-clear metadata)
 * buffer for pre-SI chips and store them in *out. */
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 struct r600_cmask_info *out)
{
	/* One CMASK element covers an 8x8 pixel tile; each element is 4 bits. */
	unsigned cmask_tile_width = 8;
	unsigned cmask_tile_height = 8;
	unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
	unsigned element_bits = 4;
	unsigned cmask_cache_bits = 1024;
	unsigned num_pipes = rscreen->tiling_info.num_channels;
	unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
	/* Derive the macro-tile dimensions from the CMASK cache capacity. */
	unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
	unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
	unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
	unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
	unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
	/* Pad the surface dimensions to whole macro tiles. */
	unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
	unsigned height = align(rtex->surface.npix_y, macro_tile_height);
	unsigned base_align = num_pipes * pipe_interleave_bytes;
	unsigned slice_bytes =
		((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
	assert(macro_tile_width % 128 == 0);
	assert(macro_tile_height % 128 == 0);
	/* slice_tile_max is in units of 128x128 pixel blocks, minus one. */
	out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
	out->alignment = MAX2(256, base_align);
	/* One aligned slice per array layer. */
	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
		    align(slice_bytes, base_align);
}
/* SI+ variant of the CMASK size computation: cache-line dimensions are
 * looked up per pipe count rather than derived from the cache size. */
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
				      struct r600_texture *rtex,
				      struct r600_cmask_info *out)
{
	unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
	unsigned num_pipes = rscreen->tiling_info.num_channels;
	unsigned cl_width, cl_height;
	/* Cache-line dimensions (in 8x8 tiles) per pipe configuration. */
	switch (num_pipes) {
	case 2:
		cl_width = 32;
		cl_height = 16;
		break;
	case 4:
		cl_width = 32;
		cl_height = 32;
		break;
	case 8:
		cl_width = 64;
		cl_height = 32;
		break;
	case 16: /* Hawaii */
		cl_width = 64;
		cl_height = 64;
		break;
	default:
		assert(0);
		return;
	}
	unsigned base_align = num_pipes * pipe_interleave_bytes;
	/* Pad the surface to whole cache lines (each cl unit covers 8 pixels). */
	unsigned width = align(rtex->surface.npix_x, cl_width*8);
	unsigned height = align(rtex->surface.npix_y, cl_height*8);
	unsigned slice_elements = (width * height) / (8*8);
	/* Each element of CMASK is a nibble. */
	unsigned slice_bytes = slice_elements / 2;
	/* slice_tile_max is in units of 128x128 pixel blocks, minus one. */
	out->slice_tile_max = (width * height) / (128*128);
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;
	out->alignment = MAX2(256, base_align);
	/* One aligned slice per array layer. */
	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
		    align(slice_bytes, base_align);
}
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, |
struct r600_texture *rtex) |
{ |
if (rscreen->chip_class >= SI) { |
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); |
} else { |
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); |
} |
rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment); |
rtex->size = rtex->cmask.offset + rtex->cmask.size; |
if (rscreen->chip_class >= SI) |
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1); |
else |
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); |
} |
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen, |
struct r600_texture *rtex) |
{ |
if (rtex->cmask_buffer) |
return; |
assert(rtex->cmask.size == 0); |
if (rscreen->chip_class >= SI) { |
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); |
} else { |
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); |
} |
rtex->cmask_buffer = (struct r600_resource *) |
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, |
PIPE_USAGE_DEFAULT, rtex->cmask.size); |
if (rtex->cmask_buffer == NULL) { |
rtex->cmask.size = 0; |
return; |
} |
/* update colorbuffer state bits */ |
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8; |
if (rscreen->chip_class >= SI) |
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1); |
else |
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); |
} |
/* Compute the size in bytes of the HTILE (hi-Z metadata) buffer for a
 * depth texture.  Returns 0 when HTILE cannot or should not be used
 * (old kernels, hardware bugs, unsupported pipe configuration). */
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
					    struct r600_texture *rtex)
{
	unsigned cl_width, cl_height, width, height;
	unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
	unsigned num_pipes = rscreen->tiling_info.num_channels;
	/* Requires a recent enough DRM on pre-SI chips. */
	if (rscreen->chip_class <= EVERGREEN &&
	    rscreen->info.drm_minor < 26)
		return 0;
	/* HW bug on R6xx. */
	if (rscreen->chip_class == R600 &&
	    (rtex->surface.level[0].npix_x > 7680 ||
	     rtex->surface.level[0].npix_y > 7680))
		return 0;
	/* HTILE is broken with 1D tiling on old kernels and CIK. */
	if (rscreen->chip_class >= CIK &&
	    rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
	    rscreen->info.drm_minor < 38)
		return 0;
	/* Cache-line dimensions (in 8x8 tiles) per pipe configuration. */
	switch (num_pipes) {
	case 1:
		cl_width = 32;
		cl_height = 16;
		break;
	case 2:
		cl_width = 32;
		cl_height = 32;
		break;
	case 4:
		cl_width = 64;
		cl_height = 32;
		break;
	case 8:
		cl_width = 64;
		cl_height = 64;
		break;
	case 16:
		cl_width = 128;
		cl_height = 64;
		break;
	default:
		assert(0);
		return 0;
	}
	/* Pad the surface to whole cache lines; one 4-byte HTILE element
	 * covers an 8x8 pixel tile. */
	width = align(rtex->surface.npix_x, cl_width * 8);
	height = align(rtex->surface.npix_y, cl_height * 8);
	slice_elements = (width * height) / (8 * 8);
	slice_bytes = slice_elements * 4;
	pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
	base_align = num_pipes * pipe_interleave_bytes;
	/* One aligned slice per array layer. */
	return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
		align(slice_bytes, base_align);
}
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, |
struct r600_texture *rtex) |
{ |
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex); |
if (!htile_size) |
return; |
rtex->htile_buffer = (struct r600_resource*) |
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, |
PIPE_USAGE_DEFAULT, htile_size); |
if (rtex->htile_buffer == NULL) { |
/* this is not a fatal error as we can still keep rendering |
* without htile buffer */ |
R600_ERR("Failed to create buffer object for htile buffer.\n"); |
} else { |
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, |
htile_size, 0, true); |
} |
} |
/* Common processing for r600_texture_create and r600_texture_from_handle */
/* Build the r600_texture object: compute the surface layout, allocate
 * FMASK/CMASK/HTILE metadata as needed, then either allocate the backing
 * buffer (buf == NULL) or adopt the imported one.  Returns NULL on any
 * allocation failure. */
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
			   const struct pipe_resource *base,
			   unsigned pitch_in_bytes_override,
			   struct pb_buffer *buf,
			   struct radeon_surf *surface)
{
	struct r600_texture *rtex;
	struct r600_resource *resource;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	rtex = CALLOC_STRUCT(r600_texture);
	if (rtex == NULL)
		return NULL;
	resource = &rtex->resource;
	resource->b.b = *base;
	resource->b.vtbl = &r600_texture_vtbl;
	pipe_reference_init(&resource->b.b.reference, 1);
	resource->b.b.screen = screen;
	rtex->pitch_override = pitch_in_bytes_override;
	/* don't include stencil-only formats which we don't support for rendering */
	rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
	rtex->surface = *surface;
	if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) {
		FREE(rtex);
		return NULL;
	}
	/* Tiled depth textures utilize the non-displayable tile order.
	 * This must be done after r600_setup_surface.
	 * Applies to R600-Cayman. */
	rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
	if (rtex->is_depth) {
		/* Depth: allocate HTILE unless this is a transfer/flushed-depth
		 * resource or hyper-Z is disabled via debug flags. */
		if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
				     R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
		    !(rscreen->debug_flags & DBG_NO_HYPERZ)) {
			r600_texture_allocate_htile(rscreen, rtex);
		}
	} else {
		/* Color MSAA: FMASK and CMASK are mandatory. */
		if (base->nr_samples > 1) {
			if (!buf) {
				r600_texture_allocate_fmask(rscreen, rtex);
				r600_texture_allocate_cmask(rscreen, rtex);
				rtex->cmask_buffer = &rtex->resource;
			}
			if (!rtex->fmask.size || !rtex->cmask.size) {
				FREE(rtex);
				return NULL;
			}
		}
	}
	/* Now create the backing buffer. */
	if (!buf) {
		if (!r600_init_resource(rscreen, resource, rtex->size,
					rtex->surface.bo_alignment, TRUE)) {
			FREE(rtex);
			return NULL;
		}
	} else {
		/* Adopt the imported buffer and cache its winsys handles. */
		resource->buf = buf;
		resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
		resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->cs_buf);
		resource->domains = rscreen->ws->buffer_get_initial_domain(resource->cs_buf);
	}
	if (rtex->cmask.size) {
		/* Initialize the cmask to 0xCC (= compressed state). */
		r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
					 rtex->cmask.offset, rtex->cmask.size,
					 0xCCCCCCCC, true);
	}
	/* Initialize the CMASK base register value. */
	rtex->cmask.base_address_reg =
		(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
	/* Optional debug dumps follow; they do not affect the result. */
	if (rscreen->debug_flags & DBG_VM) {
		fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
			rtex->resource.gpu_address,
			rtex->resource.gpu_address + rtex->resource.buf->size,
			base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
			base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
	}
	if (rscreen->debug_flags & DBG_TEX ||
	    (rtex->resource.b.b.last_level > 0 && rscreen->debug_flags & DBG_TEXMIP)) {
		printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
		       "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
		       "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
		       rtex->surface.npix_x, rtex->surface.npix_y,
		       rtex->surface.npix_z, rtex->surface.blk_w,
		       rtex->surface.blk_h, rtex->surface.blk_d,
		       rtex->surface.array_size, rtex->surface.last_level,
		       rtex->surface.bpe, rtex->surface.nsamples,
		       rtex->surface.flags, util_format_short_name(base->format));
		for (int i = 0; i <= rtex->surface.last_level; i++) {
			printf("  L %i: offset=%"PRIu64", slice_size=%"PRIu64", npix_x=%u, "
			       "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
			       "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
			       i, rtex->surface.level[i].offset,
			       rtex->surface.level[i].slice_size,
			       u_minify(rtex->resource.b.b.width0, i),
			       u_minify(rtex->resource.b.b.height0, i),
			       u_minify(rtex->resource.b.b.depth0, i),
			       rtex->surface.level[i].nblk_x,
			       rtex->surface.level[i].nblk_y,
			       rtex->surface.level[i].nblk_z,
			       rtex->surface.level[i].pitch_bytes,
			       rtex->surface.level[i].mode);
		}
		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
			for (int i = 0; i <= rtex->surface.last_level; i++) {
				printf("  S %i: offset=%"PRIu64", slice_size=%"PRIu64", npix_x=%u, "
				       "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
				       "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
				       i, rtex->surface.stencil_level[i].offset,
				       rtex->surface.stencil_level[i].slice_size,
				       u_minify(rtex->resource.b.b.width0, i),
				       u_minify(rtex->resource.b.b.height0, i),
				       u_minify(rtex->resource.b.b.depth0, i),
				       rtex->surface.stencil_level[i].nblk_x,
				       rtex->surface.stencil_level[i].nblk_y,
				       rtex->surface.stencil_level[i].nblk_z,
				       rtex->surface.stencil_level[i].pitch_bytes,
				       rtex->surface.stencil_level[i].mode);
			}
		}
	}
	return rtex;
}
/* Pick the surface tiling mode (linear / 1D / 2D) for a new texture based
 * on its template, debug flags and chip class. */
static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
				   const struct pipe_resource *templ)
{
	const struct util_format_description *desc = util_format_description(templ->format);
	/* MSAA resources must be 2D tiled. */
	if (templ->nr_samples > 1)
		return RADEON_SURF_MODE_2D;
	/* Transfer resources should be linear. */
	if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	/* Handle common candidates for the linear mode.
	 * Compressed textures must always be tiled. */
	if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) &&
	    !util_format_is_compressed(templ->format)) {
		/* Not everything can be linear, so we cannot enforce it
		 * for all textures. */
		if ((rscreen->debug_flags & DBG_NO_TILING) &&
		    (!util_format_is_depth_or_stencil(templ->format) ||
		     !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
		/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
		if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
		/* Cursors are linear on SI.
		 * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
		if (rscreen->chip_class >= SI &&
		    (templ->bind & PIPE_BIND_CURSOR))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
		if (templ->bind & PIPE_BIND_LINEAR)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
		/* Textures with a very small height are recommended to be linear. */
		if (templ->target == PIPE_TEXTURE_1D ||
		    templ->target == PIPE_TEXTURE_1D_ARRAY ||
		    templ->height0 <= 4)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
		/* Textures likely to be mapped often. */
		if (templ->usage == PIPE_USAGE_STAGING ||
		    templ->usage == PIPE_USAGE_STREAM)
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}
	/* Make small textures 1D tiled. */
	if (templ->width0 <= 16 || templ->height0 <= 16 ||
	    (rscreen->debug_flags & DBG_NO_2D_TILING))
		return RADEON_SURF_MODE_1D;
	/* The allocator will switch to 1D if needed. */
	return RADEON_SURF_MODE_2D;
}
struct pipe_resource *r600_texture_create(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
struct radeon_surf surface = {0}; |
int r; |
r = r600_init_surface(rscreen, &surface, templ, |
r600_choose_tiling(rscreen, templ), |
templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); |
if (r) { |
return NULL; |
} |
r = rscreen->ws->surface_best(rscreen->ws, &surface); |
if (r) { |
return NULL; |
} |
return (struct pipe_resource *)r600_texture_create_object(screen, templ, |
0, NULL, &surface); |
} |
static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, |
const struct pipe_resource *templ, |
struct winsys_handle *whandle) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; |
struct pb_buffer *buf = NULL; |
unsigned stride = 0; |
unsigned array_mode; |
enum radeon_bo_layout micro, macro; |
struct radeon_surf surface; |
bool scanout; |
int r; |
/* Support only 2D textures without mipmaps */ |
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || |
templ->depth0 != 1 || templ->last_level != 0) |
return NULL; |
buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride); |
if (!buf) |
return NULL; |
rscreen->ws->buffer_get_tiling(buf, µ, ¯o, |
&surface.bankw, &surface.bankh, |
&surface.tile_split, |
&surface.stencil_tile_split, |
&surface.mtilea, &scanout); |
if (macro == RADEON_LAYOUT_TILED) |
array_mode = RADEON_SURF_MODE_2D; |
else if (micro == RADEON_LAYOUT_TILED) |
array_mode = RADEON_SURF_MODE_1D; |
else |
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; |
r = r600_init_surface(rscreen, &surface, templ, array_mode, false); |
if (r) { |
return NULL; |
} |
if (scanout) |
surface.flags |= RADEON_SURF_SCANOUT; |
return (struct pipe_resource *)r600_texture_create_object(screen, templ, |
stride, buf, &surface); |
} |
bool r600_init_flushed_depth_texture(struct pipe_context *ctx, |
struct pipe_resource *texture, |
struct r600_texture **staging) |
{ |
struct r600_texture *rtex = (struct r600_texture*)texture; |
struct pipe_resource resource; |
struct r600_texture **flushed_depth_texture = staging ? |
staging : &rtex->flushed_depth_texture; |
if (!staging && rtex->flushed_depth_texture) |
return true; /* it's ready */ |
resource.target = texture->target; |
resource.format = texture->format; |
resource.width0 = texture->width0; |
resource.height0 = texture->height0; |
resource.depth0 = texture->depth0; |
resource.array_size = texture->array_size; |
resource.last_level = texture->last_level; |
resource.nr_samples = texture->nr_samples; |
resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; |
resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; |
resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH; |
if (staging) |
resource.flags |= R600_RESOURCE_FLAG_TRANSFER; |
*flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource); |
if (*flushed_depth_texture == NULL) { |
R600_ERR("failed to create temporary texture to hold flushed depth\n"); |
return false; |
} |
(*flushed_depth_texture)->is_flushing_texture = TRUE; |
(*flushed_depth_texture)->non_disp_tiling = false; |
return true; |
} |
/** |
* Initialize the pipe_resource descriptor to be of the same size as the box, |
* which is supposed to hold a subregion of the texture "orig" at the given |
* mipmap level. |
*/ |
static void r600_init_temp_resource_from_box(struct pipe_resource *res, |
struct pipe_resource *orig, |
const struct pipe_box *box, |
unsigned level, unsigned flags) |
{ |
memset(res, 0, sizeof(*res)); |
res->format = orig->format; |
res->width0 = box->width; |
res->height0 = box->height; |
res->depth0 = 1; |
res->array_size = 1; |
res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; |
res->flags = flags; |
/* We must set the correct texture target and dimensions for a 3D box. */ |
if (box->depth > 1 && util_max_layer(orig, level) > 0) |
res->target = orig->target; |
else |
res->target = PIPE_TEXTURE_2D; |
switch (res->target) { |
case PIPE_TEXTURE_1D_ARRAY: |
case PIPE_TEXTURE_2D_ARRAY: |
case PIPE_TEXTURE_CUBE_ARRAY: |
res->array_size = box->depth; |
break; |
case PIPE_TEXTURE_3D: |
res->depth0 = box->depth; |
break; |
default:; |
} |
} |
/* Map a region of the texture for CPU access.  Depending on tiling,
 * domain and usage this either maps the resource directly or routes
 * through a linear staging texture (with blit/DMA copies on map/unmap).
 * Returns a CPU pointer to the requested box, or NULL on failure. */
static void *r600_texture_transfer_map(struct pipe_context *ctx,
				       struct pipe_resource *texture,
				       unsigned level,
				       unsigned usage,
				       const struct pipe_box *box,
				       struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_texture *rtex = (struct r600_texture*)texture;
	struct r600_transfer *trans;
	boolean use_staging_texture = FALSE;
	struct r600_resource *buf;
	unsigned offset = 0;
	char *map;
	/* We cannot map a tiled texture directly because the data is
	 * in a different order, therefore we do detiling using a blit.
	 *
	 * Also, use a temporary in GTT memory for read transfers, as
	 * the CPU is much happier reading out of cached system memory
	 * than uncached VRAM.
	 */
	if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
		use_staging_texture = TRUE;
	} else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY) &&
		   (rtex->resource.domains == RADEON_DOMAIN_VRAM)) {
		/* Untiled buffers in VRAM, which is slow for CPU reads */
		use_staging_texture = TRUE;
	} else if (!(usage & PIPE_TRANSFER_READ) &&
	    (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
	     rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
		/* Use a staging texture for uploads if the underlying BO is busy. */
		use_staging_texture = TRUE;
	}
	/* Transfer resources are already linear/CPU-visible; map them directly. */
	if (texture->flags & R600_RESOURCE_FLAG_TRANSFER) {
		use_staging_texture = FALSE;
	}
	/* MAP_DIRECTLY is incompatible with the staging path. */
	if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
		return NULL;
	}
	trans = CALLOC_STRUCT(r600_transfer);
	if (trans == NULL)
		return NULL;
	trans->transfer.resource = texture;
	trans->transfer.level = level;
	trans->transfer.usage = usage;
	trans->transfer.box = *box;
	if (rtex->is_depth) {
		/* Depth must be decompressed into a flushed-depth staging copy. */
		struct r600_texture *staging_depth;
		if (rtex->resource.b.b.nr_samples > 1) {
			/* MSAA depth buffers need to be converted to single sample buffers.
			 *
			 * Mapping MSAA depth buffers can occur if ReadPixels is called
			 * with a multisample GLX visual.
			 *
			 * First downsample the depth buffer to a temporary texture,
			 * then decompress the temporary one to staging.
			 *
			 * Only the region being mapped is transferred.
			 */
			struct pipe_resource resource;
			r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
			if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
				R600_ERR("failed to create temporary texture to hold untiled copy\n");
				FREE(trans);
				return NULL;
			}
			if (usage & PIPE_TRANSFER_READ) {
				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
							    0, 0, 0, box->depth, 0, 0);
				pipe_resource_reference((struct pipe_resource**)&temp, NULL);
			}
		}
		else {
			/* XXX: only readback the rectangle which is being mapped? */
			/* XXX: when discard is true, no need to read back from depth texture */
			if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
				R600_ERR("failed to create temporary texture to hold untiled copy\n");
				FREE(trans);
				return NULL;
			}
			rctx->blit_decompress_depth(ctx, rtex, staging_depth,
						    level, level,
						    box->z, box->z + box->depth - 1,
						    0, 0);
			offset = r600_texture_get_offset(staging_depth, level, box);
		}
		trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
		trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
		trans->staging = (struct r600_resource*)staging_depth;
	} else if (use_staging_texture) {
		struct pipe_resource resource;
		struct r600_texture *staging;
		r600_init_temp_resource_from_box(&resource, texture, box, level,
						 R600_RESOURCE_FLAG_TRANSFER);
		resource.usage = (usage & PIPE_TRANSFER_READ) ?
			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
		/* Create the temporary texture. */
		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
		if (staging == NULL) {
			R600_ERR("failed to create temporary texture to hold untiled copy\n");
			FREE(trans);
			return NULL;
		}
		trans->staging = &staging->resource;
		trans->transfer.stride = staging->surface.level[0].pitch_bytes;
		trans->transfer.layer_stride = staging->surface.level[0].slice_size;
		/* Reads must see the current GPU contents of the texture. */
		if (usage & PIPE_TRANSFER_READ) {
			r600_copy_to_staging_texture(ctx, trans);
		}
	} else {
		/* the resource is mapped directly */
		trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
		trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
		offset = r600_texture_get_offset(rtex, level, box);
	}
	if (trans->staging) {
		buf = trans->staging;
		/* Freshly allocated staging for writes needs no GPU sync. */
		if (!rtex->is_depth && !(usage & PIPE_TRANSFER_READ))
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	} else {
		buf = &rtex->resource;
	}
	if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
		pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
		FREE(trans);
		return NULL;
	}
	*ptransfer = &trans->transfer;
	return map + offset;
}
static void r600_texture_transfer_unmap(struct pipe_context *ctx, |
struct pipe_transfer* transfer) |
{ |
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; |
struct r600_common_context *rctx = (struct r600_common_context*)ctx; |
struct radeon_winsys_cs_handle *buf; |
struct pipe_resource *texture = transfer->resource; |
struct r600_texture *rtex = (struct r600_texture*)texture; |
if (rtransfer->staging) { |
buf = rtransfer->staging->cs_buf; |
} else { |
buf = r600_resource(transfer->resource)->cs_buf; |
} |
rctx->ws->buffer_unmap(buf); |
if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) { |
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) { |
ctx->resource_copy_region(ctx, texture, transfer->level, |
transfer->box.x, transfer->box.y, transfer->box.z, |
&rtransfer->staging->b.b, transfer->level, |
&transfer->box); |
} else { |
r600_copy_from_staging_texture(ctx, rtransfer); |
} |
} |
if (rtransfer->staging) |
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); |
FREE(transfer); |
} |
/* Texture resource function table (u_resource dispatch).  Entries left
 * NULL fall back to the behavior u_resource provides for them. */
static const struct u_resource_vtbl r600_texture_vtbl =
{
	NULL,				/* get_handle */
	r600_texture_destroy,		/* resource_destroy */
	r600_texture_transfer_map,	/* transfer_map */
	NULL,				/* transfer_flush_region */
	r600_texture_transfer_unmap,	/* transfer_unmap */
	NULL				/* transfer_inline_write */
};
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, |
struct pipe_resource *texture, |
const struct pipe_surface *templ, |
unsigned width, unsigned height) |
{ |
struct r600_surface *surface = CALLOC_STRUCT(r600_surface); |
if (surface == NULL) |
return NULL; |
assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level)); |
assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level)); |
pipe_reference_init(&surface->base.reference, 1); |
pipe_resource_reference(&surface->base.texture, texture); |
surface->base.context = pipe; |
surface->base.format = templ->format; |
surface->base.width = width; |
surface->base.height = height; |
surface->base.u = templ->u; |
return &surface->base; |
} |
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, |
struct pipe_resource *tex, |
const struct pipe_surface *templ) |
{ |
unsigned level = templ->u.tex.level; |
return r600_create_surface_custom(pipe, tex, templ, |
u_minify(tex->width0, level), |
u_minify(tex->height0, level)); |
} |
static void r600_surface_destroy(struct pipe_context *pipe, |
struct pipe_surface *surface) |
{ |
struct r600_surface *surf = (struct r600_surface*)surface; |
pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_fmask, NULL); |
pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_cmask, NULL); |
pipe_resource_reference(&surface->texture, NULL); |
FREE(surface); |
} |
unsigned r600_translate_colorswap(enum pipe_format format) |
{ |
const struct util_format_description *desc = util_format_description(format); |
#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == UTIL_FORMAT_SWIZZLE_##swz) |
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ |
return V_0280A0_SWAP_STD; |
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) |
return ~0U; |
switch (desc->nr_channels) { |
case 1: |
if (HAS_SWIZZLE(0,X)) |
return V_0280A0_SWAP_STD; /* X___ */ |
else if (HAS_SWIZZLE(3,X)) |
return V_0280A0_SWAP_ALT_REV; /* ___X */ |
break; |
case 2: |
if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) || |
(HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) || |
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y))) |
return V_0280A0_SWAP_STD; /* XY__ */ |
else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) || |
(HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) || |
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X))) |
return V_0280A0_SWAP_STD_REV; /* YX__ */ |
else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y)) |
return V_0280A0_SWAP_ALT; /* X__Y */ |
else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X)) |
return V_0280A0_SWAP_ALT_REV; /* Y__X */ |
break; |
case 3: |
if (HAS_SWIZZLE(0,X)) |
return V_0280A0_SWAP_STD; /* XYZ */ |
else if (HAS_SWIZZLE(0,Z)) |
return V_0280A0_SWAP_STD_REV; /* ZYX */ |
break; |
case 4: |
/* check the middle channels, the 1st and 4th channel can be NONE */ |
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) |
return V_0280A0_SWAP_STD; /* XYZW */ |
else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) |
return V_0280A0_SWAP_STD_REV; /* WZYX */ |
else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) |
return V_0280A0_SWAP_ALT; /* ZYXW */ |
else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y)) |
return V_0280A0_SWAP_ALT_REV; /* WXYZ */ |
break; |
} |
return ~0U; |
} |
static void evergreen_set_clear_color(struct r600_texture *rtex, |
enum pipe_format surface_format, |
const union pipe_color_union *color) |
{ |
union util_color uc; |
memset(&uc, 0, sizeof(uc)); |
if (util_format_is_pure_uint(surface_format)) { |
util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1); |
} else if (util_format_is_pure_sint(surface_format)) { |
util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1); |
} else { |
util_pack_color(color->f, surface_format, &uc); |
} |
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); |
} |
/* Attempt a CMASK-based fast clear for each colorbuffer requested in
 * *buffers.  Buffers that were fast-cleared have their bit removed from
 * *buffers so the caller skips the slow clear for them. */
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
				   struct pipe_framebuffer_state *fb,
				   struct r600_atom *fb_state,
				   unsigned *buffers,
				   const union pipe_color_union *color)
{
	int i;

	/* NOTE(review): fast clear is skipped entirely under a render
	 * condition — presumably because the CP buffer clear is not
	 * predicated; confirm. */
	if (rctx->current_render_cond)
		return;

	for (i = 0; i < fb->nr_cbufs; i++) {
		struct r600_texture *tex;
		unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;

		if (!fb->cbufs[i])
			continue;

		/* if this colorbuffer is not being cleared */
		if (!(*buffers & clear_bit))
			continue;

		tex = (struct r600_texture *)fb->cbufs[i]->texture;

		/* 128-bit formats are unsupported */
		if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
			continue;
		}

		/* the clear is allowed if all layers are bound */
		if (fb->cbufs[i]->u.tex.first_layer != 0 ||
		    fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
			continue;
		}

		/* cannot clear mipmapped textures */
		if (fb->cbufs[i]->texture->last_level != 0) {
			continue;
		}

		/* only supported on tiled surfaces */
		if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) {
			continue;
		}

		/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
		if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
		    rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
			continue;
		}

		/* ensure CMASK is enabled */
		r600_texture_alloc_cmask_separate(rctx->screen, tex);
		if (tex->cmask.size == 0) {
			continue;
		}

		/* Do the fast clear: record the clear color, then fill the
		 * CMASK buffer with zeros (the "cleared" tile state). */
		evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
		rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
				   tex->cmask.offset, tex->cmask.size, 0, true);

		/* Mark the level for decompression before it is sampled. */
		tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
		fb_state->dirty = true;
		*buffers &= ~clear_bit;
	}
}
/* Hook the shared texture entry points into the screen vtable. */
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
{
	rscreen->b.resource_from_handle = r600_texture_from_handle;
	rscreen->b.resource_get_handle = r600_texture_get_handle;
}
/* Hook the shared surface entry points into the context vtable. */
void r600_init_context_texture_functions(struct r600_common_context *rctx)
{
	rctx->b.create_surface = r600_create_surface;
	rctx->b.surface_destroy = r600_surface_destroy;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600d_common.h |
---|
0,0 → 1,206 |
/* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: Marek Olšák <maraeo@gmail.com> |
*/ |
#ifndef R600D_COMMON_H
#define R600D_COMMON_H

/* Register-space offsets shared by r600+/SI/CIK command processors. */
#define R600_CONFIG_REG_OFFSET	0x08000
#define R600_CONTEXT_REG_OFFSET 0x28000
#define SI_SH_REG_OFFSET	0x0000B000
#define SI_SH_REG_END		0x0000C000
#define CIK_UCONFIG_REG_OFFSET	0x00030000
#define CIK_UCONFIG_REG_END	0x00031000

/* PM4 packet header fields. */
#define PKT_TYPE_S(x)		(((x) & 0x3) << 30)
#define PKT_COUNT_S(x)		(((x) & 0x3FFF) << 16)
#define PKT3_IT_OPCODE_S(x)	(((x) & 0xFF) << 8)
#define PKT3_PREDICATE(x)	(((x) >> 0) & 0x1)
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))

#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002

/* PKT3 opcodes and their payload field encodings. */
#define PKT3_NOP			0x10
#define PKT3_SET_PREDICATION		0x20
#define PKT3_STRMOUT_BUFFER_UPDATE	0x34
#define		STRMOUT_STORE_BUFFER_FILLED_SIZE	1
#define		STRMOUT_OFFSET_SOURCE(x)	(((x) & 0x3) << 1)
#define			STRMOUT_OFFSET_FROM_PACKET		0
#define			STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE	1
#define			STRMOUT_OFFSET_FROM_MEM			2
#define			STRMOUT_OFFSET_NONE			3
#define		STRMOUT_SELECT_BUFFER(x)	(((x) & 0x3) << 8)
#define PKT3_WAIT_REG_MEM		0x3C
#define		WAIT_REG_MEM_EQUAL	3
#define PKT3_EVENT_WRITE		0x46
#define PKT3_EVENT_WRITE_EOP		0x47
#define PKT3_SET_CONFIG_REG		0x68
#define PKT3_SET_CONTEXT_REG		0x69
#define PKT3_STRMOUT_BASE_UPDATE	0x72 /* r700 only */
#define PKT3_SURFACE_BASE_UPDATE	0x73 /* r600 only */
#define		SURFACE_BASE_UPDATE_DEPTH	(1 << 0)
#define		SURFACE_BASE_UPDATE_COLOR(x)	(2 << (x))
		/* Fixed: "x" must be parenthesized so expression arguments
		 * expand correctly inside the shift. */
#define		SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << (x)) - 1) << 1)
#define		SURFACE_BASE_UPDATE_STRMOUT(x)	(0x200 << (x))
#define PKT3_SET_SH_REG			0x76 /* SI and later */
#define PKT3_SET_UCONFIG_REG		0x79 /* CIK and later */

/* EVENT_WRITE event types. */
#define EVENT_TYPE_PS_PARTIAL_FLUSH		0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT	0x14
#define EVENT_TYPE_ZPASS_DONE			0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT	0x16
#define EVENT_TYPE_PIPELINESTAT_START		25
#define EVENT_TYPE_PIPELINESTAT_STOP		26
#define EVENT_TYPE_SAMPLE_PIPELINESTAT		30
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH	0x1f
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS	0x20
#define EVENT_TYPE_FLUSH_AND_INV_DB_META	0x2c /* supported on r700+ */
#define EVENT_TYPE_FLUSH_AND_INV_CB_META	46 /* supported on r700+ */
#define		EVENT_TYPE(x)	((x) << 0)
#define		EVENT_INDEX(x)	((x) << 8)
		/* 0 - any non-TS event
		 * 1 - ZPASS_DONE
		 * 2 - SAMPLE_PIPELINESTAT
		 * 3 - SAMPLE_STREAMOUTSTAT*
		 * 4 - *S_PARTIAL_FLUSH
		 * 5 - TS events
		 */

/* SET_PREDICATION payload fields. */
#define PREDICATION_OP_CLEAR		0x0
#define PREDICATION_OP_ZPASS		0x1
#define PREDICATION_OP_PRIMCOUNT	0x2
#define PRED_OP(x)			((x) << 16)
/* NOTE(review): (1 << 31) overflows signed int; consider (1u << 31). */
#define PREDICATION_CONTINUE		(1 << 31)
#define PREDICATION_HINT_WAIT		(0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW	(1 << 12)
#define PREDICATION_DRAW_NOT_VISIBLE	(0 << 8)
#define PREDICATION_DRAW_VISIBLE	(1 << 8)

/* R600-R700 streamout registers. */
#define R_008490_CP_STRMOUT_CNTL			0x008490
#define   S_008490_OFFSET_UPDATE_DONE(x)		(((x) & 0x1) << 0)
#define R_028AB0_VGT_STRMOUT_EN				0x028AB0
#define   S_028AB0_STREAMOUT(x)				(((x) & 0x1) << 0)
#define   G_028AB0_STREAMOUT(x)				(((x) >> 0) & 0x1)
#define   C_028AB0_STREAMOUT				0xFFFFFFFE
#define R_028B20_VGT_STRMOUT_BUFFER_EN			0x028B20
#define   S_028B20_BUFFER_0_EN(x)			(((x) & 0x1) << 0)
#define   G_028B20_BUFFER_0_EN(x)			(((x) >> 0) & 0x1)
#define   C_028B20_BUFFER_0_EN				0xFFFFFFFE
#define   S_028B20_BUFFER_1_EN(x)			(((x) & 0x1) << 1)
#define   G_028B20_BUFFER_1_EN(x)			(((x) >> 1) & 0x1)
#define   C_028B20_BUFFER_1_EN				0xFFFFFFFD
#define   S_028B20_BUFFER_2_EN(x)			(((x) & 0x1) << 2)
#define   G_028B20_BUFFER_2_EN(x)			(((x) >> 2) & 0x1)
#define   C_028B20_BUFFER_2_EN				0xFFFFFFFB
#define   S_028B20_BUFFER_3_EN(x)			(((x) & 0x1) << 3)
#define   G_028B20_BUFFER_3_EN(x)			(((x) >> 3) & 0x1)
#define   C_028B20_BUFFER_3_EN				0xFFFFFFF7
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0		0x028AD0

/* Colorbuffer component-swap codes (CB_COLOR*_INFO.COMP_SWAP). */
#define   V_0280A0_SWAP_STD				0x00000000
#define   V_0280A0_SWAP_ALT				0x00000001
#define   V_0280A0_SWAP_STD_REV				0x00000002
#define   V_0280A0_SWAP_ALT_REV				0x00000003

/* EG+ streamout registers. */
#define R_0084FC_CP_STRMOUT_CNTL			0x0084FC
#define   S_0084FC_OFFSET_UPDATE_DONE(x)		(((x) & 0x1) << 0)
#define R_028B94_VGT_STRMOUT_CONFIG			0x028B94
#define   S_028B94_STREAMOUT_0_EN(x)			(((x) & 0x1) << 0)
#define   G_028B94_STREAMOUT_0_EN(x)			(((x) >> 0) & 0x1)
#define   C_028B94_STREAMOUT_0_EN			0xFFFFFFFE
#define   S_028B94_STREAMOUT_1_EN(x)			(((x) & 0x1) << 1)
#define   G_028B94_STREAMOUT_1_EN(x)			(((x) >> 1) & 0x1)
#define   C_028B94_STREAMOUT_1_EN			0xFFFFFFFD
#define   S_028B94_STREAMOUT_2_EN(x)			(((x) & 0x1) << 2)
#define   G_028B94_STREAMOUT_2_EN(x)			(((x) >> 2) & 0x1)
#define   C_028B94_STREAMOUT_2_EN			0xFFFFFFFB
#define   S_028B94_STREAMOUT_3_EN(x)			(((x) & 0x1) << 3)
#define   G_028B94_STREAMOUT_3_EN(x)			(((x) >> 3) & 0x1)
#define   C_028B94_STREAMOUT_3_EN			0xFFFFFFF7
#define   S_028B94_RAST_STREAM(x)			(((x) & 0x07) << 4)
#define   G_028B94_RAST_STREAM(x)			(((x) >> 4) & 0x07)
#define   C_028B94_RAST_STREAM				0xFFFFFF8F
#define   S_028B94_RAST_STREAM_MASK(x)			(((x) & 0x0F) << 8) /* SI+ */
#define   G_028B94_RAST_STREAM_MASK(x)			(((x) >> 8) & 0x0F)
#define   C_028B94_RAST_STREAM_MASK			0xFFFFF0FF
#define   S_028B94_USE_RAST_STREAM_MASK(x)		(((x) & 0x1) << 31) /* SI+ */
#define   G_028B94_USE_RAST_STREAM_MASK(x)		(((x) >> 31) & 0x1)
#define   C_028B94_USE_RAST_STREAM_MASK			0x7FFFFFFF
#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG		0x028B98
#define   S_028B98_STREAM_0_BUFFER_EN(x)		(((x) & 0x0F) << 0)
#define   G_028B98_STREAM_0_BUFFER_EN(x)		(((x) >> 0) & 0x0F)
#define   C_028B98_STREAM_0_BUFFER_EN			0xFFFFFFF0
#define   S_028B98_STREAM_1_BUFFER_EN(x)		(((x) & 0x0F) << 4)
#define   G_028B98_STREAM_1_BUFFER_EN(x)		(((x) >> 4) & 0x0F)
#define   C_028B98_STREAM_1_BUFFER_EN			0xFFFFFF0F
#define   S_028B98_STREAM_2_BUFFER_EN(x)		(((x) & 0x0F) << 8)
#define   G_028B98_STREAM_2_BUFFER_EN(x)		(((x) >> 8) & 0x0F)
#define   C_028B98_STREAM_2_BUFFER_EN			0xFFFFF0FF
#define   S_028B98_STREAM_3_BUFFER_EN(x)		(((x) & 0x0F) << 12)
#define   G_028B98_STREAM_3_BUFFER_EN(x)		(((x) >> 12) & 0x0F)
#define   C_028B98_STREAM_3_BUFFER_EN			0xFFFF0FFF

/* EG/Cayman MSAA and fast-clear related registers. */
#define EG_R_028A4C_PA_SC_MODE_CNTL_1			0x028A4C
#define   EG_S_028A4C_PS_ITER_SAMPLE(x)			(((x) & 0x1) << 16)
#define CM_R_028804_DB_EQAA				0x00028804
#define   S_028804_MAX_ANCHOR_SAMPLES(x)		(((x) & 0x7) << 0)
#define   S_028804_PS_ITER_SAMPLES(x)			(((x) & 0x7) << 4)
#define   S_028804_MASK_EXPORT_NUM_SAMPLES(x)		(((x) & 0x7) << 8)
#define   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)		(((x) & 0x7) << 12)
#define   S_028804_HIGH_QUALITY_INTERSECTIONS(x)	(((x) & 0x1) << 16)
#define   S_028804_INCOHERENT_EQAA_READS(x)		(((x) & 0x1) << 17)
#define   S_028804_INTERPOLATE_COMP_Z(x)		(((x) & 0x1) << 18)
#define   S_028804_INTERPOLATE_SRC_Z(x)			(((x) & 0x1) << 19)
#define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) & 0x1) << 20)
#define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) & 0x1) << 21)
#define   S_028804_OVERRASTERIZATION_AMOUNT(x)		(((x) & 0x7) << 24)
#define   S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)	(((x) & 0x1) << 27)
#define CM_R_028BDC_PA_SC_LINE_CNTL			0x28bdc
#define   S_028BDC_EXPAND_LINE_WIDTH(x)			(((x) & 0x1) << 9)
#define   G_028BDC_EXPAND_LINE_WIDTH(x)			(((x) >> 9) & 0x1)
#define   C_028BDC_EXPAND_LINE_WIDTH			0xFFFFFDFF
#define   S_028BDC_LAST_PIXEL(x)			(((x) & 0x1) << 10)
#define   G_028BDC_LAST_PIXEL(x)			(((x) >> 10) & 0x1)
#define   C_028BDC_LAST_PIXEL				0xFFFFFBFF
#define CM_R_028BE0_PA_SC_AA_CONFIG			0x28be0
#define   S_028BE0_MSAA_NUM_SAMPLES(x)			(((x) & 0x7) << 0)
#define   S_028BE0_AA_MASK_CENTROID_DTMN(x)		(((x) & 0x1) << 4)
#define   S_028BE0_MAX_SAMPLE_DIST(x)			(((x) & 0xf) << 13)
#define   S_028BE0_MSAA_EXPOSED_SAMPLES(x)		(((x) & 0x7) << 20)
#define   S_028BE0_DETAIL_TO_EXPOSED_MODE(x)		(((x) & 0x3) << 24)
#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0	0x28bf8
#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0	0x28c08
#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0	0x28c18
#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0	0x28c28
#define EG_S_028C70_FAST_CLEAR(x)			(((x) & 0x1) << 17)
#define SI_S_028C70_FAST_CLEAR(x)			(((x) & 0x1) << 13)

/* CIK+ */
#define R_0300FC_CP_STRMOUT_CNTL			0x0300FC

#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_elf_util.c |
---|
0,0 → 1,222 |
/* |
* Copyright 2014 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#include "radeon_elf_util.h" |
#include "r600_pipe_common.h" |
#include "util/u_memory.h" |
#include <gelf.h> |
#include <libelf.h> |
#include <stdio.h> |
static void parse_symbol_table(Elf_Data *symbol_table_data, |
const GElf_Shdr *symbol_table_header, |
struct radeon_shader_binary *binary) |
{ |
GElf_Sym symbol; |
unsigned i = 0; |
unsigned symbol_count = |
symbol_table_header->sh_size / symbol_table_header->sh_entsize; |
/* We are over allocating this list, because symbol_count gives the |
* total number of symbols, and we will only be filling the list |
* with offsets of global symbols. The memory savings from |
* allocating the correct size of this list will be small, and |
* I don't think it is worth the cost of pre-computing the number |
* of global symbols. |
*/ |
binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t)); |
while (gelf_getsym(symbol_table_data, i++, &symbol)) { |
unsigned i; |
if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL || |
symbol.st_shndx == 0 /* Undefined symbol */) { |
continue; |
} |
binary->global_symbol_offsets[binary->global_symbol_count] = |
symbol.st_value; |
/* Sort the list using bubble sort. This list will usually |
* be small. */ |
for (i = binary->global_symbol_count; i > 0; --i) { |
uint64_t lhs = binary->global_symbol_offsets[i - 1]; |
uint64_t rhs = binary->global_symbol_offsets[i]; |
if (lhs < rhs) { |
break; |
} |
binary->global_symbol_offsets[i] = lhs; |
binary->global_symbol_offsets[i - 1] = rhs; |
} |
++binary->global_symbol_count; |
} |
} |
/* Build binary->relocs (symbol name + offset per relocation) from the
 * .rel.text section data.  binary->reloc_count must already be set. */
static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
			unsigned symbol_sh_link,
			struct radeon_shader_binary *binary)
{
	unsigned i;

	if (!relocs || !symbols || !binary->reloc_count) {
		return;
	}
	binary->relocs = CALLOC(binary->reloc_count,
			sizeof(struct radeon_shader_reloc));
	if (!binary->relocs) {
		/* Out of memory: report zero relocs instead of crashing on a
		 * NULL array below or in consumers. */
		binary->reloc_count = 0;
		return;
	}
	for (i = 0; i < binary->reloc_count; i++) {
		GElf_Sym symbol;
		GElf_Rel rel;
		char *symbol_name;
		struct radeon_shader_reloc *reloc = &binary->relocs[i];

		gelf_getrel(relocs, i, &rel);
		gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);

		reloc->offset = rel.r_offset;
		/* elf_strptr() returns NULL on error; strdup(NULL) is
		 * undefined behavior, so fall back to an empty name. */
		reloc->name = strdup(symbol_name ? symbol_name : "");
	}
}
void radeon_elf_read(const char *elf_data, unsigned elf_size, |
struct radeon_shader_binary *binary, |
unsigned debug) |
{ |
char *elf_buffer; |
Elf *elf; |
Elf_Scn *section = NULL; |
Elf_Data *symbols = NULL, *relocs = NULL; |
size_t section_str_index; |
unsigned symbol_sh_link = 0; |
/* One of the libelf implementations |
* (http://www.mr511.de/software/english.htm) requires calling |
* elf_version() before elf_memory(). |
*/ |
elf_version(EV_CURRENT); |
elf_buffer = MALLOC(elf_size); |
memcpy(elf_buffer, elf_data, elf_size); |
elf = elf_memory(elf_buffer, elf_size); |
elf_getshdrstrndx(elf, §ion_str_index); |
binary->disassembled = 0; |
while ((section = elf_nextscn(elf, section))) { |
const char *name; |
Elf_Data *section_data = NULL; |
GElf_Shdr section_header; |
if (gelf_getshdr(section, §ion_header) != §ion_header) { |
fprintf(stderr, "Failed to read ELF section header\n"); |
return; |
} |
name = elf_strptr(elf, section_str_index, section_header.sh_name); |
if (!strcmp(name, ".text")) { |
section_data = elf_getdata(section, section_data); |
binary->code_size = section_data->d_size; |
binary->code = MALLOC(binary->code_size * sizeof(unsigned char)); |
memcpy(binary->code, section_data->d_buf, binary->code_size); |
} else if (!strcmp(name, ".AMDGPU.config")) { |
section_data = elf_getdata(section, section_data); |
binary->config_size = section_data->d_size; |
binary->config = MALLOC(binary->config_size * sizeof(unsigned char)); |
memcpy(binary->config, section_data->d_buf, binary->config_size); |
} else if (debug && !strcmp(name, ".AMDGPU.disasm")) { |
binary->disassembled = 1; |
section_data = elf_getdata(section, section_data); |
fprintf(stderr, "\nShader Disassembly:\n\n"); |
fprintf(stderr, "%.*s\n", (int)section_data->d_size, |
(char *)section_data->d_buf); |
} else if (!strncmp(name, ".rodata", 7)) { |
section_data = elf_getdata(section, section_data); |
binary->rodata_size = section_data->d_size; |
binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char)); |
memcpy(binary->rodata, section_data->d_buf, binary->rodata_size); |
} else if (!strncmp(name, ".symtab", 7)) { |
symbols = elf_getdata(section, section_data); |
symbol_sh_link = section_header.sh_link; |
parse_symbol_table(symbols, §ion_header, binary); |
} else if (!strcmp(name, ".rel.text")) { |
relocs = elf_getdata(section, section_data); |
binary->reloc_count = section_header.sh_size / |
section_header.sh_entsize; |
} |
} |
parse_relocs(elf, relocs, symbols, symbol_sh_link, binary); |
if (elf){ |
elf_end(elf); |
} |
FREE(elf_buffer); |
/* Cache the config size per symbol */ |
if (binary->global_symbol_count) { |
binary->config_size_per_symbol = |
binary->config_size / binary->global_symbol_count; |
} else { |
binary->global_symbol_count = 1; |
binary->config_size_per_symbol = binary->config_size; |
} |
} |
const unsigned char *radeon_shader_binary_config_start( |
const struct radeon_shader_binary *binary, |
uint64_t symbol_offset) |
{ |
unsigned i; |
for (i = 0; i < binary->global_symbol_count; ++i) { |
if (binary->global_symbol_offsets[i] == symbol_offset) { |
unsigned offset = i * binary->config_size_per_symbol; |
return binary->config + offset; |
} |
} |
return binary->config; |
} |
void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs, |
unsigned reloc_count) |
{ |
unsigned i; |
for (i = 0; i < reloc_count; i++) { |
FREE(relocs[i].name); |
} |
FREE(relocs); |
} |
void radeon_shader_binary_free_members(struct radeon_shader_binary *binary, |
unsigned free_relocs) |
{ |
FREE(binary->code); |
FREE(binary->config); |
FREE(binary->rodata); |
if (free_relocs) { |
radeon_shader_binary_free_relocs(binary->relocs, |
binary->reloc_count); |
} |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_elf_util.h |
---|
0,0 → 1,64 |
/* |
* Copyright 2014 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#ifndef RADEON_ELF_UTIL_H
#define RADEON_ELF_UTIL_H

#include <stdint.h>

struct radeon_shader_binary;
struct radeon_shader_reloc;

/*
 * Parse the elf binary stored in \p elf_data and create a
 * radeon_shader_binary object.
 */
void radeon_elf_read(const char *elf_data, unsigned elf_size,
		struct radeon_shader_binary *binary, unsigned debug);

/**
 * @returns A pointer to the start of the configuration information for
 * the function starting at \p symbol_offset of the binary.
 */
const unsigned char *radeon_shader_binary_config_start(
	const struct radeon_shader_binary *binary,
	uint64_t symbol_offset);

/**
 * Free all memory allocated for members of \p binary.  This function does
 * not free \p binary.
 *
 * @param free_relocs If false, reloc information will not be freed.
 */
void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
		unsigned free_relocs);

/**
 * Free \p relocs and all member data.
 */
void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
		unsigned reloc_count);

#endif /* RADEON_ELF_UTIL_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm.h |
---|
0,0 → 1,212 |
/* |
* Copyright 2011 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#ifndef RADEON_LLVM_H |
#define RADEON_LLVM_H |
#include <llvm-c/Core.h> |
#include "gallivm/lp_bld_init.h" |
#include "gallivm/lp_bld_tgsi.h" |
/* Upper bounds for per-shader resources tracked by the LLVM backend.
 * Fixed: the "32 * 4" bodies are parenthesized so that expressions such
 * as "x % RADEON_LLVM_MAX_INPUTS" expand with the intended precedence. */
#define RADEON_LLVM_MAX_INPUTS (32 * 4)
#define RADEON_LLVM_MAX_OUTPUTS (32 * 4)
#define RADEON_LLVM_MAX_ARRAYS 16
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
/* Bookkeeping for one open IF/ELSE construct during TGSI->LLVM emission. */
struct radeon_llvm_branch {
	LLVMBasicBlockRef endif_block;	/* join point after the construct */
	LLVMBasicBlockRef if_block;	/* "then" block */
	LLVMBasicBlockRef else_block;	/* "else" block */
	unsigned has_else;		/* non-zero once an ELSE was emitted */
};
/* Bookkeeping for one open BGNLOOP/ENDLOOP construct. */
struct radeon_llvm_loop {
	LLVMBasicBlockRef loop_block;		/* loop header/body entry */
	LLVMBasicBlockRef endloop_block;	/* block after the loop */
};
/* State for translating one TGSI shader to LLVM IR.  Embeds the gallivm
 * SoA context as its first member (see radeon_llvm_context() cast). */
struct radeon_llvm_context {

	struct lp_build_tgsi_soa_context soa;

	unsigned chip_class;
	unsigned type;
	unsigned face_gpr;
	unsigned two_side;
	unsigned clip_vertex;
	unsigned inputs_count;
	struct r600_shader_io * r600_inputs;
	struct r600_shader_io * r600_outputs;
	struct pipe_stream_output_info *stream_outputs;
	unsigned color_buffer_count;
	unsigned fs_color_all;
	unsigned alpha_to_one;
	unsigned has_txq_cube_array_z_comp;
	unsigned uses_tex_buffers;
	unsigned has_compressed_msaa_texturing;

	/*=== Front end configuration ===*/

	/* Special Intrinsics */

	/** Write to an output register: float store_output(float, i32) */
	const char * store_output_intr;

	/** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
	 * The swizzle is an unsigned integer that encodes a TGSI_SWIZZLE_* value
	 * in 2-bits.
	 * Swizzle{0-1} = X Channel
	 * Swizzle{2-3} = Y Channel
	 * Swizzle{4-5} = Z Channel
	 * Swizzle{6-7} = W Channel
	 */
	const char * swizzle_intr;

	/* Instructions that are not described by any of the TGSI opcodes. */

	/** This function is responsible for initializing the inputs array and will be
	 * called once for each input declared in the TGSI shader.
	 */
	void (*load_input)(struct radeon_llvm_context *,
			unsigned input_index,
			const struct tgsi_full_declaration *decl);

	void (*load_system_value)(struct radeon_llvm_context *,
			unsigned index,
			const struct tgsi_full_declaration *decl);

	/** User data to use with the callbacks */
	void * userdata;

	/** This array contains the input values for the shader. Typically these
	 * values will be in the form of a target intrinsic that will inform the
	 * backend how to load the actual inputs to the shader.
	 */
	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
	unsigned output_reg_count;

	/** This pointer is used to contain the temporary values.
	 * The amount of temporary used in tgsi can't be bound to a max value and
	 * thus we must allocate this array at runtime.
	 */
	LLVMValueRef *temps;
	unsigned temps_count;
	LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];

	/*=== Private Members ===*/

	/* Stacks of currently-open control-flow constructs. */
	struct radeon_llvm_branch *branch;
	struct radeon_llvm_loop *loop;

	unsigned branch_depth;
	unsigned branch_depth_max;
	unsigned loop_depth;
	unsigned loop_depth_max;

	struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
	unsigned num_arrays;

	LLVMValueRef main_fn;

	struct gallivm_state gallivm;
};
static inline LLVMTypeRef tgsi2llvmtype( |
struct lp_build_tgsi_context * bld_base, |
enum tgsi_opcode_type type) |
{ |
LLVMContextRef ctx = bld_base->base.gallivm->context; |
switch (type) { |
case TGSI_TYPE_UNSIGNED: |
case TGSI_TYPE_SIGNED: |
return LLVMInt32TypeInContext(ctx); |
case TGSI_TYPE_UNTYPED: |
case TGSI_TYPE_FLOAT: |
return LLVMFloatTypeInContext(ctx); |
default: break; |
} |
return 0; |
} |
static inline LLVMValueRef bitcast(
		struct lp_build_tgsi_context * bld_base,
		enum tgsi_opcode_type type,
		LLVMValueRef value
)
{
	/* Bitcast "value" to the LLVM type matching the TGSI type; pass it
	 * through unchanged when the type has no LLVM mapping. */
	LLVMTypeRef dst = tgsi2llvmtype(bld_base, type);

	if (!dst)
		return value;
	return LLVMBuildBitCast(bld_base->base.gallivm->builder, value, dst, "");
}
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data,
		LLVMValueRef *coords_arg);

/* Initialize / tear down the TGSI->LLVM translation context. */
void radeon_llvm_context_init(struct radeon_llvm_context * ctx);

void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
			     LLVMTypeRef *ParamTypes, unsigned ParamCount);

void radeon_llvm_dispose(struct radeon_llvm_context * ctx);

/* Recover the radeon context from the embedded TGSI context.
 * NOTE(review): relies on the soa member (first field) placing the
 * lp_build_tgsi_context at offset 0 — confirm in gallivm. */
inline static struct radeon_llvm_context * radeon_llvm_context(
	struct lp_build_tgsi_context * bld_base)
{
	return (struct radeon_llvm_context*)bld_base;
}

unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);

void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx);

LLVMValueRef
build_intrinsic(LLVMBuilderRef builder,
		const char *name,
		LLVMTypeRef ret_type,
		LLVMValueRef *args,
		unsigned num_args,
		LLVMAttribute attr);

void
build_tgsi_intrinsic_nomem(
		const struct lp_build_tgsi_action * action,
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data);

#endif /* RADEON_LLVM_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_emit.c |
---|
0,0 → 1,208 |
/* |
* Copyright 2011 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#include "radeon_llvm_emit.h" |
#include "radeon_elf_util.h" |
#include "util/u_memory.h" |
#include "pipe/p_shader_tokens.h" |
#include <llvm-c/Target.h> |
#include <llvm-c/TargetMachine.h> |
#include <llvm-c/Core.h> |
#include <stdlib.h> |
#include <stdio.h> |
#include <string.h> |
#define CPU_STRING_LEN 30 |
#define FS_STRING_LEN 30 |
#define TRIPLE_STRING_LEN 7 |
/** |
* Shader types for the LLVM backend. |
*/ |
enum radeon_llvm_shader_type {
	RADEON_LLVM_SHADER_PS = 0,	/* pixel/fragment shader */
	RADEON_LLVM_SHADER_VS = 1,	/* vertex shader */
	RADEON_LLVM_SHADER_GS = 2,	/* geometry shader */
	RADEON_LLVM_SHADER_CS = 3,	/* compute shader */
};
/** |
* Set the shader type we want to compile |
* |
* @param type shader type to set |
*/ |
/**
 * Tag an LLVM function with the "ShaderType" attribute so the backend
 * knows which hardware shader stage it is compiling.
 *
 * @param F    LLVM function for the shader entry point
 * @param type one of the TGSI_PROCESSOR_* values
 */
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
{
	char Str[2];
	/* Initialize so Str is well-defined even when the assert below is
	 * compiled out (NDEBUG) and an unknown type slips through. */
	enum radeon_llvm_shader_type llvm_type = RADEON_LLVM_SHADER_PS;

	switch (type) {
	case TGSI_PROCESSOR_VERTEX:
		llvm_type = RADEON_LLVM_SHADER_VS;
		break;
	case TGSI_PROCESSOR_GEOMETRY:
		llvm_type = RADEON_LLVM_SHADER_GS;
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		llvm_type = RADEON_LLVM_SHADER_PS;
		break;
	case TGSI_PROCESSOR_COMPUTE:
		llvm_type = RADEON_LLVM_SHADER_CS;
		break;
	default:
		assert(0);
	}

	/* The attribute value is a single decimal digit; snprintf bounds
	 * the write to the buffer unlike the previous sprintf. */
	snprintf(Str, sizeof(Str), "%1d", llvm_type);

	LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
}
/* One-shot registration of the R600 target with LLVM (target info,
 * codegen, MC layer and assembly printer).
 * NOTE(review): the guard is not thread-safe — confirm callers
 * serialize first-time compilation. */
static void init_r600_target(void)
{
	static unsigned initialized = 0;

	if (!initialized) {
		LLVMInitializeR600TargetInfo();
		LLVMInitializeR600Target();
		LLVMInitializeR600TargetMC();
		LLVMInitializeR600AsmPrinter();
		initialized = 1;
	}
}
/* Look up the LLVM target for the given triple, initializing the R600
 * backend on first use.  Returns NULL (with a message on stderr) when
 * the triple is unknown. */
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
{
	char *err_message = NULL;
	LLVMTargetRef target = NULL;

	init_r600_target();

	if (!LLVMGetTargetFromTriple(triple, &target, &err_message))
		return target;

	fprintf(stderr, "Cannot find target for triple %s ", triple);
	if (err_message) {
		fprintf(stderr, "%s\n", err_message);
	}
	LLVMDisposeMessage(err_message);
	return NULL;
}
#if HAVE_LLVM >= 0x0305
/* Diagnostic callback installed on the LLVM context: for error-severity
 * diagnostics, raise the flag passed through `context` and echo the
 * message to stderr. */
static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
{
	unsigned int *diagnosticflag;
	char *diaginfo_message;

	if (LLVMGetDiagInfoSeverity(di) != LLVMDSError)
		return;

	diagnosticflag = (unsigned int *)context;
	*diagnosticflag = 1;

	diaginfo_message = LLVMGetDiagInfoDescription(di);
	fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", diaginfo_message);
	LLVMDisposeMessage(diaginfo_message);
}
#endif
/** |
* Compile an LLVM module to machine code. |
* |
* @returns 0 for success, 1 for failure |
*/ |
/**
 * Compile an LLVM module to machine code.
 *
 * When @tm is NULL, a default "r600--" target machine is created for
 * @gpu_family (with +DumpCode when @dump is set) and destroyed again
 * before returning; a caller-supplied machine is never destroyed here.
 *
 * @param M          module to compile
 * @param binary     receives the ELF shader binary
 * @param gpu_family CPU name passed to LLVMCreateTargetMachine
 * @param dump       non-zero dumps the IR and enables +DumpCode
 * @param tm         optional target machine; NULL means "create one"
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
			  const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
{
	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char *err;
	bool dispose_tm = false;
	LLVMContextRef llvm_ctx;
	unsigned rval = 0;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	char triple[TRIPLE_STRING_LEN];
	LLVMBool mem_err;

	if (!tm) {
		strncpy(triple, "r600--", TRIPLE_STRING_LEN);
		LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
		if (!target) {
			return 1;
		}
		/* strncpy does not NUL-terminate when the source fills the
		 * buffer, so terminate explicitly. */
		strncpy(cpu, gpu_family, CPU_STRING_LEN - 1);
		cpu[CPU_STRING_LEN - 1] = '\0';
		memset(fs, 0, sizeof(fs));
		if (dump) {
			strncpy(fs, "+DumpCode", FS_STRING_LEN - 1);
		}
		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
				LLVMCodeGenLevelDefault, LLVMRelocDefault,
				LLVMCodeModelDefault);
		dispose_tm = true;
	}

	if (dump) {
		LLVMDumpModule(M);
	}

	/* Setup Diagnostic Handler: the handler sets rval to 1 on an
	 * error-severity diagnostic. */
	llvm_ctx = LLVMGetModuleContext(M);

#if HAVE_LLVM >= 0x0305
	LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
#endif

	/* Compile IR */
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
								 &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		FREE(err);
		/* Only destroy the target machine if we created it above;
		 * a caller-provided tm remains owned by the caller. */
		if (dispose_tm) {
			LLVMDisposeTargetMachine(tm);
		}
		return 1;
	}

	if (0 != rval) {
		fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
	}

	/* Extract Shader Code */
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);
	radeon_elf_read(buffer_data, buffer_size, binary, dump);

	/* Clean up */
	LLVMDisposeMemoryBuffer(out_buffer);
	if (dispose_tm) {
		LLVMDisposeTargetMachine(tm);
	}
	return rval;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_emit.h |
---|
0,0 → 1,46 |
/* |
* Copyright 2012 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#ifndef RADEON_LLVM_EMIT_H |
#define RADEON_LLVM_EMIT_H |
#include <llvm-c/Core.h> |
#include <llvm-c/TargetMachine.h> |
struct radeon_shader_binary; |
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type); |
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple); |
unsigned radeon_llvm_compile( |
LLVMModuleRef M, |
struct radeon_shader_binary *binary, |
const char * gpu_family, |
unsigned dump, |
LLVMTargetMachineRef tm); |
#endif /* RADEON_LLVM_EMIT_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_util.c |
---|
0,0 → 1,118 |
/* |
* Copyright 2012, 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#include "radeon_llvm_util.h" |
#include "util/u_memory.h" |
#include <llvm-c/BitReader.h> |
#include <llvm-c/Core.h> |
#include <llvm-c/Target.h> |
#include <llvm-c/Transforms/IPO.h> |
#include <llvm-c/Transforms/PassManagerBuilder.h> |
/**
 * Parse an LLVM bitcode blob into a module in the given context.
 *
 * @returns the parsed module, or NULL if the bitcode is invalid.
 */
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
		const char * bitcode, unsigned bitcode_len)
{
	LLVMMemoryBufferRef buf;
	/* Initialize to NULL: on parse failure the out-parameter is not
	 * guaranteed to be set, and the old code returned it uninitialized. */
	LLVMModuleRef module = NULL;

	buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
							bitcode_len, "radeon");
	LLVMParseBitcodeInContext(ctx, buf, &module, NULL);
	LLVMDisposeMemoryBuffer(buf);
	return module;
}
/**
 * Count the kernels recorded in the "opencl.kernels" named metadata of
 * a bitcode blob.
 */
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
		const char *bitcode, unsigned bitcode_len)
{
	LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
	unsigned num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");

	/* The module was only needed for its metadata; dispose it so this
	 * query no longer leaks one module per call. */
	LLVMDisposeModule(mod);
	return num_kernels;
}
/* Run the always-inliner (plus the builder's default module passes)
 * over a compute module.  Function calls are not supported yet, so all
 * non-kernel functions are tagged always_inline first; the always-
 * inline pass then inlines them without the normal inliner's costly
 * cost analysis. */
static void radeon_llvm_optimize(LLVMModuleRef mod)
{
	LLVMTargetDataRef target_data =
		LLVMCreateTargetData(LLVMGetDataLayout(mod));
	LLVMPassManagerBuilderRef pmb = LLVMPassManagerBuilderCreate();
	LLVMPassManagerRef pm = LLVMCreatePassManager();
	LLVMValueRef fn;

	for (fn = LLVMGetFirstFunction(mod); fn != NULL;
	     fn = LLVMGetNextFunction(fn)) {
		/* All the non-kernel functions have internal linkage. */
		if (LLVMGetLinkage(fn) == LLVMInternalLinkage)
			LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
	}

	LLVMAddTargetData(target_data, pm);
	LLVMAddAlwaysInlinerPass(pm);
	LLVMPassManagerBuilderPopulateModulePassManager(pmb, pm);

	LLVMRunPassManager(pm, mod);

	LLVMPassManagerBuilderDispose(pmb);
	LLVMDisposePassManager(pm);
	LLVMDisposeTargetData(target_data);
}
/**
 * Extract kernel number `index` from a bitcode blob: parse the module,
 * delete every *other* kernel function listed in the "opencl.kernels"
 * metadata, then inline/optimize what remains.
 *
 * NOTE(review): MALLOC results are used unchecked, and the code assumes
 * the first operand of each kernel metadata node is the kernel function
 * itself — verify against the OpenCL metadata layout the frontend emits.
 */
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
		const char *bitcode, unsigned bitcode_len)
{
	LLVMModuleRef mod;
	unsigned num_kernels;
	LLVMValueRef *kernel_metadata;
	unsigned i;

	mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
	num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
	kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
	LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
	for (i = 0; i < num_kernels; i++) {
		LLVMValueRef kernel_signature, *kernel_function;
		unsigned num_kernel_md_operands;
		/* Keep the requested kernel; delete all the others. */
		if (i == index) {
			continue;
		}
		kernel_signature = kernel_metadata[i];
		num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature);
		kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef));
		LLVMGetMDNodeOperands(kernel_signature, kernel_function);
		/* Operand 0 is assumed to be the kernel function. */
		LLVMDeleteFunction(*kernel_function);
		FREE(kernel_function);
	}
	FREE(kernel_metadata);
	radeon_llvm_optimize(mod);
	return mod;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_util.h |
---|
0,0 → 1,39 |
/* |
* Copyright 2012, 2013 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#ifndef RADEON_LLVM_UTIL_H |
#define RADEON_LLVM_UTIL_H |
#include <llvm-c/Core.h> |
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, |
const char * bitcode, unsigned bitcode_len); |
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, |
const char *bitcode, unsigned bitcode_len); |
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, |
const char *bitcode, unsigned bitcode_len); |
#endif |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c |
---|
0,0 → 1,1639 |
/* |
* Copyright 2011 Advanced Micro Devices, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: Tom Stellard <thomas.stellard@amd.com> |
* |
*/ |
#include "radeon_llvm.h" |
#include "gallivm/lp_bld_const.h" |
#include "gallivm/lp_bld_gather.h" |
#include "gallivm/lp_bld_flow.h" |
#include "gallivm/lp_bld_init.h" |
#include "gallivm/lp_bld_intr.h" |
#include "gallivm/lp_bld_swizzle.h" |
#include "tgsi/tgsi_info.h" |
#include "tgsi/tgsi_parse.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_debug.h" |
#include <llvm-c/Core.h> |
#include <llvm-c/Transforms/Scalar.h> |
static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx) |
{ |
return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL; |
} |
static struct radeon_llvm_branch * get_current_branch( |
struct radeon_llvm_context * ctx) |
{ |
return ctx->branch_depth > 0 ? |
ctx->branch + (ctx->branch_depth - 1) : NULL; |
} |
/* Map a (register index, channel) pair to a flat SoA slot:
 * four channels per register. */
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return index * 4 + chan;
}
/* Apply a compile-time four-component swizzle via shufflevector. */
static LLVMValueRef emit_swizzle(
	struct lp_build_tgsi_context * bld_base,
	LLVMValueRef value,
	unsigned swizzle_x,
	unsigned swizzle_y,
	unsigned swizzle_z,
	unsigned swizzle_w)
{
	LLVMTypeRef i32t =
		LLVMInt32TypeInContext(bld_base->base.gallivm->context);
	unsigned chans[4];
	LLVMValueRef mask[4];
	unsigned i;

	chans[0] = swizzle_x;
	chans[1] = swizzle_y;
	chans[2] = swizzle_z;
	chans[3] = swizzle_w;
	for (i = 0; i < 4; ++i)
		mask[i] = LLVMConstInt(i32t, chans[i], 0);

	return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
				      value,
				      LLVMGetUndef(LLVMTypeOf(value)),
				      LLVMConstVector(mask, 4), "");
}
static struct tgsi_declaration_range |
get_array_range(struct lp_build_tgsi_context *bld_base, |
unsigned File, const struct tgsi_ind_register *reg) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 || |
reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) { |
struct tgsi_declaration_range range; |
range.First = 0; |
range.Last = bld_base->info->file_max[File]; |
return range; |
} |
return ctx->arrays[reg->ArrayID - 1]; |
} |
static LLVMValueRef |
emit_array_index( |
struct lp_build_tgsi_soa_context *bld, |
const struct tgsi_ind_register *reg, |
unsigned offset) |
{ |
struct gallivm_state * gallivm = bld->bld_base.base.gallivm; |
LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], ""); |
return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), ""); |
} |
static LLVMValueRef |
emit_fetch( |
struct lp_build_tgsi_context *bld_base, |
const struct tgsi_full_src_register *reg, |
enum tgsi_opcode_type type, |
unsigned swizzle); |
static LLVMValueRef |
emit_array_fetch( |
struct lp_build_tgsi_context *bld_base, |
unsigned File, enum tgsi_opcode_type type, |
struct tgsi_declaration_range range, |
unsigned swizzle) |
{ |
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); |
struct gallivm_state * gallivm = bld->bld_base.base.gallivm; |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
unsigned i, size = range.Last - range.First + 1; |
LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size); |
LLVMValueRef result = LLVMGetUndef(vec); |
struct tgsi_full_src_register tmp_reg = {}; |
tmp_reg.Register.File = File; |
for (i = 0; i < size; ++i) { |
tmp_reg.Register.Index = i + range.First; |
LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle); |
result = LLVMBuildInsertElement(builder, result, temp, |
lp_build_const_int32(gallivm, i), ""); |
} |
return result; |
} |
static bool uses_temp_indirect_addressing( |
struct lp_build_tgsi_context *bld_base) |
{ |
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); |
return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)); |
} |
/* Fetch one channel of a TGSI source register (or, for swizzle == ~0,
 * all four channels gathered into a vector) and bitcast it to `type`. */
static LLVMValueRef
emit_fetch(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result = NULL, ptr;

	/* swizzle == ~0 requests the whole register: recurse per channel
	 * and gather the four scalars into one vector. */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Indirect access: materialize the array slice as a vector and
	 * dynamically extract the addressed element. */
	if (reg->Register.Indirect) {
		struct tgsi_declaration_range range = get_array_range(bld_base,
			reg->Register.File, &reg->Indirect);
		return LLVMBuildExtractElement(builder,
			emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
			emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
			"");
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
	}
	case TGSI_FILE_INPUT:
		result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
		break;
	case TGSI_FILE_TEMPORARY:
		/* Out-of-range temp reads yield undef rather than crashing. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		if (uses_temp_indirect_addressing(bld_base)) {
			/* NOTE(review): this path breaks with `result` still
			 * NULL (no load from ptr before the final bitcast) —
			 * looks suspicious; verify a load is not missing. */
			ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
			break;
		}
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		result = LLVMBuildLoad(builder, ptr, "");
		break;
	case TGSI_FILE_OUTPUT:
		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
		result = LLVMBuildLoad(builder, ptr, "");
		break;
	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
static LLVMValueRef fetch_system_value( |
struct lp_build_tgsi_context * bld_base, |
const struct tgsi_full_src_register *reg, |
enum tgsi_opcode_type type, |
unsigned swizzle) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
struct gallivm_state *gallivm = bld_base->base.gallivm; |
LLVMValueRef cval = ctx->system_values[reg->Register.Index]; |
if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) { |
cval = LLVMBuildExtractElement(gallivm->builder, cval, |
lp_build_const_int32(gallivm, swizzle), ""); |
} |
return bitcast(bld_base, type, cval); |
} |
/* Allocate per-register storage (allocas) for a TGSI declaration and
 * record array/output bookkeeping in the radeon context. */
static void emit_declaration(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_declaration *decl)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	unsigned first, last, i, idx;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				/* One integer alloca per ADDR channel. */
				ctx->soa.addr[idx][chan] = lp_build_alloca(
					&ctx->gallivm,
					ctx->soa.bld_base.uint_bld.elem_type, "");
			}
		}
		break;
	}
	case TGSI_FILE_TEMPORARY:
		/* Remember declared array ranges for later indirect access. */
		if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
			ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
		if (uses_temp_indirect_addressing(bld_base)) {
			/* Indirect temps are managed by the generic SoA code. */
			lp_emit_declaration_soa(bld_base, decl);
			break;
		}
		first = decl->Range.First;
		last = decl->Range.Last;
		/* Lazily allocate one alloca per channel of every temp in
		 * the shader (sized from file_max, not just this range). */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		for (idx = first; idx <= last; idx++) {
			for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
				ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca(bld_base->base.gallivm, bld_base->base.vec_type,
						"temp");
			}
		}
		break;
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			/* Driver hook materializes each input, if provided. */
			if (ctx->load_input)
				ctx->load_input(ctx, idx, decl);
		}
	}
	break;
	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			ctx->load_system_value(ctx, idx, decl);
		}
	}
	break;
	case TGSI_FILE_OUTPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				/* One scalar alloca per output channel. */
				ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
					ctx->soa.bld_base.base.elem_type, "");
			}
		}
		/* Track the highest output register ever declared. */
		ctx->output_reg_count = MAX2(ctx->output_reg_count,
					     decl->Range.Last + 1);
		break;
	}
	default:
		break;
	}
}
/* Store instruction results into the destination register, handling
 * saturation, the vector-store path, ADDR registers, and direct or
 * indirect stores to outputs/temporaries. */
static void
emit_store(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_instruction * inst,
	const struct tgsi_opcode_info * info,
	LLVMValueRef dst[4])
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	struct lp_build_context base = bld->bld_base.base;
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
	LLVMValueRef temp_ptr;
	unsigned chan, chan_index;
	boolean is_vec_store = FALSE;
	/* If the action produced one vector in dst[0], split it into
	 * scalars and re-enter emit_store with per-channel values. */
	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}
	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
			LLVMValueRef index = lp_build_const_int32(gallivm, chan);
			values[chan] = LLVMBuildExtractElement(gallivm->builder,
							dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, values);
		return;
	}
	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];
		/* Apply saturation by emitting a CLAMP to [0,1] or [-1,1]. */
		if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
			struct lp_build_emit_data clamp_emit_data;
			memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
			clamp_emit_data.arg_count = 3;
			clamp_emit_data.args[0] = value;
			clamp_emit_data.args[2] = base.one;
			switch(inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				clamp_emit_data.args[1] = base.zero;
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				clamp_emit_data.args[1] = LLVMConstReal(
						base.elem_type, -1.0f);
				break;
			default:
				assert(0);
			}
			value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
						&clamp_emit_data);
		}
		/* ADDR registers hold integers; store without the float
		 * bitcast applied below. */
		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = bld->addr[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}
		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
		if (reg->Register.Indirect) {
			/* Indirect store: read the whole array slice as a
			 * vector, insert the value at the dynamic index,
			 * then write every element back to its alloca. */
			struct tgsi_declaration_range range = get_array_range(bld_base,
				reg->Register.File, &reg->Indirect);
			unsigned i, size = range.Last - range.First + 1;
			LLVMValueRef array = LLVMBuildInsertElement(builder,
				emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
				value, emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
			for (i = 0; i < size; ++i) {
				switch(reg->Register.File) {
				case TGSI_FILE_OUTPUT:
					temp_ptr = bld->outputs[i + range.First][chan_index];
					break;
				case TGSI_FILE_TEMPORARY:
					/* Skip slots beyond the allocated temps. */
					if (range.First + i >= ctx->temps_count)
						continue;
					if (uses_temp_indirect_addressing(bld_base))
						temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
					else
						temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
					break;
				default:
					return;
				}
				value = LLVMBuildExtractElement(builder, array,
					lp_build_const_int32(gallivm, i), "");
				LLVMBuildStore(builder, value, temp_ptr);
			}
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
				break;
			case TGSI_FILE_TEMPORARY:
				if (reg->Register.Index >= ctx->temps_count)
					continue;
				/* NOTE(review): with indirect temp addressing
				 * enabled this stores through a NULL pointer —
				 * verify this path is actually unreachable. */
				if (uses_temp_indirect_addressing(bld_base)) {
					temp_ptr = NULL;
					break;
				}
				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				break;
			default:
				return;
			}
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
static void bgnloop_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
LLVMBasicBlockRef loop_block; |
LLVMBasicBlockRef endloop_block; |
endloop_block = LLVMAppendBasicBlockInContext(gallivm->context, |
ctx->main_fn, "ENDLOOP"); |
loop_block = LLVMInsertBasicBlockInContext(gallivm->context, |
endloop_block, "LOOP"); |
LLVMBuildBr(gallivm->builder, loop_block); |
LLVMPositionBuilderAtEnd(gallivm->builder, loop_block); |
if (++ctx->loop_depth > ctx->loop_depth_max) { |
unsigned new_max = ctx->loop_depth_max << 1; |
if (!new_max) |
new_max = RADEON_LLVM_INITIAL_CF_DEPTH; |
ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max * |
sizeof(ctx->loop[0]), |
new_max * sizeof(ctx->loop[0])); |
ctx->loop_depth_max = new_max; |
} |
ctx->loop[ctx->loop_depth - 1].loop_block = loop_block; |
ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block; |
} |
static void brk_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
struct radeon_llvm_loop * current_loop = get_current_loop(ctx); |
LLVMBuildBr(gallivm->builder, current_loop->endloop_block); |
} |
static void cont_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
struct radeon_llvm_loop * current_loop = get_current_loop(ctx); |
LLVMBuildBr(gallivm->builder, current_loop->loop_block); |
} |
/* TGSI ELSE: close off the IF side of the current branch and start
 * emitting into its ELSE block. */
static void else_emit(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
	/* We need to add a terminator to the current block if the previous
	 * instruction was an ENDIF.Example:
	 * IF
	 *  [code]
	 *  IF
	 *    [code]
	 *  ELSE
	 *   [code]
	 *  ENDIF <--
	 * ELSE<--
	 *  [code]
	 * ENDIF
	 */
	if (current_block != current_branch->if_block) {
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	/* Terminate a still-open (possibly empty) IF block.
	 * NOTE(review): this br is built at the current insert point, not
	 * inside if_block — verify both conditions cannot hold at once. */
	if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	current_branch->has_else = 1;
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
}
/* TGSI ENDIF: ensure every arm of the branch ends with a jump to the
 * ENDIF block, continue emitting there, and pop the branch frame. */
static void endif_emit(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
	/* If we have consecutive ENDIF instructions, then the first ENDIF
	 * will not have a terminator, so we need to add one. */
	if (current_block != current_branch->if_block
			&& current_block != current_branch->else_block
			&& !LLVMGetBasicBlockTerminator(current_block)) {
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	/* Terminate an open (possibly empty) ELSE block. */
	if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	/* Terminate an open (possibly empty) IF block. */
	if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
	}
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
	ctx->branch_depth--;
}
static void endloop_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
struct radeon_llvm_loop * current_loop = get_current_loop(ctx); |
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) { |
LLVMBuildBr(gallivm->builder, current_loop->loop_block); |
} |
LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block); |
ctx->loop_depth--; |
} |
static void if_cond_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data, |
LLVMValueRef cond) |
{ |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
LLVMBasicBlockRef if_block, else_block, endif_block; |
endif_block = LLVMAppendBasicBlockInContext(gallivm->context, |
ctx->main_fn, "ENDIF"); |
if_block = LLVMInsertBasicBlockInContext(gallivm->context, |
endif_block, "IF"); |
else_block = LLVMInsertBasicBlockInContext(gallivm->context, |
endif_block, "ELSE"); |
LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block); |
LLVMPositionBuilderAtEnd(gallivm->builder, if_block); |
if (++ctx->branch_depth > ctx->branch_depth_max) { |
unsigned new_max = ctx->branch_depth_max << 1; |
if (!new_max) |
new_max = RADEON_LLVM_INITIAL_CF_DEPTH; |
ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max * |
sizeof(ctx->branch[0]), |
new_max * sizeof(ctx->branch[0])); |
ctx->branch_depth_max = new_max; |
} |
ctx->branch[ctx->branch_depth - 1].endif_block = endif_block; |
ctx->branch[ctx->branch_depth - 1].if_block = if_block; |
ctx->branch[ctx->branch_depth - 1].else_block = else_block; |
ctx->branch[ctx->branch_depth - 1].has_else = 0; |
} |
static void if_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
LLVMValueRef cond; |
cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE, |
emit_data->args[0], |
bld_base->base.zero, ""); |
if_cond_emit(action, bld_base, emit_data, cond); |
} |
static void uif_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
struct gallivm_state * gallivm = bld_base->base.gallivm; |
LLVMValueRef cond; |
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, |
bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]), |
bld_base->int_bld.zero, ""); |
if_cond_emit(action, bld_base, emit_data, cond); |
} |
static void kill_if_fetch_args( |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
const struct tgsi_full_instruction * inst = emit_data->inst; |
struct gallivm_state *gallivm = bld_base->base.gallivm; |
LLVMBuilderRef builder = gallivm->builder; |
unsigned i; |
LLVMValueRef conds[TGSI_NUM_CHANNELS]; |
for (i = 0; i < TGSI_NUM_CHANNELS; i++) { |
LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); |
conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value, |
bld_base->base.zero, ""); |
} |
/* Or the conditions together */ |
for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) { |
conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], ""); |
} |
emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context); |
emit_data->arg_count = 1; |
emit_data->args[0] = LLVMBuildSelect(builder, conds[0], |
lp_build_const_float(gallivm, -1.0f), |
bld_base->base.zero, ""); |
} |
static void kil_emit( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
unsigned i; |
for (i = 0; i < emit_data->arg_count; i++) { |
emit_data->output[i] = lp_build_intrinsic_unary( |
bld_base->base.gallivm->builder, |
action->intr_name, |
emit_data->dst_type, emit_data->args[i]); |
} |
} |
/* Convert cube-map texture coordinates into the face-coordinate form the
 * hardware sample instructions expect.
 *
 * The raw coordinate vector is run through llvm.AMDGPU.cube; the result
 * is then normalized (coord * rcp(|coords[2]|) + 1.5 — coords[2] is
 * presumably the major-axis magnitude, TODO confirm against the AMDGPU
 * backend docs), re-swizzled, and written back over coords_arg.
 *
 * coords_arg: in/out array of per-channel values.  For cube arrays the
 * layer index (coords_arg[3]) is folded into coord.z; for TEX2/TXB2/TXL2
 * the extra operand in coords_arg[4] (and for TXB/TXL/SHADOWCUBE the
 * value in coords_arg[3]) is preserved in coord.w.
 */
void radeon_llvm_emit_prepare_cube_coords(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data,
		LLVMValueRef *coords_arg)
{
	unsigned target = emit_data->inst->Texture.Texture;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef type = bld_base->base.elem_type;
	LLVMValueRef coords[4];
	LLVMValueRef mad_args[3];
	LLVMValueRef idx;
	struct LLVMOpaqueValue *cube_vec;
	LLVMValueRef v;
	unsigned i;

	/* v = llvm.AMDGPU.cube(coords_arg[0..3]), a <4 x float>; unpack it
	 * back into scalars. */
	cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
	v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
			    &cube_vec, 1, LLVMReadNoneAttribute);
	for (i = 0; i < 4; ++i) {
		idx = lp_build_const_int32(gallivm, i);
		coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
	}

	/* Scale components 0 and 1 by 1/|coords[2]| and bias by 1.5:
	 * coord = coord * rcp(fabs(coords[2])) + 1.5 */
	coords[2] = build_intrinsic(builder, "fabs",
			type, &coords[2], 1, LLVMReadNoneAttribute);
	coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
	mad_args[1] = coords[2];
	mad_args[2] = LLVMConstReal(type, 1.5);
	mad_args[0] = coords[0];
	coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
			mad_args[0], mad_args[1], mad_args[2]);
	mad_args[0] = coords[1];
	coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
			mad_args[0], mad_args[1], mad_args[2]);

	/* apply xyz = yxw swizzle to coords — coords[3] is used as scratch
	 * here, so the net result is {y, x, w, y}. */
	coords[2] = coords[3];
	coords[3] = coords[1];
	coords[1] = coords[0];
	coords[0] = coords[3];

	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
				coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
	}

	/* Preserve compare/lod/bias. Put it in coords.w. */
	if (opcode == TGSI_OPCODE_TEX2 ||
	    opcode == TGSI_OPCODE_TXB2 ||
	    opcode == TGSI_OPCODE_TXL2) {
		/* Two-operand forms carry the extra value in slot 4. */
		coords[3] = coords_arg[4];
	} else if (opcode == TGSI_OPCODE_TXB ||
		   opcode == TGSI_OPCODE_TXL ||
		   target == TGSI_TEXTURE_SHADOWCUBE) {
		coords[3] = coords_arg[3];
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
static void txd_fetch_args( |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
const struct tgsi_full_instruction * inst = emit_data->inst; |
LLVMValueRef coords[4]; |
unsigned chan, src; |
for (src = 0; src < 3; src++) { |
for (chan = 0; chan < 4; chan++) |
coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan); |
emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm, |
coords, 4); |
} |
emit_data->arg_count = 3; |
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); |
} |
static void txp_fetch_args( |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
const struct tgsi_full_instruction * inst = emit_data->inst; |
LLVMValueRef src_w; |
unsigned chan; |
LLVMValueRef coords[5]; |
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); |
src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); |
for (chan = 0; chan < 3; chan++ ) { |
LLVMValueRef arg = lp_build_emit_fetch(bld_base, |
emit_data->inst, 0, chan); |
coords[chan] = lp_build_emit_llvm_binary(bld_base, |
TGSI_OPCODE_DIV, arg, src_w); |
} |
coords[3] = bld_base->base.one; |
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || |
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || |
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || |
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && |
inst->Instruction.Opcode != TGSI_OPCODE_TXQ && |
inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { |
radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords); |
} |
emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, |
coords, 4); |
emit_data->arg_count = 1; |
} |
/* Generic texture-argument fetch: read the 4 coordinate channels, apply
 * the cube-map transform when the target is a cube texture, and pack
 * the coordinates into one <4 x float> argument. */
static void tex_fetch_args(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	/* XXX: lp_build_swizzle_aos() was failing with wrong arg types,
	 * when we used CHAN_ALL.  We should be able to get this to work,
	 * but for now we will swizzle it ourselves
	emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
	0, CHAN_ALL);
	*/
	const struct tgsi_full_instruction * inst = emit_data->inst;
	/* coords has a 5th slot for the extra operand of the two-source
	 * opcodes below; only the first 4 entries are gathered. */
	LLVMValueRef coords[5];
	unsigned chan;

	for (chan = 0; chan < 4; chan++) {
		coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
	}

	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
	    inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
	    inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
		/* These instructions have additional operand that should be packed
		 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
		 * That operand should be passed as a float value in the args array
		 * right after the coord vector. After packing it's not used anymore,
		 * that's why arg_count is not increased */
		coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
	}

	/* Cube targets need the face-coordinate transform (but not for the
	 * size queries TXQ/TXQ_LZ). */
	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
	}

	emit_data->arg_count = 1;
	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
						    coords, 4);
	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
}
/* TXF fetch: coordinates plus the three constant texel offsets.
 *
 * The offsets come from the TGSI immediate file (indexed through the
 * instruction's offset swizzles), so they can be bitcast to integer
 * constants at compile time; when no offsets are present, zeros are
 * passed instead. */
static void txf_fetch_args(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	const struct tgsi_full_instruction * inst = emit_data->inst;
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	const struct tgsi_texture_offset * off = inst->TexOffsets;
	LLVMTypeRef offset_type = bld_base->int_bld.elem_type;

	/* fetch tex coords */
	tex_fetch_args(bld_base, emit_data);

	/* fetch tex offsets */
	if (inst->Texture.NumOffsets) {
		/* Only a single set of offsets is supported. */
		assert(inst->Texture.NumOffsets == 1);

		emit_data->args[1] = LLVMConstBitCast(
			bld->immediates[off->Index][off->SwizzleX],
			offset_type);
		emit_data->args[2] = LLVMConstBitCast(
			bld->immediates[off->Index][off->SwizzleY],
			offset_type);
		emit_data->args[3] = LLVMConstBitCast(
			bld->immediates[off->Index][off->SwizzleZ],
			offset_type);
	} else {
		emit_data->args[1] = bld_base->int_bld.zero;
		emit_data->args[2] = bld_base->int_bld.zero;
		emit_data->args[3] = bld_base->int_bld.zero;
	}

	emit_data->arg_count = 4;
}
static void emit_icmp( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
unsigned pred; |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
LLVMContextRef context = bld_base->base.gallivm->context; |
switch (emit_data->inst->Instruction.Opcode) { |
case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break; |
case TGSI_OPCODE_USNE: pred = LLVMIntNE; break; |
case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break; |
case TGSI_OPCODE_USLT: pred = LLVMIntULT; break; |
case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break; |
case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break; |
default: |
assert(!"unknown instruction"); |
pred = 0; |
break; |
} |
LLVMValueRef v = LLVMBuildICmp(builder, pred, |
emit_data->args[0], emit_data->args[1],""); |
v = LLVMBuildSExtOrBitCast(builder, v, |
LLVMInt32TypeInContext(context), ""); |
emit_data->output[emit_data->chan] = v; |
} |
static void emit_ucmp( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0], |
bld_base->uint_bld.elem_type, ""); |
LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0, |
bld_base->uint_bld.zero, ""); |
emit_data->output[emit_data->chan] = |
LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], ""); |
} |
static void emit_cmp( |
const struct lp_build_tgsi_action *action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
LLVMRealPredicate pred; |
LLVMValueRef cond; |
/* Use ordered for everything but NE (which is usual for |
* float comparisons) |
*/ |
switch (emit_data->inst->Instruction.Opcode) { |
case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break; |
case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break; |
case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break; |
case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break; |
case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; |
case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break; |
default: assert(!"unknown instruction"); pred = 0; break; |
} |
cond = LLVMBuildFCmp(builder, |
pred, emit_data->args[0], emit_data->args[1], ""); |
emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, |
cond, bld_base->base.one, bld_base->base.zero, ""); |
} |
static void emit_fcmp( |
const struct lp_build_tgsi_action *action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
LLVMContextRef context = bld_base->base.gallivm->context; |
LLVMRealPredicate pred; |
/* Use ordered for everything but NE (which is usual for |
* float comparisons) |
*/ |
switch (emit_data->inst->Instruction.Opcode) { |
case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break; |
case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break; |
case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break; |
case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break; |
default: assert(!"unknown instruction"); pred = 0; break; |
} |
LLVMValueRef v = LLVMBuildFCmp(builder, pred, |
emit_data->args[0], emit_data->args[1],""); |
v = LLVMBuildSExtOrBitCast(builder, v, |
LLVMInt32TypeInContext(context), ""); |
emit_data->output[emit_data->chan] = v; |
} |
static void emit_not( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, |
emit_data->args[0]); |
emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, ""); |
} |
static void emit_arl( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]); |
emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, |
floor_index, bld_base->base.int_elem_type , ""); |
} |
static void emit_and( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildAnd(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_or( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildOr(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_uadd( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildAdd(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_udiv( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_idiv( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_mod( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildSRem(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_umod( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildURem(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_shl( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildShl(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_ushr( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildLShr(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_ishr( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildAShr(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
static void emit_xor( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildXor(builder, |
emit_data->args[0], emit_data->args[1], ""); |
} |
/* SSG / ISSG: sign function — 1 for positive input, 0 for zero, -1 for
 * negative input, built from two chained compare+select pairs: the
 * first clamps positive values to 1 (zero/negative pass through), the
 * second clamps the remaining negatives to -1. */
static void emit_ssg(
		const struct lp_build_tgsi_action * action,
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	LLVMValueRef cmp, val;

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
		/* Integer variant. */
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
	} else { // float SSG
		cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
		cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
	}

	emit_data->output[emit_data->chan] = val;
}
static void emit_ineg( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildNeg(builder, |
emit_data->args[0], ""); |
} |
static void emit_f2i( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, |
emit_data->args[0], bld_base->int_bld.elem_type, ""); |
} |
static void emit_f2u( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder, |
emit_data->args[0], bld_base->uint_bld.elem_type, ""); |
} |
static void emit_i2f( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder, |
emit_data->args[0], bld_base->base.elem_type, ""); |
} |
static void emit_u2f( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data) |
{ |
LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder, |
emit_data->args[0], bld_base->base.elem_type, ""); |
} |
static void emit_immediate(struct lp_build_tgsi_context * bld_base, |
const struct tgsi_full_immediate *imm) |
{ |
unsigned i; |
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); |
for (i = 0; i < 4; ++i) { |
ctx->soa.immediates[ctx->soa.num_immediates][i] = |
LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false ); |
} |
ctx->soa.num_immediates++; |
} |
/* Build a call to the named intrinsic, lazily declaring it in the
 * current module on first use.
 *
 * The declared signature is derived from ret_type and the types of the
 * supplied args, and the given function attribute (e.g. readnone) is
 * attached at declaration time.  Returns the call instruction's value.
 */
LLVMValueRef
build_intrinsic(LLVMBuilderRef builder,
		const char *name,
		LLVMTypeRef ret_type,
		LLVMValueRef *args,
		unsigned num_args,
		LLVMAttribute attr)
{
	/* The module owning the function the builder is positioned in. */
	LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
	LLVMValueRef function;

	function = LLVMGetNamedFunction(module, name);
	if(!function) {
		LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
		unsigned i;

		assert(num_args <= LP_MAX_FUNC_ARGS);

		for(i = 0; i < num_args; ++i) {
			assert(args[i]);
			arg_types[i] = LLVMTypeOf(args[i]);
		}

		function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);

		if (attr)
			LLVMAddFunctionAttr(function, attr);
	}

	return LLVMBuildCall(builder, function, args, num_args, "");
}
static void build_tgsi_intrinsic( |
const struct lp_build_tgsi_action * action, |
struct lp_build_tgsi_context * bld_base, |
struct lp_build_emit_data * emit_data, |
LLVMAttribute attr) |
{ |
struct lp_build_context * base = &bld_base->base; |
emit_data->output[emit_data->chan] = build_intrinsic( |
base->gallivm->builder, action->intr_name, |
emit_data->dst_type, emit_data->args, |
emit_data->arg_count, attr); |
} |
/* Action handler for side-effect-free intrinsics: same as
 * build_tgsi_intrinsic but marks the callee readnone so LLVM may CSE
 * and reorder the calls freely. */
void
build_tgsi_intrinsic_nomem(
		const struct lp_build_tgsi_action * action,
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
}
/* BFI: bitfield insert.  Inserts the low "src3" bits of src1 into src0
 * starting at bit offset src2. */
static void emit_bfi(const struct lp_build_tgsi_action * action,
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef bfi_args[3];

	// Calculate the bitmask: ((1 << src3) - 1) << src2
	bfi_args[0] = LLVMBuildShl(builder,
				   LLVMBuildSub(builder,
						LLVMBuildShl(builder,
							     bld_base->int_bld.one,
							     emit_data->args[3], ""),
						bld_base->int_bld.one, ""),
				   emit_data->args[2], "");

	// Shift the insert value into position.
	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
				   emit_data->args[2], "");

	bfi_args[2] = emit_data->args[0];

	/* Calculate:
	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	emit_data->output[emit_data->chan] =
		LLVMBuildXor(builder, bfi_args[2],
			LLVMBuildAnd(builder, bfi_args[0],
				LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
					     ""), ""), "");
}
/* LSB: index of the least-significant set bit, via llvm.cttz.i32.
 * This is ffs in C. */
static void emit_lsb(const struct lp_build_tgsi_action * action,
		     struct lp_build_tgsi_context * bld_base,
		     struct lp_build_emit_data * emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef args[2] = {
		emit_data->args[0],

		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
		 * add special code to check for x=0. The reason is that
		 * the LLVM behavior for x=0 is different from what we
		 * need here.
		 *
		 * The hardware already implements the correct behavior.
		 */
		lp_build_const_int32(gallivm, 1)
	};

	emit_data->output[emit_data->chan] =
		build_intrinsic(gallivm->builder, "llvm.cttz.i32",
				emit_data->dst_type, args, Elements(args),
				LLVMReadNoneAttribute);
}
/* UMSB: find the last (most-significant) bit set, via llvm.ctlz.i32. */
static void emit_umsb(const struct lp_build_tgsi_action * action,
		      struct lp_build_tgsi_context * bld_base,
		      struct lp_build_emit_data * emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef args[2] = {
		emit_data->args[0],
		/* Don't generate code for handling zero: */
		lp_build_const_int32(gallivm, 1)
	};

	LLVMValueRef msb =
		build_intrinsic(builder, "llvm.ctlz.i32",
				emit_data->dst_type, args, Elements(args),
				LLVMReadNoneAttribute);

	/* The HW returns the last bit index from MSB, but TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
			   msb, "");

	/* Check for zero: TGSI expects -1 when no bit is set. */
	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder,
				LLVMBuildICmp(builder, LLVMIntEQ, args[0],
					      bld_base->uint_bld.zero, ""),
				lp_build_const_int32(gallivm, -1), msb, "");
}
/* IMSB: find the last (most-significant) bit opposite of the sign bit,
 * via the AMDGPU "find last bit" intrinsic llvm.AMDGPU.flbit.i32. */
static void emit_imsb(const struct lp_build_tgsi_action * action,
		      struct lp_build_tgsi_context * bld_base,
		      struct lp_build_emit_data * emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef arg = emit_data->args[0];

	LLVMValueRef msb =
		build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
				emit_data->dst_type, &arg, 1,
				LLVMReadNoneAttribute);

	/* The HW returns the last bit index from MSB, but TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
			   msb, "");

	/* If arg == 0 || arg == -1 (0xffffffff), return -1.
	 * (All-zeros and all-ones inputs have no "opposite" bit.) */
	LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);

	LLVMValueRef cond =
		LLVMBuildOr(builder,
			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
					  bld_base->uint_bld.zero, ""),
			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
					  all_ones, ""), "");

	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder, cond, all_ones, msb, "");
}
/* Initialize a radeon_llvm_context: create the LLVM context/module/
 * builder, set up the scalar (length-1) SoA build contexts, install the
 * store/fetch/declaration callbacks and fill the per-opcode action
 * table that maps TGSI opcodes to emit functions or intrinsic names. */
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
	memset(&ctx->soa, 0, sizeof(ctx->soa));
	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);

	ctx->store_output_intr = "llvm.AMDGPU.store.output.";
	ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
	struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;

	/* XXX: We need to revisit this. I think the correct way to do this is
	 * to use length = 4 here and use the elem_bld for everything. */
	type.floating = TRUE;
	type.fixed = FALSE;
	type.sign = TRUE;
	type.norm = FALSE;
	type.width = 32;
	type.length = 1;

	/* Float, unsigned and signed scalar build contexts. */
	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));

	/* Register/declaration/immediate handling callbacks. */
	bld_base->soa = 1;
	bld_base->emit_store = emit_store;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;

	/* Allocate outputs */
	ctx->soa.outputs = ctx->outputs;

	ctx->num_arrays = 0;

	/* XXX: Is there a better way to initialize all this ? */
	/* Per-opcode actions: most entries either point at one of the
	 * emit_* helpers above or route through an AMDGPU/AMDIL/generic
	 * LLVM intrinsic via build_tgsi_intrinsic_nomem. */
	lp_set_default_actions(bld_base);

	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
	bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
	bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
	bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
	bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
	bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
	bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
	bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
	bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp";
	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
	bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
	bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
	bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
	bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
	bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
	bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
	bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
	bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;

	/* RSQ changed name in LLVM 3.5. */
	bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
#if HAVE_LLVM >= 0x0305
	bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
#else
	bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
#endif
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx, |
LLVMTypeRef *ParamTypes, unsigned ParamCount) |
{ |
LLVMTypeRef main_fn_type; |
LLVMBasicBlockRef main_fn_body; |
/* Setup the function */ |
main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context), |
ParamTypes, ParamCount, 0); |
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type); |
main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context, |
ctx->main_fn, "main_body"); |
LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body); |
} |
/* Finish the shader's main function and run per-function optimizations.
 *
 * Terminates "main" with a void return, then builds a function pass
 * manager, runs mem2reg plus a handful of scalar optimization passes over
 * the function, and finally releases the builder and the pass manager.
 * The module itself stays alive until radeon_llvm_dispose().
 */
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
/* End the main function with Return*/
LLVMBuildRetVoid(gallivm->builder);
/* Create the pass manager */
ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
/* This pass should eliminate all the load and store instructions
 * (promotes allocas created during translation to SSA registers) */
LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
LLVMAddLICMPass(gallivm->passmgr);
LLVMAddAggressiveDCEPass(gallivm->passmgr);
LLVMAddCFGSimplificationPass(gallivm->passmgr);
LLVMAddInstructionCombiningPass(gallivm->passmgr);
/* Run the pass */
LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
/* builder and pass manager are no longer needed once main is finalized */
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
}
void radeon_llvm_dispose(struct radeon_llvm_context * ctx) |
{ |
LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module); |
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context); |
FREE(ctx->temps); |
ctx->temps = NULL; |
FREE(ctx->loop); |
ctx->loop = NULL; |
ctx->loop_depth_max = 0; |
FREE(ctx->branch); |
ctx->branch = NULL; |
ctx->branch_depth_max = 0; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_uvd.c |
---|
0,0 → 1,947 |
/************************************************************************** |
* |
* Copyright 2011 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#include <sys/types.h> |
#include <assert.h> |
#include <errno.h> |
#include <unistd.h> |
#include <stdio.h> |
#include "pipe/p_video_codec.h" |
#include "util/u_memory.h" |
#include "util/u_video.h" |
#include "vl/vl_defines.h" |
#include "vl/vl_mpeg12_decoder.h" |
#include "r600_pipe_common.h" |
#include "radeon_video.h" |
#include "radeon_uvd.h" |
#define NUM_BUFFERS 4 |
#define NUM_MPEG2_REFS 6 |
#define NUM_H264_REFS 17 |
#define NUM_VC1_REFS 5 |
#define FB_BUFFER_OFFSET 0x1000 |
#define FB_BUFFER_SIZE 2048 |
/* UVD decoder representation
 *
 * Wraps pipe_video_codec with the winsys objects and the ring of
 * message/feedback and bitstream buffers used to talk to the UVD block.
 */
struct ruvd_decoder {
struct pipe_video_codec base; /* public interface, must stay first */
ruvd_set_dtb set_dtb; /* callback filling the decoding target fields of a msg */
unsigned stream_handle; /* handle identifying this stream to the firmware */
unsigned frame_number; /* number of frames started so far, see ruvd_begin_frame */
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs; /* UVD ring command stream */
unsigned cur_buffer; /* index of the msg_fb/bs buffer pair currently in use */
struct rvid_buffer msg_fb_buffers[NUM_BUFFERS]; /* message at 0, feedback at FB_BUFFER_OFFSET */
struct ruvd_msg *msg; /* CPU pointer into the mapped msg/fb buffer, NULL when unmapped */
uint32_t *fb; /* CPU pointer to the feedback area, NULL when unmapped */
struct rvid_buffer bs_buffers[NUM_BUFFERS]; /* bitstream upload buffers */
void* bs_ptr; /* current write position in the mapped bitstream buffer */
unsigned bs_size; /* bytes accumulated in the bitstream buffer this frame */
struct rvid_buffer dpb; /* decoded picture buffer, sized by calc_dpb_size() */
};
/* flush IB to the hardware */
static void flush(struct ruvd_decoder *dec)
{
/* async flush: submits the accumulated commands without waiting */
dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0);
}
/* add a new set register command to the IB */ |
static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) |
{ |
uint32_t *pm4 = dec->cs->buf; |
pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0); |
pm4[dec->cs->cdw++] = val; |
} |
/* send a command to the VCPU through the GPCOM registers */
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
struct radeon_winsys_cs_handle* cs_buf, uint32_t off,
enum radeon_bo_usage usage, enum radeon_bo_domain domain)
{
int reloc_idx;
/* make the buffer resident and get its relocation slot */
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
RADEON_PRIO_MIN);
/* DATA0 = offset into the buffer, DATA1 = relocation index
 * (times four — presumably a dword offset into the reloc table;
 * NOTE(review): confirm against the kernel UVD command interface) */
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
}
/* map the next available message/feedback buffer */
static void map_msg_fb_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
uint8_t *ptr;
/* grab the current message/feedback buffer */
buf = &dec->msg_fb_buffers[dec->cur_buffer];
/* and map it for CPU access
 * NOTE(review): the map result is not NULL-checked; a failed map would
 * leave dec->msg/dec->fb pointing at garbage — confirm callers can rely
 * on this mapping never failing */
ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
/* calc buffer offsets: message at the start, feedback at FB_BUFFER_OFFSET */
dec->msg = (struct ruvd_msg *)ptr;
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
}
/* unmap and send a message command to the VCPU
 * (no-op unless map_msg_fb_buf() was called first) */
static void send_msg_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
/* ignore the request if message/feedback buffer isn't mapped */
if (!dec->msg || !dec->fb)
return;
/* grab the current message buffer */
buf = &dec->msg_fb_buffers[dec->cur_buffer];
/* unmap the buffer */
dec->ws->buffer_unmap(buf->res->cs_buf);
dec->msg = NULL;
dec->fb = NULL;
/* and send it to the hardware */
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
}
/* cycle to the next set of buffers */ |
static void next_buffer(struct ruvd_decoder *dec) |
{ |
++dec->cur_buffer; |
dec->cur_buffer %= NUM_BUFFERS; |
} |
/* convert the profile into something UVD understands */ |
static uint32_t profile2stream_type(enum pipe_video_profile profile) |
{ |
switch (u_reduce_video_profile(profile)) { |
case PIPE_VIDEO_FORMAT_MPEG4_AVC: |
return RUVD_CODEC_H264; |
case PIPE_VIDEO_FORMAT_VC1: |
return RUVD_CODEC_VC1; |
case PIPE_VIDEO_FORMAT_MPEG12: |
return RUVD_CODEC_MPEG2; |
case PIPE_VIDEO_FORMAT_MPEG4: |
return RUVD_CODEC_MPEG4; |
default: |
assert(0); |
return 0; |
} |
} |
/* calculate size of reference picture buffer
 *
 * Returns the number of bytes to allocate for the decoded picture buffer
 * (references plus per-codec auxiliary surfaces).  The per-codec terms
 * below mirror what the UVD firmware expects; the magic multipliers are
 * firmware requirements — NOTE(review): confirm against AMD UVD docs
 * before changing any of them.
 */
static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
{
unsigned width_in_mb, height_in_mb, image_size, dpb_size;
// always align them to MB size for dpb calculation
unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
// always one more for currently decoded picture
unsigned max_references = templ->max_references + 1;
// aligned size of a single frame
// (luma plus half again for chroma — presumably NV12 4:2:0)
image_size = width * height;
image_size += image_size / 2;
image_size = align(image_size, 1024);
// picture width & height in 16 pixel units
width_in_mb = width / VL_MACROBLOCK_WIDTH;
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
switch (u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
// the firmware seems to allways assume a minimum of ref frames
max_references = MAX2(NUM_H264_REFS, max_references);
// reference picture buffer
dpb_size = image_size * max_references;
// macroblock context buffer
dpb_size += width_in_mb * height_in_mb * max_references * 192;
// IT surface buffer
dpb_size += width_in_mb * height_in_mb * 32;
break;
case PIPE_VIDEO_FORMAT_VC1:
// the firmware seems to allways assume a minimum of ref frames
max_references = MAX2(NUM_VC1_REFS, max_references);
// reference picture buffer
dpb_size = image_size * max_references;
// CONTEXT_BUFFER
dpb_size += width_in_mb * height_in_mb * 128;
// IT surface buffer
dpb_size += width_in_mb * 64;
// DB surface buffer
dpb_size += width_in_mb * 128;
// BP
dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
break;
case PIPE_VIDEO_FORMAT_MPEG12:
// reference picture buffer, must be big enough for all frames
dpb_size = image_size * NUM_MPEG2_REFS;
break;
case PIPE_VIDEO_FORMAT_MPEG4:
// reference picture buffer
dpb_size = image_size * max_references;
// CM
dpb_size += width_in_mb * height_in_mb * 64;
// IT surface buffer
dpb_size += align(width_in_mb * height_in_mb * 32, 64);
break;
default:
// something is missing here
assert(0);
// at least use a sane default value
dpb_size = 32 * 1024 * 1024;
break;
}
return dpb_size;
}
/* get h264 specific message bits
 *
 * Translates the gallium H.264 picture description (SPS/PPS fields,
 * reference lists, scaling matrices) into the ruvd_h264 structure the
 * firmware consumes.  Flag bit positions follow the UVD message layout.
 */
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
{
struct ruvd_h264 result;
memset(&result, 0, sizeof(result));
switch (pic->base.profile) {
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
result.profile = RUVD_H264_PROFILE_BASELINE;
break;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
result.profile = RUVD_H264_PROFILE_MAIN;
break;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
result.profile = RUVD_H264_PROFILE_HIGH;
break;
default:
assert(0);
break;
}
/* pick level by frame size in macroblocks (>> 8 == / 256 pixels per MB);
 * 1620 MBs is the level 3.0 limit per the H.264 spec, larger frames are
 * reported as level 4.1 */
if (((dec->base.width * dec->base.height) >> 8) <= 1620)
result.level = 30;
else
result.level = 41;
/* pack selected SPS flags into their message bit positions */
result.sps_info_flags = 0;
result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
switch (dec->base.chroma_format) {
case PIPE_VIDEO_CHROMA_FORMAT_400:
result.chroma_format = 0;
break;
case PIPE_VIDEO_CHROMA_FORMAT_420:
result.chroma_format = 1;
break;
case PIPE_VIDEO_CHROMA_FORMAT_422:
result.chroma_format = 2;
break;
case PIPE_VIDEO_CHROMA_FORMAT_444:
result.chroma_format = 3;
break;
}
/* pack selected PPS flags into their message bit positions
 * (weighted_bipred_idc is a 2-bit field at bits 4..5) */
result.pps_info_flags = 0;
result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
result.slice_group_map_type = pic->pps->slice_group_map_type;
result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
/* six 4x4 and two 8x8 scaling matrices */
memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
result.num_ref_frames = pic->num_ref_frames;
result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
result.frame_num = pic->frame_num;
memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
/* the frame number doubles as the decoded picture index */
result.decoded_pic_idx = pic->frame_num;
return result;
}
/* get vc1 specific message bits
 *
 * Translates the gallium VC-1 picture description into the ruvd_vc1
 * structure; flag shift positions follow the UVD message layout.
 */
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
{
struct ruvd_vc1 result;
memset(&result, 0, sizeof(result));
/* profile and level — the level values here are fixed per profile */
switch(pic->base.profile) {
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
result.profile = RUVD_VC1_PROFILE_SIMPLE;
result.level = 1;
break;
case PIPE_VIDEO_PROFILE_VC1_MAIN:
result.profile = RUVD_VC1_PROFILE_MAIN;
result.level = 2;
break;
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
result.profile = RUVD_VC1_PROFILE_ADVANCED;
result.level = 4;
break;
default:
assert(0);
}
/* fields common for all profiles */
result.sps_info_flags |= pic->postprocflag << 7;
result.sps_info_flags |= pic->pulldown << 6;
result.sps_info_flags |= pic->interlace << 5;
result.sps_info_flags |= pic->tfcntrflag << 4;
result.sps_info_flags |= pic->finterpflag << 3;
result.sps_info_flags |= pic->psf << 1;
/* multi-bit fields: range_mapy/range_mapuv are 3 bits, maxbframes 3 bits,
 * quantizer and vstransform 2 bits each */
result.pps_info_flags |= pic->range_mapy_flag << 31;
result.pps_info_flags |= pic->range_mapy << 28;
result.pps_info_flags |= pic->range_mapuv_flag << 27;
result.pps_info_flags |= pic->range_mapuv << 24;
result.pps_info_flags |= pic->multires << 21;
result.pps_info_flags |= pic->maxbframes << 16;
result.pps_info_flags |= pic->overlap << 11;
result.pps_info_flags |= pic->quantizer << 9;
result.pps_info_flags |= pic->panscan_flag << 7;
result.pps_info_flags |= pic->refdist_flag << 6;
result.pps_info_flags |= pic->vstransform << 0;
/* some fields only apply to main/advanced profile */
if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
result.pps_info_flags |= pic->syncmarker << 20;
result.pps_info_flags |= pic->rangered << 19;
result.pps_info_flags |= pic->loopfilter << 5;
result.pps_info_flags |= pic->fastuvmc << 4;
result.pps_info_flags |= pic->extended_mv << 3;
result.pps_info_flags |= pic->extended_dmv << 8;
result.pps_info_flags |= pic->dquant << 1;
}
/* 4:2:0 is the only chroma format VC-1 uses here */
result.chroma_format = 1;
#if 0
//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
uint32_t slice_count
uint8_t picture_type
uint8_t frame_coding_mode
uint8_t deblockEnable
uint8_t pquant
#endif
return result;
}
/* extract the frame number from a referenced video buffer */ |
static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) |
{ |
uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; |
uint32_t max = MAX2(dec->frame_number, 1) - 1; |
uintptr_t frame; |
/* seems to be the most sane fallback */ |
if (!ref) |
return max; |
/* get the frame number from the associated data */ |
frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); |
/* limit the frame number to a valid range */ |
return MAX2(MIN2(frame, max), min); |
} |
/* get mpeg2 specific msg bits
 *
 * Fills the ruvd_mpeg2 structure from the gallium MPEG-1/2 picture
 * description; quantiser matrices are re-ordered into zig-zag scan order.
 */
static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
struct pipe_mpeg12_picture_desc *pic)
{
/* pick the scan order matching the picture's alternate_scan flag */
const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
struct ruvd_mpeg2 result;
unsigned i;
memset(&result, 0, sizeof(result));
result.decoded_pic_idx = dec->frame_number;
/* forward/backward reference picture indices */
for (i = 0; i < 2; ++i)
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
result.load_intra_quantiser_matrix = 1;
result.load_nonintra_quantiser_matrix = 1;
for (i = 0; i < 64; ++i) {
result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
}
result.profile_and_level_indication = 0;
result.chroma_format = 0x1;
result.picture_coding_type = pic->picture_coding_type;
/* f_code is stored off-by-one relative to the bitstream values */
result.f_code[0][0] = pic->f_code[0][0] + 1;
result.f_code[0][1] = pic->f_code[0][1] + 1;
result.f_code[1][0] = pic->f_code[1][0] + 1;
result.f_code[1][1] = pic->f_code[1][1] + 1;
result.intra_dc_precision = pic->intra_dc_precision;
result.pic_structure = pic->picture_structure;
result.top_field_first = pic->top_field_first;
result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
result.concealment_motion_vectors = pic->concealment_motion_vectors;
result.q_scale_type = pic->q_scale_type;
result.intra_vlc_format = pic->intra_vlc_format;
result.alternate_scan = pic->alternate_scan;
return result;
}
/* get mpeg4 specific msg bits
 *
 * Fills the ruvd_mpeg4 structure from the gallium MPEG-4 part 2 picture
 * description.  Several bitstream fields are hard-coded (ASP Level 0,
 * rectangular layer shape) — see inline comments.
 */
static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
struct pipe_mpeg4_picture_desc *pic)
{
struct ruvd_mpeg4 result;
unsigned i;
memset(&result, 0, sizeof(result));
result.decoded_pic_idx = dec->frame_number;
/* forward/backward reference picture indices */
for (i = 0; i < 2; ++i)
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
result.variant_type = 0;
result.profile_and_level_indication = 0xF0; // ASP Level0
result.video_object_layer_verid = 0x5; // advanced simple
result.video_object_layer_shape = 0x0; // rectangular
result.video_object_layer_width = dec->base.width;
result.video_object_layer_height = dec->base.height;
result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
/* pack the per-picture flags; commented-out lines document bit
 * positions that are currently always zero */
result.flags |= pic->short_video_header << 0;
//result.flags |= obmc_disable << 1;
result.flags |= pic->interlaced << 2;
result.flags |= 1 << 3; // load_intra_quant_mat
result.flags |= 1 << 4; // load_nonintra_quant_mat
result.flags |= pic->quarter_sample << 5;
result.flags |= 1 << 6; // complexity_estimation_disable
result.flags |= pic->resync_marker_disable << 7;
//result.flags |= data_partitioned << 8;
//result.flags |= reversible_vlc << 9;
result.flags |= 0 << 10; // newpred_enable
result.flags |= 0 << 11; // reduced_resolution_vop_enable
//result.flags |= scalability << 12;
//result.flags |= is_object_layer_identifier << 13;
//result.flags |= fixed_vop_rate << 14;
//result.flags |= newpred_segment_type << 15;
result.quant_type = pic->quant_type;
/* quantisation matrices in zig-zag scan order */
for (i = 0; i < 64; ++i) {
result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
}
/* message fields not yet filled in:
int32_t trd [2]
int32_t trb [2]
uint8_t vop_coding_type
uint8_t vop_fcode_forward
uint8_t vop_fcode_backward
uint8_t rounding_control
uint8_t alternate_vertical_scan_flag
uint8_t top_field_first
*/
return result;
}
/**
 * destroy this video decoder
 *
 * Sends a DESTROY message for the stream to the firmware, then tears
 * down the command stream and frees all buffers and the decoder itself.
 */
static void ruvd_destroy(struct pipe_video_codec *decoder)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
unsigned i;
assert(decoder);
/* tell the firmware the stream is going away */
map_msg_fb_buf(dec);
memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
dec->msg->stream_handle = dec->stream_handle;
send_msg_buf(dec);
flush(dec);
/* release command stream, buffer rings, DPB and the decoder itself */
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
FREE(dec);
}
/* free associated data in the video buffer callback */
static void ruvd_destroy_associated_data(void *data)
{
/* NOOP, since we only use an intptr (the frame number), not an allocation */
}
/**
 * start decoding of a new frame
 *
 * Assigns the target buffer its frame number (stored as associated data,
 * later recovered by get_ref_pic_idx) and maps the current bitstream
 * buffer so decode_bitstream can append data.
 */
static void ruvd_begin_frame(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
uintptr_t frame;
assert(decoder);
/* tag the target with the new frame number */
frame = ++dec->frame_number;
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
&ruvd_destroy_associated_data);
/* start with an empty, mapped bitstream buffer */
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->cs_buf,
dec->cs, PIPE_TRANSFER_WRITE);
}
/**
 * decode a macroblock
 *
 * Macroblock-level decoding is not implemented for UVD; only bitstream
 * entrypoints are supported (see ruvd_decode_bitstream).
 */
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture,
const struct pipe_macroblock *macroblocks,
unsigned num_macroblocks)
{
/* not supported (yet) */
assert(0);
}
/**
 * decode a bitstream
 *
 * Appends the given bitstream chunks to the current (mapped) bitstream
 * buffer, growing and re-mapping it when it would overflow.  A silent
 * return means begin_frame's map (or a re-map/resize here) failed.
 */
static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture,
unsigned num_buffers,
const void * const *buffers,
const unsigned *sizes)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
unsigned i;
assert(decoder);
if (!dec->bs_ptr)
return;
for (i = 0; i < num_buffers; ++i) {
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
unsigned new_size = dec->bs_size + sizes[i];
/* grow the buffer if this chunk doesn't fit anymore */
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->cs_buf);
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
/* re-map and restore the write position
 * (void* arithmetic — relies on the GCC extension) */
dec->bs_ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs,
PIPE_TRANSFER_WRITE);
if (!dec->bs_ptr)
return;
dec->bs_ptr += dec->bs_size;
}
memcpy(dec->bs_ptr, buffers[i], sizes[i]);
dec->bs_size += sizes[i];
dec->bs_ptr += sizes[i];
}
}
/**
 * end decoding of the current frame
 *
 * Pads and unmaps the bitstream buffer, builds the DECODE message with
 * the codec-specific parameters, and submits the whole command sequence
 * (message, DPB, bitstream, decode target, feedback) to the hardware.
 */
static void ruvd_end_frame(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
struct radeon_winsys_cs_handle *dt;
struct rvid_buffer *msg_fb_buf, *bs_buf;
unsigned bs_size;
assert(decoder);
/* begin_frame (or a resize in decode_bitstream) failed — nothing to submit */
if (!dec->bs_ptr)
return;
msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
bs_buf = &dec->bs_buffers[dec->cur_buffer];
/* zero-pad the bitstream up to the 128 byte alignment the hw expects */
bs_size = align(dec->bs_size, 128);
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
dec->ws->buffer_unmap(bs_buf->res->cs_buf);
/* build the DECODE message */
map_msg_fb_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DECODE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->status_report_feedback_number = dec->frame_number;
dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
dec->msg->body.decode.decode_flags = 0x1;
dec->msg->body.decode.width_in_samples = dec->base.width;
dec->msg->body.decode.height_in_samples = dec->base.height;
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
/* let the chip specific callback fill in the decode target fields */
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
switch (u_reduce_video_profile(picture->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
break;
case PIPE_VIDEO_FORMAT_VC1:
dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
break;
case PIPE_VIDEO_FORMAT_MPEG12:
dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
break;
case PIPE_VIDEO_FORMAT_MPEG4:
dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
break;
default:
assert(0);
return;
}
/* DB surface uses the same tiling config as the decode target */
dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
dec->msg->body.decode.extension_support = 0x1;
/* set at least the feedback buffer size */
dec->fb[0] = FB_BUFFER_SIZE;
send_msg_buf(dec);
/* submit all buffers and kick off decoding */
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
set_reg(dec, RUVD_ENGINE_CNTL, 1);
flush(dec);
next_buffer(dec);
}
/**
 * flush any outstanding command buffers to the hardware
 *
 * Intentionally empty: end_frame already flushes after each frame.
 */
static void ruvd_flush(struct pipe_video_codec *decoder)
{
}
/**
 * create an UVD decoder
 *
 * Falls back to the shader based MPEG-1/2 decoder when UVD can't handle
 * the request.  Otherwise allocates the decoder, its command stream, the
 * message/feedback and bitstream buffer rings and the DPB, then sends a
 * CREATE message to the firmware.  Returns NULL on failure.
 */
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
unsigned dpb_size = calc_dpb_size(templ);
unsigned width = templ->width, height = templ->height;
unsigned bs_buf_size;
struct radeon_info info;
struct ruvd_decoder *dec;
int i;
ws->query_info(ws, &info);
switch(u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
/* pre-PALM chips (and non-bitstream entrypoints) use the shader decoder */
if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
return vl_create_mpeg12_decoder(context, templ);
/* fall through */
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
/* hardware works on whole macroblocks */
width = align(width, VL_MACROBLOCK_WIDTH);
height = align(height, VL_MACROBLOCK_HEIGHT);
break;
default:
break;
}
dec = CALLOC_STRUCT(ruvd_decoder);
if (!dec)
return NULL;
dec->base = *templ;
dec->base.context = context;
dec->base.width = width;
dec->base.height = height;
dec->base.destroy = ruvd_destroy;
dec->base.begin_frame = ruvd_begin_frame;
dec->base.decode_macroblock = ruvd_decode_macroblock;
dec->base.decode_bitstream = ruvd_decode_bitstream;
dec->base.end_frame = ruvd_end_frame;
dec->base.flush = ruvd_flush;
dec->set_dtb = set_dtb;
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
/* initial bitstream buffer size: 2 bits per pixel (512 per macroblock);
 * decode_bitstream grows it on demand */
bs_buf_size = width * height * 512 / (16 * 16);
for (i = 0; i < NUM_BUFFERS; ++i) {
unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
msg_fb_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
}
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
rvid_clear_buffer(context, &dec->dpb);
/* announce the new stream to the firmware */
map_msg_fb_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
dec->msg->body.create.width_in_samples = dec->base.width;
dec->msg->body.create.height_in_samples = dec->base.height;
dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
send_msg_buf(dec);
flush(dec);
next_buffer(dec);
return &dec->base;
error:
/* dec was CALLOC'ed, so destroying never-created buffers is safe here —
 * NOTE(review): assumes rvid_destroy_buffer tolerates zeroed buffers */
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
FREE(dec);
return NULL;
}
/* calculate top/bottom offset */ |
static unsigned texture_offset(struct radeon_surf *surface, unsigned layer) |
{ |
return surface->level[0].offset + |
layer * surface->level[0].slice_size; |
} |
/* Hardware-encode the macro tile aspect ratio: maps 1/2/4/8 to 0/1/2/3
 * (log2); any other value falls back to 0, like 1 did. */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
	switch (macro_tile_aspect) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
/* Hardware-encode a bank width/height value: maps 1/2/4/8 to 0/1/2/3
 * (log2); any other value falls back to 0, like 1 did. */
static unsigned bank_wh(unsigned bankwh)
{
	switch (bankwh) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
/**
 * fill decoding target field from the luma and chroma surfaces
 *
 * Derives the decode target pitch, tiling/array mode, top/bottom field
 * offsets and the tile config bits from the two surfaces.  Luma and
 * chroma must share the same bank/aspect parameters (asserted below).
 */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma)
{
msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
/* translate the surface tiling mode to the UVD equivalents */
switch (luma->level[0].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
break;
case RADEON_SURF_MODE_1D:
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
break;
case RADEON_SURF_MODE_2D:
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
break;
default:
assert(0);
break;
}
msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
/* in field mode the bottom field is the second layer, otherwise it
 * aliases the top field */
if (msg->body.decode.dt_field_mode) {
msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
} else {
msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
}
/* one shared tile config — both planes must agree */
assert(luma->bankw == chroma->bankw);
assert(luma->bankh == chroma->bankh);
assert(luma->mtilea == chroma->mtilea);
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw));
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh));
msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_uvd.h |
---|
0,0 → 1,358 |
/************************************************************************** |
* |
* Copyright 2011 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#ifndef RADEON_UVD_H |
#define RADEON_UVD_H |
#include "radeon/radeon_winsys.h" |
#include "vl/vl_video_buffer.h" |
/* UVD uses PM4 packet type 0 and 2 */
/* For each register field: _S(x) shifts a value into the field, _G(x)
 * extracts it, and the _C constant is the AND-mask that clears the field. */
#define RUVD_PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define RUVD_PKT_TYPE_C 0x3FFFFFFF
#define RUVD_PKT_COUNT_S(x) (((x) & 0x3FFF) << 16)
#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
#define RUVD_PKT_COUNT_C 0xC000FFFF
#define RUVD_PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0)
#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
/* build a type-0 packet header (register write) / a type-2 (NOP/filler) packet */
#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count))
#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2))
/* registers involved with UVD */
#define RUVD_GPCOM_VCPU_CMD 0xEF0C
#define RUVD_GPCOM_VCPU_DATA0 0xEF10
#define RUVD_GPCOM_VCPU_DATA1 0xEF14
#define RUVD_ENGINE_CNTL 0xEF18
/* UVD commands to VCPU */
#define RUVD_CMD_MSG_BUFFER 0x00000000
#define RUVD_CMD_DPB_BUFFER 0x00000001
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
/* UVD message types */
#define RUVD_MSG_CREATE 0
#define RUVD_MSG_DECODE 1
#define RUVD_MSG_DESTROY 2
/* UVD stream types */
#define RUVD_CODEC_H264 0x00000000
#define RUVD_CODEC_VC1 0x00000001
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
/* UVD decode target buffer tiling mode */
#define RUVD_TILE_LINEAR 0x00000000
#define RUVD_TILE_8X4 0x00000001
#define RUVD_TILE_8X8 0x00000002
#define RUVD_TILE_32AS8 0x00000003
/* UVD decode target buffer array mode */
/* NOTE(review): MACRO_TILED_MICRO_LINEAR shares the value 0x4 with 2D_THIN;
 * presumably intentional aliasing of the same hardware mode — confirm against
 * the UVD firmware interface before changing. */
#define RUVD_ARRAY_MODE_LINEAR 0x00000000
#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
#define RUVD_ARRAY_MODE_2D_THIN 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
/* UVD tile config: bit-field packing helpers for dt_surf_tile_config */
#define RUVD_BANK_WIDTH(x) ((x) << 0)
#define RUVD_BANK_HEIGHT(x) ((x) << 3)
#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
#define RUVD_NUM_BANKS(x) ((x) << 9)
/* H.264 profile definitions */
#define RUVD_H264_PROFILE_BASELINE 0x00000000
#define RUVD_H264_PROFILE_MAIN 0x00000001
#define RUVD_H264_PROFILE_HIGH 0x00000002
#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
#define RUVD_H264_PROFILE_MVC 0x00000004
/* VC-1 profile definitions */
#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
#define RUVD_VC1_PROFILE_MAIN 0x00000001
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
/* One view entry of the H.264 MVC (multi-view coding) extension; field names
 * follow the MVC SPS extension syntax elements. Part of the firmware message
 * layout — do not reorder or resize fields. */
struct ruvd_mvc_element {
uint16_t viewOrderIndex;
uint16_t viewId;
uint16_t numOfAnchorRefsInL0;
uint16_t viewIdOfAnchorRefsInL0[15];
uint16_t numOfAnchorRefsInL1;
uint16_t viewIdOfAnchorRefsInL1[15];
uint16_t numOfNonAnchorRefsInL0;
uint16_t viewIdOfNonAnchorRefsInL0[15];
uint16_t numOfNonAnchorRefsInL1;
uint16_t viewIdOfNonAnchorRefsInL1[15];
};
/* H.264/AVC decode parameters sent to the UVD firmware. Field names mirror
 * the corresponding H.264 SPS/PPS/slice syntax elements. This is a firmware
 * ABI structure — layout must not change. */
struct ruvd_h264 {
uint32_t profile;
uint32_t level;
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint8_t chroma_format;
uint8_t bit_depth_luma_minus8;
uint8_t bit_depth_chroma_minus8;
uint8_t log2_max_frame_num_minus4;
uint8_t pic_order_cnt_type;
uint8_t log2_max_pic_order_cnt_lsb_minus4;
uint8_t num_ref_frames;
uint8_t reserved_8bit;
int8_t pic_init_qp_minus26;
int8_t pic_init_qs_minus26;
int8_t chroma_qp_index_offset;
int8_t second_chroma_qp_index_offset;
uint8_t num_slice_groups_minus1;
uint8_t slice_group_map_type;
uint8_t num_ref_idx_l0_active_minus1;
uint8_t num_ref_idx_l1_active_minus1;
uint16_t slice_group_change_rate_minus1;
uint16_t reserved_16bit_1;
/* inverse-scan quantization matrices (4x4 intra/inter, 8x8) */
uint8_t scaling_list_4x4[6][16];
uint8_t scaling_list_8x8[2][64];
uint32_t frame_num;
uint32_t frame_num_list[16];
int32_t curr_field_order_cnt_list[2];
int32_t field_order_cnt_list[16][2];
uint32_t decoded_pic_idx;
uint32_t curr_pic_ref_frame_num;
uint8_t ref_frame_list[16];
uint32_t reserved[122];
/* MVC extension data; only meaningful for the MVC profile */
struct {
uint32_t numViews;
uint32_t viewId0;
struct ruvd_mvc_element mvcElements[1];
} mvc;
};
/* VC-1 decode parameters for the UVD firmware message (firmware ABI). */
struct ruvd_vc1 {
uint32_t profile;
uint32_t level;
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint32_t pic_structure;
uint32_t chroma_format;
};
/* MPEG-2 decode parameters for the UVD firmware message; names follow the
 * MPEG-2 picture/sequence header syntax elements (firmware ABI). */
struct ruvd_mpeg2 {
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
uint8_t load_intra_quantiser_matrix;
uint8_t load_nonintra_quantiser_matrix;
uint8_t reserved_quantiser_alignement[2];
uint8_t intra_quantiser_matrix[64];
uint8_t nonintra_quantiser_matrix[64];
uint8_t profile_and_level_indication;
uint8_t chroma_format;
uint8_t picture_coding_type;
uint8_t reserved_1;
uint8_t f_code[2][2];
uint8_t intra_dc_precision;
uint8_t pic_structure;
uint8_t top_field_first;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t q_scale_type;
uint8_t intra_vlc_format;
uint8_t alternate_scan;
};
/* MPEG-4 part 2 decode parameters for the UVD firmware message; names follow
 * the MPEG-4 VOL/VOP syntax elements (firmware ABI). */
struct ruvd_mpeg4
{
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
uint32_t variant_type;
uint8_t profile_and_level_indication;
uint8_t video_object_layer_verid;
uint8_t video_object_layer_shape;
uint8_t reserved_1;
uint16_t video_object_layer_width;
uint16_t video_object_layer_height;
uint16_t vop_time_increment_resolution;
uint16_t reserved_2;
uint32_t flags;
uint8_t quant_type;
uint8_t reserved_3[3];
uint8_t intra_quant_mat[64];
uint8_t nonintra_quant_mat[64];
/* GMC sprite parameters */
struct {
uint8_t sprite_enable;
uint8_t reserved_4[3];
uint16_t sprite_width;
uint16_t sprite_height;
int16_t sprite_left_coordinate;
int16_t sprite_top_coordinate;
uint8_t no_of_sprite_warping_points;
uint8_t sprite_warping_accuracy;
uint8_t sprite_brightness_change;
uint8_t low_latency_sprite_enable;
} sprite_config;
/* DivX 3.11 compatibility settings */
struct {
uint32_t flags;
uint8_t vol_mode;
uint8_t reserved_5[3];
} divx_311_config;
};
/* message between driver and hardware */
/* Top-level UVD message: a fixed header (size/type/handle) followed by a
 * union whose active member is selected by msg_type (RUVD_MSG_CREATE /
 * RUVD_MSG_DECODE / RUVD_MSG_DESTROY). Firmware ABI — layout must not change. */
struct ruvd_msg {
uint32_t size;
uint32_t msg_type;
uint32_t stream_handle;
uint32_t status_report_feedback_number;
union {
/* RUVD_MSG_CREATE: open a decoder session */
struct {
uint32_t stream_type;
uint32_t session_flags;
uint32_t asic_id;
uint32_t width_in_samples;
uint32_t height_in_samples;
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t version_info;
} create;
/* RUVD_MSG_DECODE: decode one picture; dpb_* describe the decoded picture
 * buffer, bsd_* the bitstream input, dt_* the decode target surface */
struct {
uint32_t stream_type;
uint32_t decode_flags;
uint32_t width_in_samples;
uint32_t height_in_samples;
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t dpb_reserved;
uint32_t db_offset_alignment;
uint32_t db_pitch;
uint32_t db_tiling_mode;
uint32_t db_array_mode;
uint32_t db_field_mode;
uint32_t db_surf_tile_config;
uint32_t db_aligned_height;
uint32_t db_reserved;
uint32_t use_addr_macro;
uint32_t bsd_buffer;
uint32_t bsd_size;
uint32_t pic_param_buffer;
uint32_t pic_param_size;
uint32_t mb_cntl_buffer;
uint32_t mb_cntl_size;
uint32_t dt_buffer;
uint32_t dt_pitch;
uint32_t dt_tiling_mode;
uint32_t dt_array_mode;
uint32_t dt_field_mode;
uint32_t dt_luma_top_offset;
uint32_t dt_luma_bottom_offset;
uint32_t dt_chroma_top_offset;
uint32_t dt_chroma_bottom_offset;
uint32_t dt_surf_tile_config;
uint32_t dt_reserved[3];
uint32_t reserved[16];
/* codec-specific parameters, selected by stream_type */
union {
struct ruvd_h264 h264;
struct ruvd_vc1 vc1;
struct ruvd_mpeg2 mpeg2;
struct ruvd_mpeg4 mpeg4;
uint32_t info[768];
} codec;
uint8_t extension_support;
uint8_t reserved_8bit_1;
uint8_t reserved_8bit_2;
uint8_t reserved_8bit_3;
uint32_t extension_reserved[64];
} decode;
} body;
};
/* driver dependent callback: fills the decode-target fields of a message for
 * the given video buffer and returns the winsys handle to relocate against */
typedef struct radeon_winsys_cs_handle* (*ruvd_set_dtb)
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
/* create an UVD decoder for the given codec template */
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
ruvd_set_dtb set_dtb);
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma);
#endif |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce.c |
---|
0,0 → 1,430 |
/************************************************************************** |
* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#include <stdio.h> |
#include "pipe/p_video_codec.h" |
#include "util/u_video.h" |
#include "util/u_memory.h" |
#include "vl/vl_video_buffer.h" |
#include "r600_pipe_common.h" |
#include "radeon_video.h" |
#include "radeon_vce.h" |
/** |
* flush commands to the hardware |
*/ |
static void flush(struct rvce_encoder *enc)
{
/* submit the accumulated command stream asynchronously; no fence requested */
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
}
#if 0 |
/* Debug helper (compiled out via #if 0): print every word of the VCE
 * feedback buffer to stderr. The labels document the feedback layout that
 * rvce_get_feedback() indexes into. */
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
unsigned i = 0;
fprintf(stderr, "\n");
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "\n");
enc->ws->buffer_unmap(fb->res->cs_buf);
}
#endif |
/** |
* reset the CPB handling |
*/ |
static void reset_cpb(struct rvce_encoder *enc) |
{ |
unsigned i; |
LIST_INITHEAD(&enc->cpb_slots); |
for (i = 0; i < enc->cpb_num; ++i) { |
struct rvce_cpb_slot *slot = &enc->cpb_array[i]; |
slot->index = i; |
slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP; |
slot->frame_num = 0; |
slot->pic_order_cnt = 0; |
LIST_ADDTAIL(&slot->list, &enc->cpb_slots); |
} |
} |
/** |
* sort l0 and l1 to the top of the list |
*/ |
static void sort_cpb(struct rvce_encoder *enc) |
{ |
struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; |
LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) { |
if (i->frame_num == enc->pic.ref_idx_l0) |
l0 = i; |
if (i->frame_num == enc->pic.ref_idx_l1) |
l1 = i; |
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0) |
break; |
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && |
l0 && l1) |
break; |
} |
if (l1) { |
LIST_DEL(&l1->list); |
LIST_ADD(&l1->list, &enc->cpb_slots); |
} |
if (l0) { |
LIST_DEL(&l0->list); |
LIST_ADD(&l0->list, &enc->cpb_slots); |
} |
} |
/** |
* get number of cpbs based on dpb |
*/ |
static unsigned get_cpb_num(struct rvce_encoder *enc) |
{ |
unsigned w = align(enc->base.width, 16) / 16; |
unsigned h = align(enc->base.height, 16) / 16; |
unsigned dpb; |
switch (enc->base.level) { |
case 10: |
dpb = 396; |
break; |
case 11: |
dpb = 900; |
break; |
case 12: |
case 13: |
case 20: |
dpb = 2376; |
break; |
case 21: |
dpb = 4752; |
break; |
case 22: |
case 30: |
dpb = 8100; |
break; |
case 31: |
dpb = 18000; |
break; |
case 32: |
dpb = 20480; |
break; |
case 40: |
case 41: |
dpb = 32768; |
break; |
default: |
case 42: |
dpb = 34816; |
break; |
case 50: |
dpb = 110400; |
break; |
case 51: |
dpb = 184320; |
break; |
} |
return MIN2(dpb / (w * h), 16); |
} |
/** |
* destroy this video encoder |
*/ |
/* Destroy the encoder. If a session is open, a temporary feedback buffer is
 * created just long enough to submit the session/feedback/destroy commands
 * the firmware expects before teardown; the ordering of these calls matters. */
static void rvce_destroy(struct pipe_video_codec *encoder)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
/* fb only needs to live across the flush below */
struct rvid_buffer fb;
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->feedback(enc);
enc->destroy(enc);
flush(enc);
rvid_destroy_buffer(&fb);
}
rvid_destroy_buffer(&enc->cpb);
enc->ws->cs_destroy(enc->cs);
FREE(enc->cpb_array);
FREE(enc);
}
/* Begin encoding one frame: latch the picture parameters, resolve the source
 * surfaces, update the CPB ordering for the picture type, and lazily open the
 * firmware session on the first frame. */
static void rvce_begin_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
/* re-send the rate-control command only when any of its inputs changed
 * (must be computed BEFORE enc->pic is overwritten below) */
bool need_rate_control =
enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||
enc->pic.quant_i_frames != pic->quant_i_frames ||
enc->pic.quant_p_frames != pic->quant_p_frames ||
enc->pic.quant_b_frames != pic->quant_b_frames;
enc->pic = *pic;
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
/* IDR restarts the CPB; P/B frames need the reference slots sorted first */
if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
reset_cpb(enc);
else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
sort_cpb(enc);
/* first frame: create the firmware session and send the one-time config */
if (!enc->stream_handle) {
struct rvid_buffer fb;
enc->stream_handle = rvid_alloc_stream_handle();
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
enc->rate_control(enc);
need_rate_control = false;
enc->config_extension(enc);
enc->motion_estimation(enc);
enc->rdo(enc);
if (enc->use_vui)
enc->vui(enc);
enc->pic_control(enc);
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
}
enc->session(enc);
if (need_rate_control)
enc->rate_control(enc);
}
static void rvce_encode_bitstream(struct pipe_video_codec *encoder, |
struct pipe_video_buffer *source, |
struct pipe_resource *destination, |
void **fb) |
{ |
struct rvce_encoder *enc = (struct rvce_encoder*)encoder; |
enc->get_buffer(destination, &enc->bs_handle, NULL); |
enc->bs_size = destination->width0; |
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer); |
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { |
RVID_ERR("Can't create feedback buffer.\n"); |
return; |
} |
enc->encode(enc); |
enc->feedback(enc); |
} |
static void rvce_end_frame(struct pipe_video_codec *encoder, |
struct pipe_video_buffer *source, |
struct pipe_picture_desc *picture) |
{ |
struct rvce_encoder *enc = (struct rvce_encoder*)encoder; |
struct rvce_cpb_slot *slot = LIST_ENTRY( |
struct rvce_cpb_slot, enc->cpb_slots.prev, list); |
flush(enc); |
/* update the CPB backtrack with the just encoded frame */ |
slot->picture_type = enc->pic.picture_type; |
slot->frame_num = enc->pic.frame_num; |
slot->pic_order_cnt = enc->pic.pic_order_cnt; |
if (!enc->pic.not_referenced) { |
LIST_DEL(&slot->list); |
LIST_ADD(&slot->list, &enc->cpb_slots); |
} |
} |
/* Read the encode result size out of the feedback buffer allocated by
 * rvce_encode_bitstream(), then destroy and free that buffer. */
static void rvce_get_feedback(struct pipe_video_codec *encoder,
void *feedback, unsigned *size)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvid_buffer *fb = feedback;
if (size) {
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
/* per the layout in dump_feedback(): ptr[1] = encHasBitstream,
 * ptr[4] = encBitstreamSize, ptr[9] = videoTimeStamp.
 * NOTE(review): size = encBitstreamSize - videoTimeStamp looks odd —
 * presumably a firmware-specific adjustment; confirm before changing. */
if (ptr[1]) {
*size = ptr[4] - ptr[9];
} else {
*size = 0;
}
enc->ws->buffer_unmap(fb->res->cs_buf);
}
//dump_feedback(enc, fb);
rvid_destroy_buffer(fb);
FREE(fb);
}
/** |
* flush any outstanding command buffers to the hardware |
*/ |
static void rvce_flush(struct pipe_video_codec *encoder)
{
/* intentionally empty: commands are already flushed in rvce_end_frame() */
}
/* winsys flush callback registered with cs_create(); the encoder flushes
 * explicitly itself, so implicit winsys flushes are ignored */
static void rvce_cs_flush(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
// just ignored
}
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, |
const struct pipe_video_codec *templ, |
struct radeon_winsys* ws, |
rvce_get_buffer get_buffer) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen; |
struct rvce_encoder *enc; |
struct pipe_video_buffer *tmp_buf, templat = {}; |
struct radeon_surf *tmp_surf; |
unsigned cpb_size; |
if (!rscreen->info.vce_fw_version) { |
RVID_ERR("Kernel doesn't supports VCE!\n"); |
return NULL; |
} else if (!rvce_is_fw_version_supported(rscreen)) { |
RVID_ERR("Unsupported VCE fw version loaded!\n"); |
return NULL; |
} |
enc = CALLOC_STRUCT(rvce_encoder); |
if (!enc) |
return NULL; |
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42)) |
enc->use_vui = true; |
enc->base = *templ; |
enc->base.context = context; |
enc->base.destroy = rvce_destroy; |
enc->base.begin_frame = rvce_begin_frame; |
enc->base.encode_bitstream = rvce_encode_bitstream; |
enc->base.end_frame = rvce_end_frame; |
enc->base.flush = rvce_flush; |
enc->base.get_feedback = rvce_get_feedback; |
enc->get_buffer = get_buffer; |
enc->screen = context->screen; |
enc->ws = ws; |
enc->cs = ws->cs_create(ws, RING_VCE, rvce_cs_flush, enc, NULL); |
if (!enc->cs) { |
RVID_ERR("Can't get command submission context.\n"); |
goto error; |
} |
templat.buffer_format = PIPE_FORMAT_NV12; |
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; |
templat.width = enc->base.width; |
templat.height = enc->base.height; |
templat.interlaced = false; |
if (!(tmp_buf = context->create_video_buffer(context, &templat))) { |
RVID_ERR("Can't create video buffer.\n"); |
goto error; |
} |
enc->cpb_num = get_cpb_num(enc); |
if (!enc->cpb_num) |
goto error; |
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); |
cpb_size = align(tmp_surf->level[0].pitch_bytes, 128); |
cpb_size = cpb_size * align(tmp_surf->npix_y, 16); |
cpb_size = cpb_size * 3 / 2; |
cpb_size = cpb_size * enc->cpb_num; |
tmp_buf->destroy(tmp_buf); |
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { |
RVID_ERR("Can't create CPB buffer.\n"); |
goto error; |
} |
enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); |
if (!enc->cpb_array) |
goto error; |
reset_cpb(enc); |
radeon_vce_40_2_2_init(enc); |
return &enc->base; |
error: |
if (enc->cs) |
enc->ws->cs_destroy(enc->cs); |
rvid_destroy_buffer(&enc->cpb); |
FREE(enc->cpb_array); |
FREE(enc); |
return NULL; |
} |
/** |
* check if kernel has the right fw version loaded |
*/ |
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen) |
{ |
return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8)); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce.h |
---|
0,0 → 1,117 |
/************************************************************************** |
* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#ifndef RADEON_VCE_H |
#define RADEON_VCE_H |
#include "util/list.h" |
#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN)) |
#define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value)) |
#define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd) |
#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4) |
#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4) |
#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4) |
#define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; } |
struct r600_common_screen;
/* driver dependent callback: resolve a pipe_resource into its winsys handle
 * (may be NULL if not needed) and its radeon surface description */
typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
struct radeon_winsys_cs_handle **handle,
struct radeon_surf **surface);
/* Coded picture buffer slot */
struct rvce_cpb_slot {
/* linkage in rvce_encoder::cpb_slots; head holds the L0/L1 references */
struct list_head list;
/* fixed slot index, used to compute frame offsets inside the CPB */
unsigned index;
/* state of the frame last encoded into this slot */
enum pipe_h264_enc_picture_type picture_type;
unsigned frame_num;
unsigned pic_order_cnt;
};
/* VCE encoder representation */
struct rvce_encoder {
struct pipe_video_codec base;
/* version specific packets (installed by radeon_vce_*_init) */
void (*session)(struct rvce_encoder *enc);
void (*create)(struct rvce_encoder *enc);
void (*feedback)(struct rvce_encoder *enc);
void (*rate_control)(struct rvce_encoder *enc);
void (*config_extension)(struct rvce_encoder *enc);
void (*pic_control)(struct rvce_encoder *enc);
void (*motion_estimation)(struct rvce_encoder *enc);
void (*rdo)(struct rvce_encoder *enc);
void (*vui)(struct rvce_encoder *enc);
void (*encode)(struct rvce_encoder *enc);
void (*destroy)(struct rvce_encoder *enc);
/* firmware session id; 0 means no session opened yet */
unsigned stream_handle;
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs;
rvce_get_buffer get_buffer;
/* current source frame: handle plus luma/chroma surface layouts */
struct radeon_winsys_cs_handle* handle;
struct radeon_surf* luma;
struct radeon_surf* chroma;
/* destination bitstream buffer */
struct radeon_winsys_cs_handle* bs_handle;
unsigned bs_size;
/* coded picture buffer bookkeeping */
struct rvce_cpb_slot *cpb_array;
struct list_head cpb_slots;
unsigned cpb_num;
struct rvid_buffer *fb;
struct rvid_buffer cpb;
/* parameters of the picture currently being encoded */
struct pipe_h264_enc_picture_desc pic;
/* whether to emit VUI timing info (needs new enough kernel) */
bool use_vui;
};
/* create a VCE encoder; returns NULL on missing/unsupported firmware */
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer);
/* check if kernel has the right fw version loaded */
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
#endif |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce_40_2_2.c |
---|
0,0 → 1,452 |
/************************************************************************** |
* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#include <stdio.h> |
#include "pipe/p_video_codec.h" |
#include "util/u_video.h" |
#include "util/u_memory.h" |
#include "vl/vl_video_buffer.h" |
#include "r600_pipe_common.h" |
#include "radeon_video.h" |
#include "radeon_vce.h" |
/* H.264 profile_idc values indexed by (pipe profile - AVC_BASELINE):
 * baseline, main, extended, high, high10, high422, high444 */
static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
/* slot at the list tail: the frame currently being encoded */
static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
}
/* slot at the list head: the L0 reference (placed there by sort_cpb) */
static struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
}
/* second slot in the list: the L1 reference (placed there by sort_cpb) */
static struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
}
static void frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, |
unsigned *luma_offset, unsigned *chroma_offset) |
{ |
unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128); |
unsigned vpitch = align(enc->luma->npix_y, 16); |
unsigned fsize = pitch * (vpitch + vpitch / 2); |
*luma_offset = slot->index * fsize; |
*chroma_offset = *luma_offset + pitch * vpitch; |
} |
/* emit the session command that binds the stream handle for what follows */
static void session(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x00000001); // session cmd
RVCE_CS(enc->stream_handle);
RVCE_END();
}
/* emit a task info block describing the operation that follows it */
static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
{
RVCE_BEGIN(0x00000002); // task info
RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
RVCE_CS(taskOperation); // taskOperation
RVCE_CS(0x00000000); // referencePictureDependency
RVCE_CS(0x00000000); // collocateFlagDependency
RVCE_CS(0x00000000); // feedbackIndex
RVCE_CS(0x00000000); // videoBitstreamRingIndex
RVCE_END();
}
/* point the firmware at the feedback buffer it should write results into */
static void feedback(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x05000005); // feedback buffer
RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
RVCE_CS(0x00000000); // feedbackRingAddressLo
RVCE_CS(0x00000001); // feedbackRingSize
RVCE_END();
}
/* emit the create command opening an encode session with the stream's
 * profile, level, dimensions and reference picture layout */
static void create(struct rvce_encoder *enc)
{
task_info(enc, 0x00000000);
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(0x00000000); // encUseCircularBuffer
RVCE_CS(profiles[enc->base.profile -
PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
RVCE_CS(enc->base.level); // encLevel
RVCE_CS(0x00000000); // encPicStructRestriction
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
RVCE_END();
}
/* emit the rate control setup from the state tracker's rate_ctrl parameters;
 * re-sent by rvce_begin_frame whenever those parameters change */
static void rate_control(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000005); // rate control
RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod
RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate
RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate
RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum
RVCE_CS(0x00000000); // encGOPSize
RVCE_CS(enc->pic.quant_i_frames); // encQP_I
RVCE_CS(enc->pic.quant_p_frames); // encQP_P
RVCE_CS(enc->pic.quant_b_frames); // encQP_B
RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize
RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen
RVCE_CS(0x00000000); // encVBVBufferLevel
RVCE_CS(0x00000000); // encMaxAUSize
RVCE_CS(0x00000000); // encQPInitialMode
RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture
RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger
RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional
RVCE_CS(0x00000000); // encMinQP
RVCE_CS(0x00000033); // encMaxQP
RVCE_CS(0x00000000); // encSkipFrameEnable
RVCE_CS(0x00000000); // encFillerDataEnable
RVCE_CS(0x00000000); // encEnforceHRD
RVCE_CS(0x00000000); // encBPicsDeltaQP
RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP
RVCE_CS(0x00000000); // encRateControlReInitDisable
RVCE_END();
}
/* emit the config extension block (enables firmware perf logging) */
static void config_extension(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000001); // config extension
RVCE_CS(0x00000003); // encEnablePerfLogging
RVCE_END();
}
/* emit the picture control block: one slice per frame, cropping derived from
 * the 16-pixel macroblock alignment, reference counts from max_references */
static void pic_control(struct rvce_encoder *enc)
{
unsigned encNumMBsPerSlice;
/* whole frame in a single slice: width * height in macroblocks */
encNumMBsPerSlice = align(enc->base.width, 16) / 16;
encNumMBsPerSlice *= align(enc->base.height, 16) / 16;
RVCE_BEGIN(0x04000002); // pic control
RVCE_CS(0x00000000); // encUseConstrainedIntraPred
RVCE_CS(0x00000000); // encCABACEnable
RVCE_CS(0x00000000); // encCABACIDC
RVCE_CS(0x00000000); // encLoopFilterDisable
RVCE_CS(0x00000000); // encLFBetaOffset
RVCE_CS(0x00000000); // encLFAlphaC0Offset
RVCE_CS(0x00000000); // encCropLeftOffset
RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset
RVCE_CS(0x00000000); // encCropTopOffset
RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset
RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice
RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot
RVCE_CS(0x00000000); // encForceIntraRefresh
RVCE_CS(0x00000000); // encForceIMBPeriod
RVCE_CS(0x00000000); // encPicOrderCntType
RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4
RVCE_CS(0x00000000); // encSPSID
RVCE_CS(0x00000000); // encPPSID
RVCE_CS(0x00000040); // encConstraintSetFlags
RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern
RVCE_CS(0x00000000); // weightPredModeBPicture
RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames
RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames
RVCE_CS(0x00000001); // encNumDefaultActiveRefL0
RVCE_CS(0x00000001); // encNumDefaultActiveRefL1
RVCE_CS(0x00000000); // encSliceMode
RVCE_CS(0x00000000); // encMaxSliceSize
RVCE_END();
}
/* emit the motion estimation configuration (half-pel search, 16-pixel
 * search ranges, sub-modes mostly disabled) */
static void motion_estimation(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000007); // motion estimation
RVCE_CS(0x00000001); // encIMEDecimationSearch
RVCE_CS(0x00000001); // motionEstHalfPixel
RVCE_CS(0x00000000); // motionEstQuarterPixel
RVCE_CS(0x00000000); // disableFavorPMVPoint
RVCE_CS(0x00000000); // forceZeroPointCenter
RVCE_CS(0x00000000); // LSMVert
RVCE_CS(0x00000010); // encSearchRangeX
RVCE_CS(0x00000010); // encSearchRangeY
RVCE_CS(0x00000010); // encSearch1RangeX
RVCE_CS(0x00000010); // encSearch1RangeY
RVCE_CS(0x00000000); // disable16x16Frame1
RVCE_CS(0x00000000); // disableSATD
RVCE_CS(0x00000000); // enableAMD
RVCE_CS(0x000000fe); // encDisableSubMode
RVCE_CS(0x00000000); // encIMESkipX
RVCE_CS(0x00000000); // encIMESkipY
RVCE_CS(0x00000000); // encEnImeOverwDisSubm
RVCE_CS(0x00000000); // encImeOverwDisSubmNo
RVCE_CS(0x00000001); // encIME2SearchRangeX
RVCE_CS(0x00000001); // encIME2SearchRangeY
RVCE_CS(0x00000000); // parallelModeSpeedupEnable
RVCE_CS(0x00000000); // fme0_encDisableSubMode
RVCE_CS(0x00000000); // fme1_encDisableSubMode
RVCE_CS(0x00000000); // imeSWSpeedupEnable
RVCE_END();
}
/* Emit the VCE rate-distortion-optimization parameter set (command 0x04000008).
 * Every field is left at zero (firmware defaults); the dword order is the
 * firmware ABI and must not change. */
static void rdo(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000008); // rdo
RVCE_CS(0x00000000); // encDisableTbePredIFrame
RVCE_CS(0x00000000); // encDisableTbePredPFrame
RVCE_CS(0x00000000); // useFmeInterpolY
RVCE_CS(0x00000000); // useFmeInterpolUV
RVCE_CS(0x00000000); // useFmeIntrapolY
RVCE_CS(0x00000000); // useFmeIntrapolUV
RVCE_CS(0x00000000); // useFmeInterpolY_1
RVCE_CS(0x00000000); // useFmeInterpolUV_1
RVCE_CS(0x00000000); // useFmeIntrapolY_1
RVCE_CS(0x00000000); // useFmeIntrapolUV_1
RVCE_CS(0x00000000); // enc16x16CostAdj
RVCE_CS(0x00000000); // encSkipCostAdj
RVCE_CS(0x00000000); // encForce16x16skip
RVCE_CS(0x00000000); // encDisableThresholdCalcA
RVCE_CS(0x00000000); // encLumaCoeffCost
RVCE_CS(0x00000000); // encLumaMBCoeffCost
RVCE_CS(0x00000000); // encChromaCoeffCost
RVCE_END();
}
/* Emit the H.264 VUI (Video Usability Information) parameter set
 * (command 0x04000009).  Only timing info is enabled; everything else is
 * disabled or at spec defaults.  Dword order is the firmware ABI. */
static void vui(struct rvce_encoder *enc)
{
int i;
RVCE_BEGIN(0x04000009); // vui
RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
RVCE_CS(0x00000000); //aspectRatioInfo.sarWidth
RVCE_CS(0x00000000); //aspectRatioInfo.sarHeight
RVCE_CS(0x00000000); //overscanInfoPresentFlag
RVCE_CS(0x00000000); //overScanInfo.overscanAppropFlag
RVCE_CS(0x00000000); //videoSignalTypePresentFlag
RVCE_CS(0x00000005); //videoSignalTypeInfo.videoFormat
RVCE_CS(0x00000000); //videoSignalTypeInfo.videoFullRangeFlag
RVCE_CS(0x00000000); //videoSignalTypeInfo.colorDescriptionPresentFlag
RVCE_CS(0x00000002); //videoSignalTypeInfo.colorPrim
RVCE_CS(0x00000002); //videoSignalTypeInfo.transferChar
RVCE_CS(0x00000002); //videoSignalTypeInfo.matrixCoef
RVCE_CS(0x00000000); //chromaLocInfoPresentFlag
RVCE_CS(0x00000000); //chromaLocInfo.chromaLocTop
RVCE_CS(0x00000000); //chromaLocInfo.chromaLocBottom
RVCE_CS(0x00000001); //timingInfoPresentFlag
RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); //timingInfo.numUnitsInTick
/* time_scale is twice the frame rate — H.264 VUI counts in field ticks */
RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); //timingInfo.timeScale;
RVCE_CS(0x00000001); //timingInfo.fixedFrameRateFlag
RVCE_CS(0x00000000); //nalHRDParametersPresentFlag
RVCE_CS(0x00000000); //hrdParam.cpbCntMinus1
RVCE_CS(0x00000004); //hrdParam.bitRateScale
RVCE_CS(0x00000006); //hrdParam.cpbSizeScale
/* 32 CPB entries are always written even though cpbCntMinus1 is 0 —
 * the firmware expects a fixed-size structure */
for (i = 0; i < 32; i++) {
RVCE_CS(0x00000000); //hrdParam.bitRateValueMinus
RVCE_CS(0x00000000); //hrdParam.cpbSizeValueMinus
RVCE_CS(0x00000000); //hrdParam.cbrFlag
}
RVCE_CS(0x00000017); //hrdParam.initialCpbRemovalDelayLengthMinus1
RVCE_CS(0x00000017); //hrdParam.cpbRemovalDelayLengthMinus1
RVCE_CS(0x00000017); //hrdParam.dpbOutputDelayLengthMinus1
RVCE_CS(0x00000018); //hrdParam.timeOffsetLength
RVCE_CS(0x00000000); //lowDelayHRDFlag
RVCE_CS(0x00000000); //picStructPresentFlag
RVCE_CS(0x00000000); //bitstreamRestrictionPresentFlag
RVCE_CS(0x00000001); //bitstreamRestrictions.motionVectorsOverPicBoundariesFlag
RVCE_CS(0x00000002); //bitstreamRestrictions.maxBytesPerPicDenom
RVCE_CS(0x00000001); //bitstreamRestrictions.maxBitsPerMbDenom
RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthHori
RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthVert
RVCE_CS(0x00000003); //bitstreamRestrictions.numReorderFrames
RVCE_CS(0x00000003); //bitstreamRestrictions.maxDecFrameBuffering
RVCE_END();
}
/* Build and submit one frame-encode job.  Emits:
 *  - task info (0x00000003 = encode task)
 *  - the context buffer (CPB) and bitstream ring buffer relocations
 *  - the encode command (0x03000001) with the input picture addresses,
 *    picture type, and the L0/L1 reference picture lists.
 * Dword order within each RVCE_BEGIN/RVCE_END pair is the firmware ABI. */
static void encode(struct rvce_encoder *enc)
{
int i;
unsigned luma_offset, chroma_offset;
task_info(enc, 0x00000003);
RVCE_BEGIN(0x05000001); // context buffer
RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
RVCE_CS(0x00000000); // encodeContextAddressLo
RVCE_END();
RVCE_BEGIN(0x05000004); // video bitstream buffer
RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
RVCE_BEGIN(0x03000001); // encode
RVCE_CS(0x00000000); // insertHeaders
RVCE_CS(0x00000000); // pictureStructure
RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
RVCE_CS(0x00000000); // forceRefreshMap
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
/* luma/chroma planes live in the same BO; only the offsets differ */
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode
RVCE_CS(0x00000000); // encInputPicTileConfig
RVCE_CS(enc->pic.picture_type); // encPicType
RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
RVCE_CS(0x00000000); // encIdrPicId
RVCE_CS(0x00000000); // encMGSKeyPic
RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
RVCE_CS(0x00000000); // encTemporalLayerIndex
RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
/* if the L0 reference is more than one frame behind a P-frame, emit an
 * explicit reference-list modification to select it */
i = enc->pic.frame_num - enc->pic.ref_idx_l0;
if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
RVCE_CS(0x00000001); // encRefListModificationOp
RVCE_CS(i - 1); // encRefListModificationNum
} else {
RVCE_CS(0x00000000); // encRefListModificationOp
RVCE_CS(0x00000000); // encRefListModificationNum
}
/* remaining 3 of the 4 modification-op slots stay unused */
for (i = 0; i < 3; ++i) {
RVCE_CS(0x00000000); // encRefListModificationOp
RVCE_CS(0x00000000); // encRefListModificationNum
}
for (i = 0; i < 4; ++i) {
RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
}
// encReferencePictureL0[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
RVCE_CS(luma_offset); // lumaOffset
RVCE_CS(chroma_offset); // chromaOffset
} else {
/* 0xffffffff marks an unused reference slot */
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
}
// encReferencePictureL0[1]
RVCE_CS(0x00000000); // pictureStructure
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
RVCE_CS(luma_offset); // lumaOffset
RVCE_CS(chroma_offset); // chromaOffset
} else {
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
}
/* where the reconstructed (decoded) picture is written inside the CPB */
frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
RVCE_CS(0x00000000); // pictureCount
RVCE_CS(enc->pic.frame_num); // frameNumber
RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
RVCE_CS(0x00000000); // numIPicRemainInRCGOP
RVCE_CS(0x00000000); // numPPicRemainInRCGOP
RVCE_CS(0x00000000); // numBPicRemainInRCGOP
RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
RVCE_CS(0x00000000); // enableIntraRefresh
RVCE_END();
}
/* Emit the session-destroy command (0x02000001), preceded by its task info
 * (0x00000001 = destroy task). */
static void destroy(struct rvce_encoder *enc)
{
task_info(enc, 0x00000001);
RVCE_BEGIN(0x02000001); // destroy
RVCE_END();
}
/* Entry point for VCE firmware interface version 40.2.2: wire up all
 * command-stream building callbacks on the encoder object.  `session`,
 * `create`, etc. are the static functions defined above in this file. */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->create = create;
enc->feedback = feedback;
enc->rate_control = rate_control;
enc->config_extension = config_extension;
enc->pic_control = pic_control;
enc->motion_estimation = motion_estimation;
enc->rdo = rdo;
enc->vui = vui;
enc->encode = encode;
enc->destroy = destroy;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_video.c |
---|
0,0 → 1,321 |
/************************************************************************** |
* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#include <unistd.h> |
#include "util/u_memory.h" |
#include "util/u_video.h" |
#include "vl/vl_defines.h" |
#include "vl/vl_video_buffer.h" |
#include "r600_pipe_common.h" |
#include "radeon_video.h" |
#include "radeon_vce.h" |
/* generate a stream handle
 *
 * Mixes the bit-reversed PID with a per-process counter so that concurrent
 * processes, and successive streams within one process, get distinct handles.
 * Returns the new (non-deterministic across processes) handle. */
unsigned rvid_alloc_stream_handle(void)
{
	static unsigned counter = 0;
	unsigned stream_handle = 0;
	unsigned pid = getpid();
	int i;

	/* reverse the PID bits so PIDs that differ only in their low bits
	 * still yield very different handles */
	for (i = 0; i < 32; ++i)
		stream_handle |= ((pid >> i) & 1) << (31 - i);

	/* XOR with an incrementing counter to distinguish multiple streams
	 * created by the same process */
	stream_handle ^= ++counter;
	return stream_handle;
}
/* create a buffer in the winsys */ |
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, |
unsigned size, unsigned usage) |
{ |
memset(buffer, 0, sizeof(*buffer)); |
buffer->usage = usage; |
buffer->res = (struct r600_resource *) |
pipe_buffer_create(screen, PIPE_BIND_CUSTOM, usage, size); |
return buffer->res != NULL; |
} |
/* destroy a buffer */ |
/* destroy a buffer: drop our reference on the backing resource
 * (freed once the last reference is gone) and NULL the pointer */
void rvid_destroy_buffer(struct rvid_buffer *buffer)
{
pipe_resource_reference((struct pipe_resource **)&buffer->res, NULL);
}
/* reallocate a buffer, preserving its content */ |
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs, |
struct rvid_buffer *new_buf, unsigned new_size) |
{ |
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; |
struct radeon_winsys* ws = rscreen->ws; |
unsigned bytes = MIN2(new_buf->res->buf->size, new_size); |
struct rvid_buffer old_buf = *new_buf; |
void *src = NULL, *dst = NULL; |
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage)) |
goto error; |
src = ws->buffer_map(old_buf.res->cs_buf, cs, PIPE_TRANSFER_READ); |
if (!src) |
goto error; |
dst = ws->buffer_map(new_buf->res->cs_buf, cs, PIPE_TRANSFER_WRITE); |
if (!dst) |
goto error; |
memcpy(dst, src, bytes); |
if (new_size > bytes) { |
new_size -= bytes; |
dst += bytes; |
memset(dst, 0, new_size); |
} |
ws->buffer_unmap(new_buf->res->cs_buf); |
ws->buffer_unmap(old_buf.res->cs_buf); |
rvid_destroy_buffer(&old_buf); |
return true; |
error: |
if (src) |
ws->buffer_unmap(old_buf.res->cs_buf); |
rvid_destroy_buffer(new_buf); |
*new_buf = old_buf; |
return false; |
} |
/* clear the buffer with zeros */ |
/* clear the buffer with zeros
 * Uses the context's accelerated clear over the whole backing store, then
 * flushes so the clear lands before other engines (UVD/VCE) read it. */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
0, false);
context->flush(context, NULL, 0);
}
/**
* join surfaces into the same buffer with identical tiling params,
* sum up their sizes and replace the backend buffers with a single bo
*/
/* Pick the tiling parameters shared by all planes, rebase every plane's
 * level offsets into one combined allocation, then create a single BO and
 * point all the per-plane buffer references at it.
 * Runs in three passes: choose tiling, relocate offsets, size/allocate. */
void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
unsigned best_tiling, best_wh, off;
unsigned size, alignment;
struct pb_buffer *pb;
unsigned i, j;
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
unsigned wh;
if (!surfaces[i])
continue;
/* choose the smallest bank w/h for now */
wh = surfaces[i]->bankw * surfaces[i]->bankh;
if (wh < best_wh) {
best_wh = wh;
best_tiling = i;
}
}
for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!surfaces[i])
continue;
/* copy the tiling parameters */
surfaces[i]->bankw = surfaces[best_tiling]->bankw;
surfaces[i]->bankh = surfaces[best_tiling]->bankh;
surfaces[i]->mtilea = surfaces[best_tiling]->mtilea;
surfaces[i]->tile_split = surfaces[best_tiling]->tile_split;
/* adjust the texture layer offsets */
off = align(off, surfaces[i]->bo_alignment);
/* shift every mip level by this plane's base offset in the joined BO */
for (j = 0; j < Elements(surfaces[i]->level); ++j)
surfaces[i]->level[j].offset += off;
off += surfaces[i]->bo_size;
}
for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
size = align(size, (*buffers[i])->alignment);
size += (*buffers[i])->size;
alignment = MAX2(alignment, (*buffers[i])->alignment * 1);
}
if (!size)
return;
/* TODO: 2D tiling workaround */
alignment *= 2;
pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM, 0);
if (!pb)
return;
/* replace each plane's buffer with a reference to the joined BO */
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
pb_reference(buffers[i], pb);
}
/* drop our own creation reference; the planes now own it */
pb_reference(&pb, NULL);
}
/* Report video capabilities for a profile/entrypoint pair.
 * Three tiers, checked in order: VCE encode caps, pre-PALM (UVD 2.x)
 * hardware limits, then the general UVD decode caps. */
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
/* encoding is only available through VCE, and only for H.264 */
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
default:
return 0;
}
}
/* UVD 2.x limits */
if (rscreen->family < CHIP_PALM) {
enum pipe_video_format codec = u_reduce_video_profile(profile);
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
/* no support for MPEG4 */
return codec != PIPE_VIDEO_FORMAT_MPEG4 &&
/* FIXME: VC-1 simple/main profile is broken */
profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE &&
profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
/* MPEG2 only with shaders and no support for
interlacing on R6xx style UVD */
return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
rscreen->family > CHIP_RV770;
default:
/* fall through to the generic caps below */
break;
}
}
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
switch (u_reduce_video_profile(profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
case PIPE_VIDEO_FORMAT_VC1:
/* FIXME: VC-1 simple/main profile is broken */
return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return true;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
/* maximum codec level the decoder handles, per profile */
switch (profile) {
case PIPE_VIDEO_PROFILE_MPEG1:
return 0;
case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
return 5;
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
return 1;
case PIPE_VIDEO_PROFILE_VC1_MAIN:
return 2;
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
return 4;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
return 41;
default:
return 0;
}
default:
return 0;
}
}
boolean rvid_is_format_supported(struct pipe_screen *screen, |
enum pipe_format format, |
enum pipe_video_profile profile, |
enum pipe_video_entrypoint entrypoint) |
{ |
/* we can only handle this one with UVD */ |
if (profile != PIPE_VIDEO_PROFILE_UNKNOWN) |
return format == PIPE_FORMAT_NV12; |
return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_video.h |
---|
0,0 → 1,85 |
/************************************************************************** |
* |
* Copyright 2013 Advanced Micro Devices, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Christian König <christian.koenig@amd.com> |
* |
*/ |
#ifndef RADEON_VIDEO_H |
#define RADEON_VIDEO_H |
#include "radeon/radeon_winsys.h" |
#include "vl/vl_video_buffer.h" |
/* error logging helper: prefixes the message with file/line/function */
#define RVID_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
/* video buffer representation */
struct rvid_buffer
{
unsigned usage; /* PIPE_USAGE_* the buffer was created with */
struct r600_resource *res; /* backing winsys resource */
};
/* generate a stream handle */
unsigned rvid_alloc_stream_handle(void);
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage);
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer);
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size);
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
/* join surfaces into the same buffer with identical tiling params,
sum up their sizes and replace the backend buffers with a single bo */
void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
/* returns supported codecs and other parameters */
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param);
/* the hardware only supports NV12 */
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint);
#endif // RADEON_VIDEO_H |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_winsys.h |
---|
0,0 → 1,683 |
/* |
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> |
* Copyright 2010 Marek Olšák <maraeo@gmail.com> |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
#ifndef RADEON_WINSYS_H |
#define RADEON_WINSYS_H |
/* The public winsys interface header for the radeon driver. */ |
/* R300 features in DRM. |
* |
* 2.6.0: |
* - Hyper-Z |
* - GB_Z_PEQ_CONFIG on rv350->r4xx |
* - R500 FG_ALPHA_VALUE |
* |
* 2.8.0: |
* - R500 US_FORMAT regs |
* - R500 ARGB2101010 colorbuffer |
* - CMask and AA regs |
* - R16F/RG16F |
*/ |
#include "pipebuffer/pb_buffer.h" |
/* maximum size of a command buffer, in dwords */
#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
/* flags for the winsys flush callback */
#define RADEON_FLUSH_ASYNC (1 << 0)
#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) /* needs DRM 2.12.0 */
#define RADEON_FLUSH_COMPUTE (1 << 2)
#define RADEON_FLUSH_END_OF_FRAME (1 << 3)
/* Tiling flags. */
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
RADEON_LAYOUT_TILED,
RADEON_LAYOUT_SQUARETILED,
RADEON_LAYOUT_UNKNOWN
};
/* memory domains a buffer object may live in */
enum radeon_bo_domain { /* bitfield */
RADEON_DOMAIN_GTT = 2,
RADEON_DOMAIN_VRAM = 4,
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
};
/* allocation hints for buffer creation */
enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
};
/* how a buffer is (or will be) accessed by the GPU */
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
};
/* individual GPU chip identifiers, ordered oldest to newest; ordering is
 * relied on by comparisons such as `family < CHIP_PALM` */
enum radeon_family {
CHIP_UNKNOWN = 0,
CHIP_R300, /* R3xx-based cores. */
CHIP_R350,
CHIP_RV350,
CHIP_RV370,
CHIP_RV380,
CHIP_RS400,
CHIP_RC410,
CHIP_RS480,
CHIP_R420, /* R4xx-based cores. */
CHIP_R423,
CHIP_R430,
CHIP_R480,
CHIP_R481,
CHIP_RV410,
CHIP_RS600,
CHIP_RS690,
CHIP_RS740,
CHIP_RV515, /* R5xx-based cores. */
CHIP_R520,
CHIP_RV530,
CHIP_R580,
CHIP_RV560,
CHIP_RV570,
CHIP_R600,
CHIP_RV610,
CHIP_RV630,
CHIP_RV670,
CHIP_RV620,
CHIP_RV635,
CHIP_RS780,
CHIP_RS880,
CHIP_RV770,
CHIP_RV730,
CHIP_RV710,
CHIP_RV740,
CHIP_CEDAR,
CHIP_REDWOOD,
CHIP_JUNIPER,
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
CHIP_SUMO,
CHIP_SUMO2,
CHIP_BARTS,
CHIP_TURKS,
CHIP_CAICOS,
CHIP_CAYMAN,
CHIP_ARUBA,
CHIP_TAHITI,
CHIP_PITCAIRN,
CHIP_VERDE,
CHIP_OLAND,
CHIP_HAINAN,
CHIP_BONAIRE,
CHIP_KAVERI,
CHIP_KABINI,
CHIP_HAWAII,
CHIP_MULLINS,
CHIP_LAST,
};
/* broad hardware generation classes, also ordered for `<`/`>` comparisons */
enum chip_class {
CLASS_UNKNOWN = 0,
R300,
R400,
R500,
R600,
R700,
EVERGREEN,
CAYMAN,
SI,
CIK,
};
/* hardware command submission rings */
enum ring_type {
RING_GFX = 0,
RING_DMA,
RING_UVD,
RING_VCE,
RING_LAST,
};
/* runtime counters/values that can be queried from the winsys */
enum radeon_value_id {
RADEON_REQUESTED_VRAM_MEMORY,
RADEON_REQUESTED_GTT_MEMORY,
RADEON_BUFFER_WAIT_TIME_NS,
RADEON_TIMESTAMP,
RADEON_NUM_CS_FLUSHES,
RADEON_NUM_BYTES_MOVED,
RADEON_VRAM_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE,
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK
};
/* relative residency priority of a buffer, by what it holds */
enum radeon_bo_priority {
RADEON_PRIO_MIN,
RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
RADEON_PRIO_COLOR_BUFFER,
RADEON_PRIO_DEPTH_BUFFER,
RADEON_PRIO_SHADER_TEXTURE_MSAA,
RADEON_PRIO_COLOR_BUFFER_MSAA,
RADEON_PRIO_DEPTH_BUFFER_MSAA,
RADEON_PRIO_COLOR_META,
RADEON_PRIO_DEPTH_META,
RADEON_PRIO_MAX /* must be <= 15 */
};
/* opaque handles defined by the winsys implementation */
struct winsys_handle;
struct radeon_winsys_cs_handle;
/* an in-construction command stream */
struct radeon_winsys_cs {
unsigned cdw; /* Number of used dwords. */
uint32_t *buf; /* The command buffer. */
enum ring_type ring_type;
};
/* static chipset and DRM information reported by the winsys at init time */
struct radeon_info {
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
uint32_t max_sclk; /* maximum shader clock */
uint32_t max_compute_units;
uint32_t max_se; /* shader engines */
uint32_t max_sh_per_se; /* shader arrays per shader engine */
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
boolean has_uvd;
uint32_t vce_fw_version;
boolean has_userptr;
/* r300-specific pipe config */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
/* r600+ tiling/backend config */
uint32_t r600_num_backends;
uint32_t r600_clock_crystal_freq;
uint32_t r600_tiling_config;
uint32_t r600_num_tile_pipes;
uint32_t r600_max_pipes;
boolean r600_virtual_address;
boolean r600_has_dma;
uint32_t r600_backend_map;
boolean r600_backend_map_valid;
/* SI/CIK tile mode tables from the kernel */
boolean si_tile_mode_array_valid;
uint32_t si_tile_mode_array[32];
uint32_t si_backend_enabled_mask;
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
};
/* optional hardware features that must be requested/enabled explicitly */
enum radeon_feature_id {
RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
RADEON_FID_R300_CMASK_ACCESS,
};
#define RADEON_SURF_MAX_LEVEL 32
/* radeon_surf::flags is packed as: type (bits 0-7), mode (bits 8-15),
 * boolean flags from bit 16 up */
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_TYPE_SHIFT 0
#define RADEON_SURF_TYPE_1D 0
#define RADEON_SURF_TYPE_2D 1
#define RADEON_SURF_TYPE_3D 2
#define RADEON_SURF_TYPE_CUBEMAP 3
#define RADEON_SURF_TYPE_1D_ARRAY 4
#define RADEON_SURF_TYPE_2D_ARRAY 5
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_MODE_LINEAR 0
#define RADEON_SURF_MODE_LINEAR_ALIGNED 1
#define RADEON_SURF_MODE_1D 2
#define RADEON_SURF_MODE_2D 3
#define RADEON_SURF_SCANOUT (1 << 16)
#define RADEON_SURF_ZBUFFER (1 << 17)
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19)
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
/* accessors for the packed type/mode bitfields above */
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
/* layout of a single mip level within a surface allocation */
struct radeon_surf_level {
uint64_t offset; /* byte offset of this level in the BO */
uint64_t slice_size;
uint32_t npix_x;
uint32_t npix_y;
uint32_t npix_z;
uint32_t nblk_x;
uint32_t nblk_y;
uint32_t nblk_z;
uint32_t pitch_bytes;
uint32_t mode; /* RADEON_SURF_MODE_* actually used for this level */
};
struct radeon_surf {
/* These are inputs to the calculator. */
uint32_t npix_x;
uint32_t npix_y;
uint32_t npix_z;
uint32_t blk_w;
uint32_t blk_h;
uint32_t blk_d;
uint32_t array_size;
uint32_t last_level;
uint32_t bpe; /* bytes per element */
uint32_t nsamples;
uint32_t flags; /* RADEON_SURF_* type/mode/flag bits */
/* These are return values. Some of them can be set by the caller, but
 * they will be treated as hints (e.g. bankw, bankh) and might be
 * changed by the calculator.
 */
uint64_t bo_size;
uint64_t bo_alignment;
/* This applies to EG and later. */
uint32_t bankw;
uint32_t bankh;
uint32_t mtilea;
uint32_t tile_split;
uint32_t stencil_tile_split;
uint64_t stencil_offset;
struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL];
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
};
struct radeon_winsys { |
/** |
* The screen object this winsys was created for |
*/ |
struct pipe_screen *screen; |
/** |
* Decrement the winsys reference count. |
* |
* \param ws The winsys this function is called for. |
* \return True if the winsys and screen should be destroyed. |
*/ |
bool (*unref)(struct radeon_winsys *ws); |
/** |
* Destroy this winsys. |
* |
* \param ws The winsys this function is called from. |
*/ |
void (*destroy)(struct radeon_winsys *ws); |
/** |
* Query an info structure from winsys. |
* |
* \param ws The winsys this function is called from. |
* \param info Return structure |
*/ |
void (*query_info)(struct radeon_winsys *ws, |
struct radeon_info *info); |
/************************************************************************** |
* Buffer management. Buffer attributes are mostly fixed over its lifetime. |
* |
* Remember that gallium gets to choose the interface it needs, and the |
* window systems must then implement that interface (rather than the |
* other way around...). |
*************************************************************************/ |
/** |
* Create a buffer object. |
* |
* \param ws The winsys this function is called from. |
* \param size The size to allocate. |
* \param alignment An alignment of the buffer in memory. |
* \param use_reusable_pool Whether the cache buffer manager should be used. |
* \param domain A bitmask of the RADEON_DOMAIN_* flags. |
* \return The created buffer object. |
*/ |
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, |
unsigned size, |
unsigned alignment, |
boolean use_reusable_pool, |
enum radeon_bo_domain domain, |
enum radeon_bo_flag flags); |
struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)( |
struct pb_buffer *buf); |
/** |
* Map the entire data store of a buffer object into the client's address |
* space. |
* |
* \param buf A winsys buffer object to map. |
* \param cs A command stream to flush if the buffer is referenced by it. |
* \param usage A bitmask of the PIPE_TRANSFER_* flags. |
* \return The pointer at the beginning of the buffer. |
*/ |
void *(*buffer_map)(struct radeon_winsys_cs_handle *buf, |
struct radeon_winsys_cs *cs, |
enum pipe_transfer_usage usage); |
/** |
* Unmap a buffer object from the client's address space. |
* |
* \param buf A winsys buffer object to unmap. |
*/ |
void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf); |
/** |
* Return TRUE if a buffer object is being used by the GPU. |
* |
* \param buf A winsys buffer object. |
* \param usage Only check whether the buffer is busy for the given usage. |
*/ |
boolean (*buffer_is_busy)(struct pb_buffer *buf, |
enum radeon_bo_usage usage); |
/** |
* Wait for a buffer object until it is not used by a GPU. This is |
* equivalent to a fence placed after the last command using the buffer, |
* and synchronizing to the fence. |
* |
* \param buf A winsys buffer object to wait for. |
* \param usage Only wait until the buffer is idle for the given usage, |
* but may still be busy for some other usage. |
*/ |
void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage); |
/** |
* Return tiling flags describing a memory layout of a buffer object. |
* |
* \param buf A winsys buffer object to get the flags from. |
 * \param microtile A pointer to the return value of the microtile flag.
 * \param macrotile A pointer to the return value of the macrotile flag.
* |
* \note microtile and macrotile are not bitmasks! |
*/ |
void (*buffer_get_tiling)(struct pb_buffer *buf, |
enum radeon_bo_layout *microtile, |
enum radeon_bo_layout *macrotile, |
unsigned *bankw, unsigned *bankh, |
unsigned *tile_split, |
unsigned *stencil_tile_split, |
unsigned *mtilea, |
bool *scanout); |
/** |
* Set tiling flags describing a memory layout of a buffer object. |
* |
* \param buf A winsys buffer object to set the flags for. |
* \param cs A command stream to flush if the buffer is referenced by it. |
* \param macrotile A macrotile flag. |
* \param microtile A microtile flag. |
* \param stride A stride of the buffer in bytes, for texturing. |
* |
* \note microtile and macrotile are not bitmasks! |
*/ |
void (*buffer_set_tiling)(struct pb_buffer *buf, |
struct radeon_winsys_cs *rcs, |
enum radeon_bo_layout microtile, |
enum radeon_bo_layout macrotile, |
unsigned bankw, unsigned bankh, |
unsigned tile_split, |
unsigned stencil_tile_split, |
unsigned mtilea, |
unsigned stride, |
bool scanout); |
/** |
* Get a winsys buffer from a winsys handle. The internal structure |
* of the handle is platform-specific and only a winsys should access it. |
* |
* \param ws The winsys this function is called from. |
* \param whandle A winsys handle pointer as was received from a state |
* tracker. |
* \param stride The returned buffer stride in bytes. |
*/ |
struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, |
struct winsys_handle *whandle, |
unsigned *stride); |
/** |
* Get a winsys buffer from a user pointer. The resulting buffer can't |
* be exported. Both pointer and size must be page aligned. |
* |
* \param ws The winsys this function is called from. |
* \param pointer User pointer to turn into a buffer object. |
 * \param size Size in bytes for the new buffer.
*/ |
struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, |
void *pointer, unsigned size); |
/** |
* Get a winsys handle from a winsys buffer. The internal structure |
* of the handle is platform-specific and only a winsys should access it. |
* |
* \param buf A winsys buffer object to get the handle from. |
* \param whandle A winsys handle pointer. |
* \param stride A stride of the buffer in bytes, for texturing. |
* \return TRUE on success. |
*/ |
boolean (*buffer_get_handle)(struct pb_buffer *buf, |
unsigned stride, |
struct winsys_handle *whandle); |
/** |
* Return the virtual address of a buffer. |
* |
* \param buf A winsys buffer object |
* \return virtual address |
*/ |
uint64_t (*buffer_get_virtual_address)(struct radeon_winsys_cs_handle *buf); |
/** |
* Query the initial placement of the buffer from the kernel driver. |
*/ |
enum radeon_bo_domain (*buffer_get_initial_domain)(struct radeon_winsys_cs_handle *buf); |
/************************************************************************** |
* Command submission. |
* |
* Each pipe context should create its own command stream and submit |
* commands independently of other contexts. |
*************************************************************************/ |
/** |
* Create a command stream. |
* |
* \param ws The winsys this function is called from. |
* \param ring_type The ring type (GFX, DMA, UVD) |
* \param flush Flush callback function associated with the command stream. |
* \param user User pointer that will be passed to the flush callback. |
* \param trace_buf Trace buffer when tracing is enabled |
*/ |
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, |
enum ring_type ring_type, |
void (*flush)(void *ctx, unsigned flags, |
struct pipe_fence_handle **fence), |
void *flush_ctx, |
struct radeon_winsys_cs_handle *trace_buf); |
/** |
* Destroy a command stream. |
* |
* \param cs A command stream to destroy. |
*/ |
void (*cs_destroy)(struct radeon_winsys_cs *cs); |
/** |
* Add a new buffer relocation. Every relocation must first be added |
* before it can be written. |
* |
* \param cs A command stream to add buffer for validation against. |
* \param buf A winsys buffer to validate. |
* \param usage Whether the buffer is used for read and/or write. |
* \param domain Bitmask of the RADEON_DOMAIN_* flags. |
* \param priority A higher number means a greater chance of being |
* placed in the requested domain. 15 is the maximum. |
* \return Relocation index. |
*/ |
unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs, |
struct radeon_winsys_cs_handle *buf, |
enum radeon_bo_usage usage, |
enum radeon_bo_domain domain, |
enum radeon_bo_priority priority); |
/** |
* Return the index of an already-added buffer. |
* |
* \param cs Command stream |
* \param buf Buffer |
* \return The buffer index, or -1 if the buffer has not been added. |
*/ |
int (*cs_get_reloc)(struct radeon_winsys_cs *cs, |
struct radeon_winsys_cs_handle *buf); |
/** |
* Return TRUE if there is enough memory in VRAM and GTT for the relocs |
* added so far. If the validation fails, all the relocations which have |
* been added since the last call of cs_validate will be removed and |
* the CS will be flushed (provided there are still any relocations). |
* |
* \param cs A command stream to validate. |
*/ |
boolean (*cs_validate)(struct radeon_winsys_cs *cs); |
/** |
* Return TRUE if there is enough memory in VRAM and GTT for the relocs |
* added so far. |
* |
* \param cs A command stream to validate. |
 * \param vram VRAM memory size pending to be used
 * \param gtt GTT memory size pending to be used
*/ |
boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt); |
/** |
* Flush a command stream. |
* |
* \param cs A command stream to flush. |
 * \param flags RADEON_FLUSH_ASYNC or 0.
* \param fence Pointer to a fence. If non-NULL, a fence is inserted |
* after the CS and is returned through this parameter. |
* \param cs_trace_id A unique identifier of the cs, used for tracing. |
*/ |
void (*cs_flush)(struct radeon_winsys_cs *cs, |
unsigned flags, |
struct pipe_fence_handle **fence, |
uint32_t cs_trace_id); |
/** |
* Return TRUE if a buffer is referenced by a command stream. |
* |
* \param cs A command stream. |
* \param buf A winsys buffer. |
*/ |
boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs, |
struct radeon_winsys_cs_handle *buf, |
enum radeon_bo_usage usage); |
/** |
* Request access to a feature for a command stream. |
* |
* \param cs A command stream. |
* \param fid Feature ID, one of RADEON_FID_* |
* \param enable Whether to enable or disable the feature. |
*/ |
boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, |
enum radeon_feature_id fid, |
boolean enable); |
/** |
* Make sure all asynchronous flush of the cs have completed |
* |
* \param cs A command stream. |
*/ |
void (*cs_sync_flush)(struct radeon_winsys_cs *cs); |
/** |
* Wait for the fence and return true if the fence has been signalled. |
* The timeout of 0 will only return the status. |
* The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence |
* is signalled. |
*/ |
bool (*fence_wait)(struct radeon_winsys *ws, |
struct pipe_fence_handle *fence, |
uint64_t timeout); |
/** |
* Reference counting for fences. |
*/ |
void (*fence_reference)(struct pipe_fence_handle **dst, |
struct pipe_fence_handle *src); |
/** |
* Initialize surface |
* |
* \param ws The winsys this function is called from. |
* \param surf Surface structure ptr |
*/ |
int (*surface_init)(struct radeon_winsys *ws, |
struct radeon_surf *surf); |
/** |
* Find best values for a surface |
* |
* \param ws The winsys this function is called from. |
* \param surf Surface structure ptr |
*/ |
int (*surface_best)(struct radeon_winsys *ws, |
struct radeon_surf *surf); |
uint64_t (*query_value)(struct radeon_winsys *ws, |
enum radeon_value_id value); |
void (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, |
unsigned num_registers, uint32_t *out); |
}; |
static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value) |
{ |
cs->buf[cs->cdw++] = value; |
} |
static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs, |
const uint32_t *values, unsigned count) |
{ |
memcpy(cs->buf+cs->cdw, values, count * 4); |
cs->cdw += count; |
} |
#endif |